Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 41
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 73
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 8
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 17
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 56
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 116
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 5
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 32
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 43
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 22
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 143
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 4
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 70
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 7
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 1
-rw-r--r--  lib/Target/CBackend/CTargetMachine.h | 4
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 10
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 8
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 1
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 11
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 7
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 1
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 7
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 13
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 7
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 8
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 37
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 20
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 26
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 6
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 41
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 72
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 65
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 178
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.h | 4
-rw-r--r--  lib/Target/Mips/MipsMCSymbolRefExpr.cpp | 70
-rw-r--r--  lib/Target/Mips/MipsMCSymbolRefExpr.h | 67
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 34
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 25
-rw-r--r--  lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/PTX/PTXTargetMachine.cpp | 67
-rw-r--r--  lib/Target/PTX/PTXTargetMachine.h | 18
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 1
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 21
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 3
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 20
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 15
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 19
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 13
-rw-r--r--  lib/Target/TargetLibraryInfo.cpp | 25
-rw-r--r--  lib/Target/TargetMachine.cpp | 8
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp | 92
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.cpp | 43
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.h | 15
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 69
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 762
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 20
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h | 1
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 20
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 35
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 3
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 885
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 1
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 4
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 45
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 17
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 8
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 5
87 files changed, 2184 insertions(+), 1513 deletions(-)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7a7267a..9315348 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
@@ -47,7 +46,7 @@ EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden,
+WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
@@ -710,8 +709,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(
- PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
Align);
@@ -862,7 +860,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
Align);
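
The mechanical change in both hunks is that MachinePointerInfo::getFixedStack(FI) now wraps the PseudoSourceValue lookup, which is why the explicit #include can be dropped. A minimal sketch of the resulting pattern (helper name hypothetical, assuming the same LLVM 3.0-era CodeGen APIs used throughout this diff):

    // Hypothetical helper illustrating the new spelling; the flags and the
    // size/alignment arguments are derived from the frame index as before.
    static MachineMemOperand *getFixedStackMMO(MachineFunction &MF, int FI,
                                               unsigned Flags) {
      MachineFrameInfo &MFI = *MF.getFrameInfo();
      return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                                     Flags, // e.g. MachineMemOperand::MOStore
                                     MFI.getObjectSize(FI),
                                     MFI.getObjectAlignment(FI));
    }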
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fb7d96a..fc464ea 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -824,7 +824,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::Int_eh_sjlj_dispatchsetup: {
+ case ARM::eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
static_cast<const ARMBaseInstrInfo*>(TII);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 4df084f..9bae422 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -37,7 +37,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -197,8 +196,6 @@ class ARMFastISel : public FastISel {
// Call handling routines.
private:
- bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
- unsigned &ResultReg);
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
@@ -687,6 +684,8 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
}
+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -1115,7 +1114,7 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
- .addReg(SrcReg, getKillRegState(true));
+ .addReg(SrcReg);
AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
return true;
}
@@ -1304,6 +1303,8 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
int Imm = 0;
bool UseImm = false;
bool isNegativeImm = false;
+ // FIXME: At -O0 we don't have anything that canonicalizes operand order.
+ // Thus, Src1Value may be a ConstantInt, but we're missing it.
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
SrcVT == MVT::i1) {
@@ -1669,12 +1670,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
if (isFloat && !Subtarget->hasVFP2())
return false;
- unsigned Op1 = getRegForValue(I->getOperand(0));
- if (Op1 == 0) return false;
-
- unsigned Op2 = getRegForValue(I->getOperand(1));
- if (Op2 == 0) return false;
-
unsigned Opc;
bool is64bit = VT == MVT::f64 || VT == MVT::i64;
switch (ISDOpcode) {
@@ -1689,6 +1684,12 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
Opc = is64bit ? ARM::VMULD : ARM::VMULS;
break;
}
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
@@ -1699,18 +1700,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
// Call Handling Code
-bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
- EVT SrcVT, unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
- Src, /*TODO: Kill=*/false);
-
- if (RR != 0) {
- ResultReg = RR;
- return true;
- } else
- return false;
-}
-
// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
@@ -2119,9 +2108,6 @@ bool ARMFastISel::SelectCall(const Instruction *I,
if (IntrMemName && e-i <= 2)
break;
- unsigned Arg = getRegForValue(*i);
- if (Arg == 0)
- return false;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -2141,6 +2127,11 @@ bool ARMFastISel::SelectCall(const Instruction *I,
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
ArgVT != MVT::i1)
return false;
+
+ unsigned Arg = getRegForValue(*i);
+ if (Arg == 0)
+ return false;
+
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
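
The common thread in the reorderings above: getRegForValue() may emit instructions to materialize a value, so it is now called only after the cheap, register-free checks have passed. Otherwise an early bail-out leaves dead definitions behind, and nothing cleans those up at -O0. Illustrative shape only, not the actual ARMFastISel code:

    bool trySelect(const Instruction *I, MVT VT) {
      // 1) Checks that create no machine instructions come first, so an
      //    early 'false' leaves nothing behind (opcode/type legality, etc.).
      // 2) Only then materialize the operands.
      unsigned Op1 = getRegForValue(I->getOperand(0));
      if (Op1 == 0) return false;
      unsigned Op2 = getRegForValue(I->getOperand(1));
      if (Op2 == 0) return false;
      // 3) Emit the selected instruction using Op1/Op2.
      return true;
    }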
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b55ef70..8c4c06f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -40,7 +40,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
@@ -687,7 +686,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
- setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
}
@@ -864,7 +862,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
- case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
@@ -912,6 +909,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
@@ -2212,14 +2210,6 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
}
SDValue
-ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
- const {
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- Op.getOperand(0), Op.getOperand(1));
-}
-
-SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
@@ -3986,6 +3976,16 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
+
+ // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
+ if (VT == MVT::v2f32 || VT == MVT::v4f32) {
+ ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0));
+ int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF());
+ if (ImmVal != -1) {
+ SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+ return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
+ }
+ }
}
}
@@ -5014,7 +5014,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
- case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
@@ -5556,52 +5555,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
-/// EmitBasePointerRecalculation - For functions using a base pointer, we
-/// rematerialize it (via the frame pointer).
-void ARMTargetLowering::
-EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
- MachineFunction &MF = *MI->getParent()->getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
-
- if (!RI.hasBasePointer(MF)) return;
-
- MachineBasicBlock::iterator MBBI = MI;
-
- int32_t NumBytes = AFI->getFramePtrSpillOffset();
- unsigned FramePtr = RI.getFrameRegister(MF);
- assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
- "Base pointer without frame pointer?");
-
- if (AFI->isThumb2Function())
- llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
- else if (AFI->isThumbFunction())
- llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, *AII, RI);
- else
- llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
-
- if (!RI.needsStackRealignment(MF)) return;
-
- // If there's dynamic realignment, adjust for it.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- assert(!AFI->isThumb1OnlyFunction());
-
- // Emit bic r6, r6, MaxAlign
- unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
- AddDefaultCC(
- AddDefaultPred(
- BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
- .addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign - 1)));
-}
-
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -5636,8 +5589,6 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore, 4, 4);
- EmitBasePointerRecalculation(MI, MBB, DispatchBB);
-
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
// Incoming value: jbuf
@@ -5811,6 +5762,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineMemOperand::MOLoad |
MachineMemOperand::MOVolatile, 4, 4);
+ BuildMI(DispatchBB, dl, TII->get(ARM::eh_sjlj_dispatchsetup));
+
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
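
The new BUILD_VECTOR case is gated on ARM_AM::getFP32Imm(), which returns the 8-bit VFP/NEON floating-point immediate encoding for representable values and -1 otherwise. A sketch of that path factored into a helper (function name hypothetical):

    static SDValue tryVMOVFPImmSplat(ConstantFPSDNode *C, EVT VT,
                                     DebugLoc dl, SelectionDAG &DAG) {
      int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF());
      if (ImmVal == -1)
        return SDValue(); // not encodable as an 8-bit FP immediate
      SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
      // Matched later by the new VMOVv2f32/VMOVv4f32 patterns in
      // ARMInstrNEON.td via the NEONvmovFPImm node.
      return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
    }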
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index be6a530..b8dc4bf 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -81,7 +81,6 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
- EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
TC_RETURN, // Tail call return pseudo.
@@ -146,6 +145,9 @@ namespace llvm {
VMOVIMM,
VMVNIMM,
+ // Vector move f32 immediate:
+ VMOVFPIMM,
+
// Vector duplicate:
VDUP,
VDUPLANE,
@@ -407,7 +409,6 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -517,9 +518,6 @@ namespace llvm {
bool signExtend,
ARMCC::CondCodes Cond) const;
- void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB) const;
-
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 06ee2c8..6940156 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -2041,9 +2041,26 @@ multiclass VFPDT64InstAlias<string opc, string asm, dag Result> {
def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
defm : VFPDT64ReqInstAlias<opc, asm, Result>;
}
+multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> {
+ def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
+ def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
+ def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
+ def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
+}
+// VFPDT64NoF64ReqInstAlias plus plain ".64"
+multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> {
+ def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+ defm : VFPDT64NoF64ReqInstAlias<opc, asm, Result>;
+}
multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
defm : VFPDT8InstAlias<opc, asm, Result>;
defm : VFPDT16InstAlias<opc, asm, Result>;
defm : VFPDT32InstAlias<opc, asm, Result>;
defm : VFPDT64InstAlias<opc, asm, Result>;
}
+multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> {
+ defm : VFPDT8InstAlias<opc, asm, Result>;
+ defm : VFPDT16InstAlias<opc, asm, Result>;
+ defm : VFPDT32InstAlias<opc, asm, Result>;
+ defm : VFPDT64NoF64InstAlias<opc, asm, Result>;
+}
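
The NoF64 variants accept every 64-bit data-type suffix except ".f64": a ".f64" spelling must keep matching the real VFP double-precision instructions rather than a NEON alias, the same disambiguation concern called out for VMOVD in the ARMInstrVFP.td hunk further down.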
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 770703c..be03924 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,8 +58,6 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
@@ -143,9 +141,6 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
- SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
-
def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
@@ -475,6 +470,7 @@ def shift_so_reg_reg : Operand<i32>, // reg reg imm
let EncoderMethod = "getSORegRegOpValue";
let PrintMethod = "printSORegRegOperand";
let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
@@ -485,6 +481,7 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm
let EncoderMethod = "getSORegImmOpValue";
let PrintMethod = "printSORegImmOperand";
let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
let MIOperandInfo = (ops GPR, i32imm);
}
@@ -1555,7 +1552,7 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
}
// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These psuedos use a hand-written selection code).
+// (These pseudos use a hand-written selection code).
let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
@@ -4673,11 +4670,8 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
// handled when the pseudo is expanded (which happens before any passes
// that need the instruction size).
-let isBarrier = 1, hasSideEffects = 1 in
-def Int_eh_sjlj_dispatchsetup :
- PseudoInst<(outs), (ins GPR:$src), NoItinerary,
- [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
- Requires<[IsDarwin]>;
+let isBarrier = 1 in
+def eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -5023,3 +5017,43 @@ def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
(ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
cc_out:$s)>;
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+// shifter instructions also support a two-operand form.
+def : ARMInstAlias<"asr${s}${p} $Rm, $imm",
+ (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rm, $imm",
+ (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rm, $imm",
+ (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rm, $imm",
+ (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"asr${s}${p} $Rn, $Rm",
+ (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm",
+ (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm",
+ (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
+ (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+
+
+// 'mul' instruction can be specified with only two operands.
+def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
+ (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
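
With the aliases above, the assembler accepts the shorthand two-operand spellings: "asr r1, #3" assembles as "asr r1, r1, #3", "lsl r2, r3" as "lsl r2, r2, r3", and "mul r4, r5" as "mul r4, r4, r5".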
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 49cc254..f2ca963 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -39,6 +39,10 @@ def nImmVMOVI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmVMOVI32AsmOperand;
}
+def nImmVMOVF32 : Operand<i32> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
@@ -173,6 +177,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
@@ -4464,6 +4469,10 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
@@ -4513,6 +4522,15 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
@@ -4801,6 +4819,7 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4809,7 +4828,9 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+}
+let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4818,6 +4839,7 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+}
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -5218,6 +5240,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
(VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
(VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, pred:$p)>;
// Load two D registers.
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
@@ -5237,6 +5272,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
(VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
(VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, pred:$p)>;
// Load three D registers.
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
@@ -5260,6 +5308,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
(VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg,
addrmode6:$Rn, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg,
+ addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg,
+ addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg,
+ addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg,
+ addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
// Load four D registers.
@@ -5284,6 +5345,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
(VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg,
addrmode6:$Rn, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg,
+ addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg,
+ addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg,
+ addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+ (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg,
+ addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
// VST1 requires a size suffix, but also accepts type specific variants.
// Store one D register.
@@ -5304,6 +5378,19 @@ defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
(VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
(VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+ (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
+ VecListOneD:$Vd, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+ (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
+ VecListOneD:$Vd, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+ (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
+ VecListOneD:$Vd, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+ (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
+ VecListOneD:$Vd, pred:$p)>;
// Store two D registers.
defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
@@ -5323,6 +5410,19 @@ defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
(VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
(VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+ (VST1q8wb_register zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+ (VST1q16wb_register zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+ (VST1q32wb_register zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+ (VST1q64wb_register zero_reg, addrmode6:$Rn,
+ rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
// FIXME: The three and four register VST1 instructions haven't been moved
// to the VecList* encoding yet, so we can't do assembly parsing support
@@ -5346,3 +5446,19 @@ defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
+
+
+// VTRN instructions data type suffix aliases for more-specific types.
+defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm",
+ (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
+ (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm",
+ (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
+ (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
+ (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 03077c0..6129fa3 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -4084,3 +4084,8 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
// for isel.
def : t2InstAlias<"mov${p} $Rd, $imm",
(t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+
+
+// Wide 'mul' encoding can be specified with only two operands.
+def : t2InstAlias<"mul${p} $Rn, $Rm",
+ (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 488c508..e420135 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1172,3 +1172,35 @@ defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr",
(VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr",
(VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+
+// VMUL has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
+ (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm",
+ (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VADD has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm",
+ (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm",
+ (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VSUB has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm",
+ (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm",
+ (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+
+// VMOV can accept optional .f32/.f64 suffix.
+def : VFP2InstAlias<"vmov${p}.f32 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f32 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+
+def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
+ (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
+ (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
+
+// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
+// VMOVD does.
+def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
+ (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index cf1432d..6cbb24b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -38,8 +38,9 @@ extern "C" void LLVMInitializeARMTarget() {
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
@@ -50,8 +51,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
@@ -71,8 +73,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -95,34 +98,30 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
-bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && EnableGlobalMerge)
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM) {
+ if (getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
PM.add(createGlobalMergePass(getTargetLowering()));
return false;
}
-bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createARMISelDag(*this, OptLevel));
+bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM) {
+ PM.add(createARMISelDag(*this, getOptLevel()));
return false;
}
-bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM) {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
- if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None && Subtarget.isCortexA9())
PM.add(createMLxExpansionPass());
return true;
}
-bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
if (!Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
if (Subtarget.hasNEON())
@@ -133,7 +132,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
// proper scheduling.
PM.add(createARMExpandPseudoPass());
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
if (!Subtarget.isThumb1Only())
PM.add(createIfConverterPass());
}
@@ -143,8 +142,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
return true;
}
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) {
if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
PM.add(createThumb2SizeReductionPass());
@@ -153,7 +151,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
}
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
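
The pattern across all the TargetMachine changes in this patch is the same: the optimization level is supplied once, at construction time, and the pass-pipeline hooks query it via getOptLevel() instead of receiving it as a parameter. A hypothetical call site under the migrated signature (argument values illustrative):

    // Assumes the post-patch Target::createTargetMachine signature that
    // takes a CodeGenOpt::Level as its final argument.
    TargetMachine *TM =
      TheTarget->createTargetMachine(TripleStr, CPUStr, FeatureStr,
                                     Reloc::Default, CodeModel::Default,
                                     CodeGenOpt::Aggressive);
    // Hooks such as addPreISel() now read TM->getOptLevel() internally.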
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c8c601c..a1f517b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,7 +41,8 @@ private:
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -50,13 +51,12 @@ public:
}
// Pass Pipeline Configuration
- virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- JITCodeEmitter &MCE);
+ virtual bool addPreISel(PassManagerBase &PM);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreRegAlloc(PassManagerBase &PM);
+ virtual bool addPreSched2(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);
};
/// ARMTargetMachine - ARM target machine.
@@ -71,7 +71,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const ARMRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -111,7 +112,8 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
/// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 1d66d12..bb83e5e 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1946,18 +1946,15 @@ void ARMOperand::print(raw_ostream &OS) const {
break;
case k_ShiftedRegister:
OS << "<so_reg_reg "
- << RegShiftedReg.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
- << ", " << RegShiftedReg.ShiftReg << ", "
- << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
- << ">";
+ << RegShiftedReg.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
+ << " " << RegShiftedReg.ShiftReg << ">";
break;
case k_ShiftedImmediate:
OS << "<so_reg_imm "
- << RegShiftedImm.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
- << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
- << ">";
+ << RegShiftedImm.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
+ << " #" << RegShiftedImm.ShiftImm << ">";
break;
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -2366,7 +2363,7 @@ static unsigned getDRegFromQReg(unsigned QReg) {
case ARM::Q6: return ARM::D12;
case ARM::Q7: return ARM::D14;
case ARM::Q8: return ARM::D16;
- case ARM::Q9: return ARM::D19;
+ case ARM::Q9: return ARM::D18;
case ARM::Q10: return ARM::D20;
case ARM::Q11: return ARM::D22;
case ARM::Q12: return ARM::D24;
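
The Q9 fix above follows from the register aliasing: Qn overlaps D(2n) and D(2n+1), so the low half of Q9 is D18, not D19. As a closed form (index arithmetic only; the real function keys on register enums, hence the switch):

    // Hypothetical index-based equivalent of getDRegFromQReg.
    static unsigned lowDRegIndexOfQReg(unsigned QIdx) {
      return 2 * QIdx; // Qn = {D(2n), D(2n+1)}, so Q9 -> D18
    }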
@@ -2420,7 +2417,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
- Parser.Lex(); // Eat the comma.
+ Parser.Lex(); // Eat the minus.
SMLoc EndLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
@@ -2487,10 +2484,31 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// parse a vector register list
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- if(Parser.getTok().isNot(AsmToken::LCurly))
+ SMLoc S = Parser.getTok().getLoc();
+ // As an extension (to match gas), support a plain D register or Q register
+ // (without enclosing curly braces) as a single or double entry list,
+ // respectively.
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+ SMLoc E = Parser.getTok().getLoc();
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E));
+ return MatchOperand_Success;
+ }
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E));
+ return MatchOperand_Success;
+ }
+ Error(S, "vector register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().isNot(AsmToken::LCurly))
return MatchOperand_NoMatch;
- SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat '{' token.
SMLoc RegLoc = Parser.getTok().getLoc();
@@ -2509,7 +2527,39 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
++Count;
}
- while (Parser.getTok().is(AsmToken::Comma)) {
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the minus.
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1) {
+ Error(EndLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+ Error(EndLoc, "invalid register in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Ranges must go from low to high.
+ if (Reg > EndReg) {
+ Error(EndLoc, "bad range in register list");
+ return MatchOperand_ParseFail;
+ }
+
+ // Add all the registers in the range to the register list.
+ Count += EndReg - Reg;
+ Reg = EndReg;
+ continue;
+ }
Parser.Lex(); // Eat the comma.
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
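
With this loop extension, D-register vector lists support range syntax just as GPR lists do: "{d0-d3}" parses as the four-register list "{d0, d1, d2, d3}", and a Q register appearing in a range is folded to its two D-register halves before the range is expanded.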
@@ -3538,9 +3588,12 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// If we have a '#', it's an immediate offset, else assume it's a register
- // offset.
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat the '#'.
+ // offset. Be friendly and also accept a plain integer (without a leading
+ // hash) for gas compatibility.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Integer)) {
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat the '#'.
E = Parser.getTok().getLoc();
bool isNegative = getParser().getTok().is(AsmToken::Minus);
@@ -4098,6 +4151,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// remove the cc_out operand.
(!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
!isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
!inITBlock() ||
(static_cast<ARMOperand*>(Operands[3])->getReg() !=
static_cast<ARMOperand*>(Operands[5])->getReg() &&
@@ -4105,6 +4159,20 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand*>(Operands[4])->getReg())))
return true;
+ // Also check the 'mul' syntax variant that doesn't specify an explicit
+ // destination register.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ // If the registers aren't low regs or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !inITBlock()))
+ return true;
+
// Register-register 'add/sub' for thumb does not have a cc_out operand
@@ -4542,12 +4610,37 @@ processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
// Handle the MOV complex aliases.
+ case ARM::ASRr:
+ case ARM::LSRr:
+ case ARM::LSLr:
+ case ARM::RORr: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRr: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRr: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLr: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORr: ShiftTy = ARM_AM::ror; break;
+ }
+ // A shift by zero is a plain MOVr, not a MOVsi.
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsr);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
case ARM::ASRi:
case ARM::LSRi:
case ARM::LSLi:
case ARM::RORi: {
ARM_AM::ShiftOpc ShiftTy;
- unsigned Amt = Inst.getOperand(2).getImm();
switch(Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode!");
case ARM::ASRi: ShiftTy = ARM_AM::asr; break;
@@ -4556,6 +4649,7 @@ processInstruction(MCInst &Inst,
case ARM::RORi: ShiftTy = ARM_AM::ror; break;
}
// A shift by zero is a plain MOVr, not a MOVsi.
+ unsigned Amt = Inst.getOperand(2).getImm();
unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi;
unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
MCInst TmpInst;
@@ -4570,6 +4664,19 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
return true;
}
+ case ARM::RRXi: {
+ unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsi);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
case ARM::t2LDMIA_UPD: {
// If this is a load of a single register, then we should use
// a post-indexed LDR instruction instead, per the ARM ARM.
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index baa55f2..511932e 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -62,8 +62,8 @@ add_llvm_library_dependencies(LLVMARMCodeGen
LLVMTarget
)
-# workaround for hanging compilation on MSVC10
-if( MSVC_VERSION EQUAL 1600 )
+# workaround for hanging compilation on MSVC9, 10
+if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
SOURCE ARMISelLowering.cpp
PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 0b9b5d0..ad250ab 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -179,8 +179,6 @@ static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val,
@@ -251,6 +249,11 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
@@ -1921,12 +1924,6 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Val));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4085,3 +4082,60 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn,
return S;
}
+
+static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv2f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv2f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv4f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv4f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
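
Both new decoders lean on fieldFromInstruction32() to pull bit-fields out of the encoded word, and on the cmode/imm test to hand the genuinely ambiguous encodings over to the VMOV decoder. For reference, the field extraction behaves like this minimal sketch (ignoring the numBits == 32 edge case):

    // Reference semantics of fieldFromInstruction32(Insn, firstBit, numBits).
    static inline uint32_t extractField(uint32_t Insn, unsigned firstBit,
                                        unsigned numBits) {
      return (Insn >> firstBit) & ((1u << numBits) - 1);
    }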
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1bc585b..62d04c4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -60,7 +60,7 @@ public:
// ARMFixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
-{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
@@ -68,7 +68,7 @@ public:
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
@@ -138,7 +138,7 @@ bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
- const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
+ const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
: Thumb1_16bitNopEncoding;
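The old ARMv6T2 constant 0xe3207800 was not a valid NOP encoding; 0xe320f000 is the architected hint NOP. A sketch of the padding loop these constants feed, assuming little-endian emission and illustrative names:

#include <cstdint>
#include <vector>

// Pad 'count' bytes with 4-byte ARM NOPs, little-endian byte order.
void writeArmNops(std::vector<uint8_t> &out, uint64_t count, bool hasV6T2) {
  const uint32_t nop = hasV6T2 ? 0xe320f000u   // architected hint NOP
                               : 0xe1a00000u;  // MOV r0,r0
  for (uint64_t i = 0; i + 4 <= count; i += 4)
    for (int b = 0; b < 4; ++b)
      out.push_back(uint8_t((nop >> (8 * b)) & 0xff));
}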
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 6042b11..e86f48e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -129,14 +129,15 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
Triple TheTriple(TT);
// Default relocation model on Darwin is PIC, not DynamicNoPIC.
RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
}
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
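This is the first of several identical updates below (SPU, MBlaze, MSP430, and Mips follow): the MC code-gen info hook now threads the optimization level through to InitMCCodeGenInfo. The shape every backend adopts, with 'MyTarget' as a placeholder and the LLVM MC types assumed from the surrounding headers:

static MCCodeGenInfo *createMyTargetMCCodeGenInfo(StringRef TT,
                                                  Reloc::Model RM,
                                                  CodeModel::Model CM,
                                                  CodeGenOpt::Level OL) {
  MCCodeGenInfo *X = new MCCodeGenInfo();
  X->InitMCCodeGenInfo(RM, CM, OL);  // OL is the new third argument
  return X;
}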
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 218311d..de33bd6 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
#include "Thumb1InstrInfo.h"
@@ -60,8 +59,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -89,8 +87,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index cf040c8..7ec3c0e 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
@@ -130,8 +129,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -158,8 +156,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
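Both Thumb files now build their memory operands through MachinePointerInfo::getFixedStack, which wraps the fixed-stack pseudo source value internally, so the PseudoSourceValue header can be dropped. The new idiom, as a sketch with an illustrative helper name (CodeGen headers assumed):

// 'makeFixedStackMMO' is illustrative; the calls mirror the diff above.
MachineMemOperand *makeFixedStackMMO(MachineFunction &MF, int FI,
                                     unsigned Flags /*MOLoad or MOStore*/) {
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                                 Flags,
                                 MFI.getObjectSize(FI),
                                 MFI.getObjectAlignment(FI));
}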
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 06e812b..8bce52c 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -3604,7 +3604,6 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index 4f1ca97..ca346af 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -22,13 +22,13 @@ namespace llvm {
struct CTargetMachine : public TargetMachine {
CTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
: TargetMachine(T, TT, CPU, FS) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
index d5af2a8..5ce14c9 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -62,11 +62,12 @@ static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// For the time being, use static relocations, since there's really no
// support for PIC yet.
- X->InitMCCodeGenInfo(Reloc::Static, CM);
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 99837df..a851be3 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Constants.h"
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 93a7f6e..6940316 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -34,8 +34,9 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
@@ -49,8 +50,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool SPUTargetMachine::addInstSelector(PassManagerBase &PM) {
// Install an instruction selector.
PM.add(createSPUISelDag(*this));
return false;
@@ -58,7 +58,7 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
// passes to run just before printing the assembly
bool SPUTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+addPreEmitPass(PassManagerBase &PM) {
// load the TCE instruction scheduler, if available via
// loaded plugins
typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index fffe77c..909f12e 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -40,7 +40,8 @@ class SPUTargetMachine : public LLVMTargetMachine {
public:
SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
/// Return the subtarget implementation object
virtual const SPUSubtarget *getSubtargetImpl() const {
@@ -81,9 +82,8 @@ public:
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &);
};
} // end namespace llvm
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 394ea2b..efeb989 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -2065,7 +2065,6 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 287e537..a3613b4 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -24,13 +24,13 @@ class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
: TargetMachine(T, TT, CPU, FS) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 7bff53e..4ad7bd6 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -34,8 +34,9 @@ extern "C" void LLVMInitializeMBlazeTarget() {
MBlazeTargetMachine::
MBlazeTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL):
+ LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
InstrInfo(*this),
@@ -46,8 +47,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
// Install an instruction selector pass using
// the ISelDag to gen MBlaze code.
-bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM) {
PM.add(createMBlazeISelDag(*this));
return false;
}
@@ -55,8 +55,7 @@ bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
-bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM) {
PM.add(createMBlazeDelaySlotFillerPass(*this));
return true;
}
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index c1bc08a..1c1aa53 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -43,7 +43,8 @@ namespace llvm {
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const MBlazeInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
@@ -77,8 +78,8 @@ namespace llvm {
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt);
- virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
};
} // End llvm namespace
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
index 43ae281..a3a5cf4 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -62,13 +62,14 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default)
RM = Reloc::Static;
if (CM == CodeModel::Default)
CM = CodeModel::Small;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index fda70b8..0d532e3 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -51,9 +51,10 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 9daeb2a..5c94137 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index ffd4318..81f766e 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -43,8 +42,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -72,8 +70,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 4dd8933..fe185fb 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -28,8 +28,9 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
StringRef TT,
StringRef CPU,
StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
@@ -37,15 +38,13 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
FrameLowering(Subtarget) { }
-bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM) {
// Install an instruction selector.
- PM.add(createMSP430ISelDag(*this, OptLevel));
+ PM.add(createMSP430ISelDag(*this, getOptLevel()));
return false;
}
-bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM) {
// Must run branch selection immediately preceding the asm printer.
PM.add(createMSP430BranchSelectionPass());
return false;
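The pass-configuration hooks lose their CodeGenOpt::Level parameter throughout; a target that still needs the level reads it off the machine, as the MSP430 selector above does. Sketch, with 'MyTarget' names as placeholders:

bool MyTargetMachine::addInstSelector(PassManagerBase &PM) {
  // The level formerly passed as a parameter now lives on the TargetMachine.
  PM.add(createMyTargetISelDag(*this, getOptLevel()));
  return false;
}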
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index eb483dc..4fb060f 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -40,7 +40,8 @@ class MSP430TargetMachine : public LLVMTargetMachine {
public:
MSP430TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
@@ -61,8 +62,8 @@ public:
return &TSInfo;
}
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
}; // MSP430TargetMachine.
} // end namespace llvm
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 53656d4d..ac9cfc0 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_target(MipsCodeGen
MipsISelLowering.cpp
MipsFrameLowering.cpp
MipsMCInstLower.cpp
- MipsMCSymbolRefExpr.cpp
MipsRegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 4f017d0..7bc5fe4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -58,6 +58,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
switch (Kind) {
default:
break;
+ case FK_GPRel_4:
case FK_Data_4:
Value &= 0xffffffff;
break;
@@ -68,6 +69,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Mips::fixup_Mips_PC16:
Value &= 0x0000ffff;
break;
+ case Mips::fixup_Mips_HI16:
+ Value >>= 16;
+ break;
}
return Value;
@@ -104,15 +108,17 @@ public:
llvm_unreachable("Unknown fixup kind!");
case Mips::fixup_Mips_GOT16: // This will be fixed up at link time
break;
+ case FK_GPRel_4:
case FK_Data_4:
case Mips::fixup_Mips_26:
case Mips::fixup_Mips_LO16:
case Mips::fixup_Mips_PC16:
+ case Mips::fixup_Mips_HI16:
// For each byte of the fragment that the fixup touches, mask in
// the fixup value. The Value has been "split up" into the appropriate
// bitfields above.
for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[Offset + i] += uint8_t((Value >> (i * 8)) & 0xff);
break;
}
}
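Two changes here: fixup_Mips_HI16 now shifts the resolved value into the upper half-word, and the application loop switches from OR to addition so the value combines with an addend already encoded in the field. A sketch of the byte-wise application; as in the patch, carries do not propagate between bytes:

#include <cstdint>

// Fold 'value' into a 4-byte little-endian instruction word. Using '+='
// rather than '|=' adds to whatever addend the encoder already placed
// in the field, which a plain OR could not do.
void applyFixupBytes(uint8_t *data, uint32_t value) {
  for (unsigned i = 0; i != 4; ++i)
    data[i] += uint8_t((value >> (i * 8)) & 0xff);
}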
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 1115fec..0c3cbb3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -173,11 +173,21 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
} else if (MO.isExpr()) {
const MCExpr *Expr = MO.getExpr();
MCExpr::ExprKind Kind = Expr->getKind();
+ unsigned Ret = 0;
+
+ if (Kind == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr);
+ Expr = BE->getLHS();
+ Kind = Expr->getKind();
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+ assert((Kind == MCExpr::SymbolRef) && CE &&
+ "Binary expression must be sym+const.");
+ Ret = CE->getValue();
+ }
+
if (Kind == MCExpr::SymbolRef) {
- Mips::Fixups FixupKind = Mips::fixup_Mips_NONE;
- MCSymbolRefExpr::VariantKind SymRefKind =
- cast<MCSymbolRefExpr>(Expr)->getKind();
- switch(SymRefKind) {
+ Mips::Fixups FixupKind;
+ switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
case MCSymbolRefExpr::VK_Mips_GPREL:
FixupKind = Mips::fixup_Mips_GPREL16;
break;
@@ -206,12 +216,12 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
FixupKind = Mips::fixup_Mips_TPREL_LO;
break;
default:
- return 0;
+ return Ret;
} // switch
Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind)));
} // if SymbolRef
// All of the information is in the fixup.
- return 0;
+ return Ret;
}
llvm_unreachable("Unable to encode MCOperand!");
// Not reached
@@ -234,15 +244,22 @@ MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
unsigned
MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
- // FIXME: implement
- return 0;
+ assert(MI.getOperand(OpNo).isImm());
+ unsigned szEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ return szEncoding - 1;
}
+// FIXME: should be called getMSBEncoding
+//
unsigned
MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
- // FIXME: implement
- return 0;
+ assert(MI.getOperand(OpNo-1).isImm());
+ assert(MI.getOperand(OpNo).isImm());
+ unsigned pos = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
+ unsigned sz = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+
+ return pos + sz - 1;
}
#include "MipsGenMCCodeEmitter.inc"
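The two hooks implement the MIPS ext/ins operand encodings: ext stores the field width minus one, and ins stores the position of the most significant inserted bit, pos + size - 1 (hence the FIXME about the name). In sketch form:

// ext encodes width-1; ins encodes the MSB of the inserted field.
// e.g. ins with pos=8, size=16 encodes msb = 23 in its size slot.
unsigned encodeExtSize(unsigned size) { return size - 1; }
unsigned encodeInsMsb(unsigned pos, unsigned size) { return pos + size - 1; }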
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index e6040e4..1fec88a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -63,11 +63,12 @@ static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default)
RM = Reloc::PIC_;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 3c97241..b0fb4fa 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -175,6 +175,7 @@ def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>;
def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>;
/// Jump and Branch Instructions
+def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>;
def JAL64 : JumpLink64<0x03, "jal">;
def JALR64 : JumpLinkReg64<0x00, 0x09, "jalr">;
def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
@@ -231,7 +232,24 @@ let Predicates = [IsN64] in {
}
// hi/lo relocs
-def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
+def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
+def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+
+def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+
+def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
+ (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
+ (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
+ (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
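The new patterns mirror the 32-bit hi/lo pairs using LUi64 and DADDiu. A sketch of the %hi/%lo arithmetic the relocations ultimately perform: since the 16-bit low part is sign-extended when added back, the high part must round up whenever bit 15 is set:

#include <cstdint>

// Split a value into the %hi/%lo pair used by lui/addiu (or daddiu):
// (hi16(v) << 16) plus the sign-extended lo16(v) reconstructs v.
uint32_t hi16(uint32_t v) { return ((v + 0x8000u) >> 16) & 0xffffu; }
uint32_t lo16(uint32_t v) { return v & 0xffffu; }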
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 186a5e3..d27e3ab 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -18,7 +18,6 @@
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsMCInstLower.h"
-#include "MipsMCSymbolRefExpr.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/SmallString.h"
@@ -79,12 +78,19 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Enclose unaligned load or store with .macro & .nomacro directives.
if (isUnalignedLoadStore(Opc)) {
- MCInst Directive;
- Directive.setOpcode(Mips::MACRO);
- OutStreamer.EmitInstruction(Directive);
- OutStreamer.EmitInstruction(TmpInst0);
- Directive.setOpcode(Mips::NOMACRO);
- OutStreamer.EmitInstruction(Directive);
+ if (OutStreamer.hasRawTextSupport()) {
+ MCInst Directive;
+ Directive.setOpcode(Mips::MACRO);
+ OutStreamer.EmitInstruction(Directive);
+ OutStreamer.EmitInstruction(TmpInst0);
+ Directive.setOpcode(Mips::NOMACRO);
+ OutStreamer.EmitInstruction(Directive);
+ } else {
+ MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts);
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I
+ != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+ }
return;
}
@@ -92,8 +98,8 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Lower CPLOAD and CPRESTORE
if (Opc == Mips::CPLOAD) {
MCInstLowering.LowerCPLOAD(MI, MCInsts);
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
- I != MCInsts.end(); ++I)
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I
+ != MCInsts.end(); ++I)
OutStreamer.EmitInstruction(*I);
return;
}
@@ -102,7 +108,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInstLowering.LowerCPRESTORE(MI, TmpInst0);
OutStreamer.EmitInstruction(TmpInst0);
return;
- }
+ }
}
OutStreamer.EmitInstruction(TmpInst0);
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 16461ff..f0c6626 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -27,9 +27,11 @@ class MachineBasicBlock;
class Module;
class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
- const MipsSubtarget *Subtarget;
-
+
public:
+
+ const MipsSubtarget *Subtarget;
+
explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer) {
Subtarget = &TM.getSubtarget<MipsSubtarget>();
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 19bb1a5..36aef99 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -152,6 +152,9 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
bool ATUsed;
unsigned GP = STI.isABI_N64() ? Mips::GP_64 : Mips::GP;
unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi;
@@ -169,13 +172,14 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
MFI->setStackSize(StackSize);
BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
// Emit instructions that set $gp using the value of $t9.
// O32 uses the directive .cpload while N32/64 requires three instructions to
// do this.
// TODO: Do not emit these instructions if no instructions use $gp.
if (isPIC && STI.isABI_O32())
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD))
+ BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::CPLOAD))
.addReg(RegInfo->getPICCallReg());
else if (STI.isABI_N64() || (isPIC && STI.isABI_N32())) {
// lui $28,%hi(%neg(%gp_rel(fname)))
@@ -189,8 +193,6 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
}
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
-
// No need to allocate space on the stack.
if (StackSize == 0 && !MFI->adjustsStack()) return;
@@ -199,10 +201,8 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineLocation DstML, SrcML;
// Adjust stack : addi sp, sp, (-imm)
- ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB,
- MBBI);
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(NewReg).addImm(NewImm);
+ ATUsed = expandRegLargeImmPair(SP, -StackSize, NewReg, NewImm, MBB, MBBI);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm);
// FIXME: change this when mips goes MC.
if (ATUsed)
@@ -262,14 +262,13 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
// Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
- .addReg(Mips::SP).addReg(Mips::ZERO);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
// emit ".cfi_def_cfa_register $fp"
MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl,
TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
- DstML = MachineLocation(Mips::FP);
+ DstML = MachineLocation(FP);
SrcML = MachineLocation(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
}
@@ -293,6 +292,11 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
const MipsInstrInfo &TII =
*static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
// Get the number of bytes from FrameInfo
unsigned StackSize = MFI->getStackSize();
@@ -310,16 +314,13 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
--I;
// Insert instruction "move $sp, $fp" at this location.
- BuildMI(MBB, I, dl, TII.get(Mips::ADDu), Mips::SP)
- .addReg(Mips::FP).addReg(Mips::ZERO);
+ BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
}
// adjust stack : insert addi sp, sp, (imm)
if (StackSize) {
- ATUsed = expandRegLargeImmPair(Mips::SP, StackSize, NewReg, NewImm, MBB,
- MBBI);
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(NewReg).addImm(NewImm);
+ ATUsed = expandRegLargeImmPair(SP, StackSize, NewReg, NewImm, MBB, MBBI);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm);
// FIXME: change this when mips goes MC.
if (ATUsed)
@@ -331,13 +332,15 @@ void MipsFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineRegisterInfo& MRI = MF.getRegInfo();
+ unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
// FIXME: remove this code if register allocator can correctly mark
// $fp and $ra used or unused.
// Mark $fp and $ra as used or unused.
if (hasFP(MF))
- MRI.setPhysRegUsed(Mips::FP);
+ MRI.setPhysRegUsed(FP);
// The register allocator might determine $ra is used after seeing
// instruction "jr $ra", but we do not want PrologEpilogInserter to insert
@@ -345,7 +348,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// To correct this, $ra is explicitly marked unused if there is no
// function call.
if (MF.getFrameInfo()->hasCalls())
- MRI.setPhysRegUsed(Mips::RA);
+ MRI.setPhysRegUsed(RA);
else
- MRI.setPhysRegUnused(Mips::RA);
+ MRI.setPhysRegUnused(RA);
}
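The prologue and epilogue now select registers and opcodes by ABI once, up front, rather than hard-coding the 32-bit names. The pattern, reduced to a sketch:

// Select the 64-bit register/opcode variant under N64, else the 32-bit one.
unsigned pickByABI(bool isN64, unsigned v64, unsigned v32) {
  return isN64 ? v64 : v32;
}
// e.g. unsigned SP = pickByABI(STI.isABI_N64(), Mips::SP_64, Mips::SP);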
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b595f03..b5a15cf 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -127,9 +127,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
@@ -1506,7 +1508,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
// %hi/%lo relocation
SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true,
MipsII::MO_ABS_HI);
@@ -1517,16 +1519,17 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
}
- SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_GOT);
- BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, BAGOTOffset);
- SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_ABS_LO);
- SDValue Load = DAG.getLoad(MVT::i32, dl,
+ EVT ValTy = Op.getValueType();
+ unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag);
+ BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, BAGOTOffset);
+ SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag);
+ SDValue Load = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), BAGOTOffset,
MachinePointerInfo(), false, false, false, 0);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, BALOOffset);
+ return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo);
}
SDValue MipsTargetLowering::
@@ -1649,16 +1652,19 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
} else {
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_GOT);
- CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP);
- SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
+ EVT ValTy = Op.getValueType();
+ unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
+ N->getOffset(), GOTFlag);
+ CP = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, CP);
+ SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(),
CP, MachinePointerInfo::getConstantPool(),
false, false, false, 0);
- SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ SDValue CPLo = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
+ N->getOffset(), OFSTFlag);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo);
}
return ResNode;
@@ -2063,6 +2069,7 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
bool IsRegLoc = VA.isRegLoc();
unsigned Offset = 0; // Offset in # of bytes from the beginning of struct.
unsigned LocMemOffset = 0;
+ unsigned MemCpySize = ByValSize;
if (!IsRegLoc)
LocMemOffset = VA.getLocMemOffset();
@@ -2082,9 +2089,13 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
RegsToPass.push_back(std::make_pair(*Reg, LoadVal));
}
+ // Return if the struct has been fully copied.
+ if (!(MemCpySize = ByValSize - Offset))
+ return;
+
// If there is an argument register available, copy the remainder of the
// byval argument with sub-doubleword loads and shifts.
- if ((Reg != RegEnd) && (ByValSize != Offset)) {
+ if (Reg != RegEnd) {
assert((ByValSize < Offset + 8) &&
"Size of the remainder should be smaller than 8 bytes.");
SDValue Val;
@@ -2119,19 +2130,18 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
}
}
- unsigned MemCpySize = ByValSize - Offset;
- if (MemCpySize) {
- // Create a fixed object on stack at offset LocMemOffset and copy
- // remainder of byval arg to it with memcpy.
- SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
- DAG.getConstant(Offset, PtrTy));
- LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true);
- SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy);
- ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
- DAG.getConstant(MemCpySize, PtrTy), Alignment,
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
- }
+ assert(MemCpySize && "MemCpySize must not be zero.");
+
+ // Create a fixed object on stack at offset LocMemOffset and copy
+ // remainder of byval arg to it with memcpy.
+ SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true);
+ SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy);
+ ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
+ DAG.getConstant(MemCpySize, PtrTy), Alignment,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
}
/// LowerCall - functions arguments are copied from virtual regs to
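The restructured PassByValArg64 computes the stack-copy remainder once, returns early when registers consumed the whole struct, and asserts a nonzero size on the memcpy path. The remainder logic, as a standalone sketch:

#include <cassert>

// Remainder of a byval struct after 'offset' bytes went to registers;
// zero means no stack copy is needed and the caller returns early.
unsigned byValTail(unsigned byValSize, unsigned offset) {
  assert(offset <= byValSize && "copied past the end of the struct");
  return byValSize - offset;
}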
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 5dca9b6..0ae94ab 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -145,7 +145,9 @@ def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
-def calltarget : Operand<i32>;
+def calltarget : Operand<iPTR> {
+ let EncoderMethod = "getJumpTargetOpValue";
+}
def calltarget64: Operand<i64>;
def simm16 : Operand<i32>;
def simm16_64 : Operand<i64>;
@@ -378,6 +380,22 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
let isPseudo = Pseudo;
}
+// Memory Load/Store
+let canFoldAsLoad = 1 in
+class LoadX<bits<6> op, RegisterClass RC,
+ Operand MemOpnd>:
+ FMem<op, (outs RC:$rt), (ins MemOpnd:$addr),
+ "",
+ [], IILoad> {
+}
+
+class StoreX<bits<6> op, RegisterClass RC,
+ Operand MemOpnd>:
+ FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
+ "",
+ [], IIStore> {
+}
+
// 32-bit load.
multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
@@ -396,6 +414,13 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
Requires<[IsN64]>;
}
+// 32-bit load.
+multiclass LoadX32<bits<6> op> {
+ def #NAME# : LoadX<op, CPURegs, mem>,
+ Requires<[NotN64]>;
+ def _P8 : LoadX<op, CPURegs, mem64>,
+ Requires<[IsN64]>;
+}
// 32-bit store.
multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
@@ -414,6 +439,14 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
Requires<[IsN64]>;
}
+// 32-bit store.
+multiclass StoreX32<bits<6> op> {
+ def #NAME# : StoreX<op, CPURegs, mem>,
+ Requires<[NotN64]>;
+ def _P8 : StoreX<op, CPURegs, mem64>,
+ Requires<[IsN64]>;
+}
+
// Conditional Branch
class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
@@ -458,10 +491,11 @@ class JumpFJ<bits<6> op, string instr_asm>:
FJ<op, (outs), (ins jmptarget:$target),
!strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
-let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
-class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs), (ins CPURegs:$rs),
- !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> {
+let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1,
+ isIndirectBranch = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<op, func, (outs), (ins RC:$rs),
+ !strconcat(instr_asm, "\t$rs"), [(brind RC:$rs)], IIBranch> {
let rt = 0;
let rd = 0;
let shamt = 0;
@@ -760,6 +794,12 @@ defm ULW : LoadM32<0x23, "ulw", load_u, 1>;
defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
defm USW : StoreM32<0x2b, "usw", store_u, 1>;
+/// Primitives for unaligned load and store.
+defm LWL : LoadX32<0x22>;
+defm LWR : LoadX32<0x26>;
+defm SWL : StoreX32<0x2A>;
+defm SWR : StoreX32<0x2E>;
+
let hasSideEffects = 1 in
def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
[(MipsSync imm:$stype)], NoItinerary, FrmOther>
@@ -779,8 +819,7 @@ def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>;
/// Jump and Branch Instructions
def J : JumpFJ<0x02, "j">;
-let isIndirectBranch = 1 in
- def JR : JumpFR<0x00, 0x08, "jr">;
+def JR : JumpFR<0x00, 0x08, "jr", CPURegs>;
def JAL : JumpLink<0x03, "jal">;
def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
@@ -898,20 +937,20 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
// hi/lo relocs
def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+
def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
(ADDiu CPURegs:$hi, tblockaddress:$lo)>;
-
-def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
(ADDiu CPURegs:$hi, tjumptable:$lo)>;
-
-def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 1fab52c..6fc2af1 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
+
using namespace llvm;
MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
@@ -55,34 +56,34 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
}
switch (MOTy) {
- case MachineOperand::MO_MachineBasicBlock:
- Symbol = MO.getMBB()->getSymbol();
- break;
-
- case MachineOperand::MO_GlobalAddress:
- Symbol = Mang->getSymbol(MO.getGlobal());
- break;
-
- case MachineOperand::MO_BlockAddress:
- Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
- break;
-
- case MachineOperand::MO_JumpTableIndex:
- Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
- break;
-
- case MachineOperand::MO_ConstantPoolIndex:
- Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
- if (MO.getOffset())
- Offset += MO.getOffset();
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ if (MO.getOffset())
+ Offset += MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
}
const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
@@ -145,8 +146,8 @@ void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) {
OutMI.addOperand(MCOperand::CreateImm(MO.getImm()));
}
-
-MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO,
+ unsigned offset) const {
MachineOperandType MOTy = MO.getType();
switch (MOTy) {
@@ -158,14 +159,14 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
if (MO.isImplicit()) break;
return MCOperand::CreateReg(MO.getReg());
case MachineOperand::MO_Immediate:
- return MCOperand::CreateImm(MO.getImm());
+ return MCOperand::CreateImm(MO.getImm() + offset);
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_BlockAddress:
- return LowerSymbolOperand(MO, MOTy, 0);
+ return LowerSymbolOperand(MO, MOTy, offset);
}
return MCOperand();
@@ -182,3 +183,116 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.addOperand(MCOp);
}
}
+
+void MipsMCInstLower::LowerUnalignedLoadStore(const MachineInstr *MI,
+ SmallVector<MCInst,
+ 4>& MCInsts) {
+ unsigned Opc = MI->getOpcode();
+ MCInst instr1, instr2, instr3, move;
+
+ bool two_instructions = false;
+
+ assert(MI->getNumOperands() == 3);
+ assert(MI->getOperand(0).isReg());
+ assert(MI->getOperand(1).isReg());
+
+ MCOperand target = LowerOperand(MI->getOperand(0));
+ MCOperand base = LowerOperand(MI->getOperand(1));
+ MCOperand atReg = MCOperand::CreateReg(Mips::AT);
+ MCOperand zeroReg = MCOperand::CreateReg(Mips::ZERO);
+
+ MachineOperand unloweredName = MI->getOperand(2);
+ MCOperand name = LowerOperand(unloweredName);
+
+ move.setOpcode(Mips::ADDu);
+ move.addOperand(target);
+ move.addOperand(atReg);
+ move.addOperand(zeroReg);
+
+ switch (Opc) {
+ case Mips::ULW: {
+ // FIXME: only works for little endian right now
+ MCOperand adj_name = LowerOperand(unloweredName, 3);
+ if (base.getReg() == (target.getReg())) {
+ instr1.setOpcode(Mips::LWL);
+ instr1.addOperand(atReg);
+ instr1.addOperand(base);
+ instr1.addOperand(adj_name);
+ instr2.setOpcode(Mips::LWR);
+ instr2.addOperand(atReg);
+ instr2.addOperand(base);
+ instr2.addOperand(name);
+ instr3 = move;
+ } else {
+ two_instructions = true;
+ instr1.setOpcode(Mips::LWL);
+ instr1.addOperand(target);
+ instr1.addOperand(base);
+ instr1.addOperand(adj_name);
+ instr2.setOpcode(Mips::LWR);
+ instr2.addOperand(target);
+ instr2.addOperand(base);
+ instr2.addOperand(name);
+ }
+ break;
+ }
+ case Mips::ULHu: {
+ // FIXME: only works for little endian right now
+ MCOperand adj_name = LowerOperand(unloweredName, 1);
+ instr1.setOpcode(Mips::LBu);
+ instr1.addOperand(atReg);
+ instr1.addOperand(base);
+ instr1.addOperand(adj_name);
+ instr2.setOpcode(Mips::LBu);
+ instr2.addOperand(target);
+ instr2.addOperand(base);
+ instr2.addOperand(name);
+ instr3.setOpcode(Mips::INS);
+ instr3.addOperand(target);
+ instr3.addOperand(atReg);
+ instr3.addOperand(MCOperand::CreateImm(0x8));
+ instr3.addOperand(MCOperand::CreateImm(0x18));
+ break;
+ }
+
+ case Mips::USW: {
+ // FIXME: only works for little endian right now
+ assert (base.getReg() != target.getReg());
+ two_instructions = true;
+ MCOperand adj_name = LowerOperand(unloweredName, 3);
+ instr1.setOpcode(Mips::SWL);
+ instr1.addOperand(target);
+ instr1.addOperand(base);
+ instr1.addOperand(adj_name);
+ instr2.setOpcode(Mips::SWR);
+ instr2.addOperand(target);
+ instr2.addOperand(base);
+ instr2.addOperand(name);
+ break;
+ }
+ case Mips::USH: {
+ MCOperand adj_name = LowerOperand(unloweredName, 1);
+ instr1.setOpcode(Mips::SB);
+ instr1.addOperand(target);
+ instr1.addOperand(base);
+ instr1.addOperand(name);
+ instr2.setOpcode(Mips::SRL);
+ instr2.addOperand(atReg);
+ instr2.addOperand(target);
+ instr2.addOperand(MCOperand::CreateImm(8));
+ instr3.setOpcode(Mips::SB);
+ instr3.addOperand(atReg);
+ instr3.addOperand(base);
+ instr3.addOperand(adj_name);
+ break;
+ }
+ default:
+ // FIXME: need to add others
+ assert(0 && "unaligned instruction not processed");
+ }
+
+ MCInsts.push_back(instr1);
+ MCInsts.push_back(instr2);
+ if (!two_instructions) MCInsts.push_back(instr3);
+}
+
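When the streamer cannot emit raw .macro/.nomacro text, LowerUnalignedLoadStore expands ULW and USW into lwl/lwr and swl/swr pairs; on a little-endian core, lwl at offset+3 fills the high bytes while lwr at the base offset fills the low ones. A sketch of the net effect, not the lowering itself:

#include <cstdint>
#include <cstring>

// What the emitted lwl/lwr pair computes: an unaligned little-endian
// 32-bit load that never traps. memcpy models the byte gathering.
uint32_t unalignedLoad32(const uint8_t *p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof v);
  return v;
}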
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 3a24da2..98e37e4 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -37,10 +37,12 @@ public:
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI);
+ void LowerUnalignedLoadStore(const MachineInstr *MI,
+ SmallVector<MCInst, 4>& MCInsts);
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
- MCOperand LowerOperand(const MachineOperand& MO) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
};
}
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
deleted file mode 100644
index a0a242c..0000000
--- a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "mipsmcsymbolrefexpr"
-#include "MipsMCSymbolRefExpr.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-using namespace llvm;
-
-const MipsMCSymbolRefExpr*
-MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol,
- int Offset, MCContext &Ctx) {
- return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset);
-}
-
-void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
- switch (Kind) {
- default: assert(0 && "Invalid kind!");
- case VK_Mips_None: break;
- case VK_Mips_GPREL: OS << "%gp_rel("; break;
- case VK_Mips_GOT_CALL: OS << "%call16("; break;
- case VK_Mips_GOT: OS << "%got("; break;
- case VK_Mips_ABS_HI: OS << "%hi("; break;
- case VK_Mips_ABS_LO: OS << "%lo("; break;
- case VK_Mips_TLSGD: OS << "%tlsgd("; break;
- case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
- case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
- case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
- case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
- case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
- case VK_Mips_GOT_DISP: OS << "%got_disp("; break;
- case VK_Mips_GOT_PAGE: OS << "%got_page("; break;
- case VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
- }
-
- OS << *Symbol;
-
- if (Offset) {
- if (Offset > 0)
- OS << '+';
- OS << Offset;
- }
-
- if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO)
- OS << ")))";
- else if (Kind != VK_Mips_None)
- OS << ')';
-}
-
-bool
-MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return false;
-}
-
-void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const {
- Asm->getOrCreateSymbolData(*Symbol);
-}
-
-const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const {
- return Symbol->isDefined() ? &Symbol->getSection() : NULL;
-}
-
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h
deleted file mode 100644
index 55e85a7..0000000
--- a/lib/Target/Mips/MipsMCSymbolRefExpr.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MIPSMCSYMBOLREFEXPR_H
-#define MIPSMCSYMBOLREFEXPR_H
-#include "llvm/MC/MCExpr.h"
-
-namespace llvm {
-
-class MipsMCSymbolRefExpr : public MCTargetExpr {
-public:
- enum VariantKind {
- VK_Mips_None,
- VK_Mips_GPREL,
- VK_Mips_GOT_CALL,
- VK_Mips_GOT,
- VK_Mips_ABS_HI,
- VK_Mips_ABS_LO,
- VK_Mips_TLSGD,
- VK_Mips_GOTTPREL,
- VK_Mips_TPREL_HI,
- VK_Mips_TPREL_LO,
- VK_Mips_GPOFF_HI,
- VK_Mips_GPOFF_LO,
- VK_Mips_GOT_DISP,
- VK_Mips_GOT_PAGE,
- VK_Mips_GOT_OFST
- };
-
-private:
- const VariantKind Kind;
- const MCSymbol *Symbol;
- int Offset;
-
- explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol,
- int _Offset)
- : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {}
-
-public:
- static const MipsMCSymbolRefExpr *Create(VariantKind Kind,
- const MCSymbol *Symbol, int Offset,
- MCContext &Ctx);
-
- void PrintImpl(raw_ostream &OS) const;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
- void AddValueSymbols(MCAssembler *) const;
- const MCSection *FindAssociatedSection() const;
-
- static bool classof(const MCExpr *E) {
- return E->getKind() == MCExpr::Target;
- }
-
- static bool classof(const MipsMCSymbolRefExpr *) { return true; }
-
- int getOffset() const { return Offset; }
- void setOffset(int O) { Offset = O; }
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 5331f09..06c4a66 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -285,7 +285,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
(FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
- FrameReg = Mips::SP;
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
@@ -334,8 +334,10 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool IsN64 = Subtarget.isABI_N64();
- return TFI->hasFP(MF) ? Mips::FP : Mips::SP;
+ return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) :
+ (IsN64 ? Mips::SP_64 : Mips::SP);
}
unsigned MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 6480da3..5d6b24f 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -36,8 +36,9 @@ MipsTargetMachine::
MipsTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool isLittle):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS, isLittle),
DataLayout(isLittle ?
(Subtarget.isABI_N64() ?
@@ -54,31 +55,35 @@ MipsTargetMachine(const Target &T, StringRef TT,
MipsebTargetMachine::
MipsebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {}
MipselTargetMachine::
MipselTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {}
Mips64ebTargetMachine::
Mips64ebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {}
Mips64elTargetMachine::
Mips64elTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL) :
+ MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsTargetMachine::
-addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+addInstSelector(PassManagerBase &PM)
{
PM.add(createMipsISelDag(*this));
return false;
@@ -88,14 +93,14 @@ addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
bool MipsTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+addPreEmitPass(PassManagerBase &PM)
{
PM.add(createMipsDelaySlotFillerPass(*this));
return true;
}
bool MipsTargetMachine::
-addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+addPreRegAlloc(PassManagerBase &PM) {
// Do not restore $gp if target is Mips64.
// In N32/64, $gp is a callee-saved register.
if (!Subtarget.hasMips64())
@@ -104,14 +109,13 @@ addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
}
bool MipsTargetMachine::
-addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+addPostRegAlloc(PassManagerBase &PM) {
PM.add(createMipsExpandPseudoPass(*this));
return true;
}
bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- JITCodeEmitter &JCE) {
+ JITCodeEmitter &JCE) {
// Machine code emitter pass for Mips.
PM.add(createMipsJITCodeEmitterPass(*this, JCE));
return false;
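
The common thread in these hunks: CodeGenOpt::Level is now fixed once in the LLVMTargetMachine constructor instead of being re-passed to every pass-pipeline hook, and the hooks recover it via getOptLevel(). A hedged sketch of construction under the new signature (TargetRegistry names as of this era):

    // Sketch: creating a Mips target machine at this API revision. The
    // optimization level rides through createTargetMachine() and is later
    // queried with TargetMachine::getOptLevel() inside the hooks above.
    std::string Error;
    const Target *T = TargetRegistry::lookupTarget("mips-unknown-linux", Error);
    TargetMachine *TM =
        T->createTargetMachine("mips-unknown-linux", "mips32", "",
                               Reloc::PIC_, CodeModel::Default,
                               CodeGenOpt::Aggressive);
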
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 118ed10..e40d9e2 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -40,6 +40,7 @@ namespace llvm {
MipsTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool isLittle);
virtual const MipsInstrInfo *getInstrInfo() const
@@ -67,15 +68,11 @@ namespace llvm {
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
+ virtual bool addPreRegAlloc(PassManagerBase &PM);
+ virtual bool addPostRegAlloc(PassManagerBase &);
virtual bool addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE);
};
@@ -86,7 +83,8 @@ class MipsebTargetMachine : public MipsTargetMachine {
public:
MipsebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// MipselTargetMachine - Mips32 little endian target machine.
@@ -95,7 +93,8 @@ class MipselTargetMachine : public MipsTargetMachine {
public:
MipselTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// Mips64ebTargetMachine - Mips64 big endian target machine.
@@ -104,7 +103,8 @@ class Mips64ebTargetMachine : public MipsTargetMachine {
public:
Mips64ebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// Mips64elTargetMachine - Mips64 little endian target machine.
@@ -113,7 +113,8 @@ class Mips64elTargetMachine : public MipsTargetMachine {
public:
Mips64elTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // End llvm namespace
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
index a5af3b8..09f86b5 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -52,9 +52,10 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 50dd417..292ea5e 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -88,8 +88,9 @@ namespace {
PTXTargetMachine::PTXTargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
@@ -100,39 +101,38 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
}
PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
}
-bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createPTXISelDag(*this, OptLevel));
+bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) {
+ PM.add(createPTXISelDag(*this, getOptLevel()));
return false;
}
-bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM) {
// PTXMFInfoExtract must run after register allocation!
- //PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ //PM.add(createPTXMFInfoExtract(*this));
return false;
}
bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// This is mostly based on LLVMTargetMachine::addPassesToEmitFile
// Add common CodeGen passes.
MCContext *Context = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ if (addCommonCodeGenPasses(PM, DisableVerify, Context))
return true;
assert(Context != 0 && "Failed to get MCContext");
@@ -192,7 +192,6 @@ bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
}
bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
bool DisableVerify,
MCContext *&OutContext) {
// Add standard LLVM codegen passes.
@@ -214,7 +213,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createVerifierPass());
// Run loop strength reduction before anything else.
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
PM.add(createLoopStrengthReducePass(getTargetLowering()));
//PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
@@ -228,12 +227,12 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// The lower invoke pass may create unreachable code. Remove it.
PM.add(createUnreachableBlockEliminationPass());
- if (OptLevel != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None)
PM.add(createCodeGenPreparePass(getTargetLowering()));
PM.add(createStackProtectorPass(getTargetLowering()));
- addPreISel(PM, OptLevel);
+ addPreISel(PM);
//PM.add(createPrintFunctionPass("\n\n"
// "*** Final LLVM Code input to ISel ***\n",
@@ -255,10 +254,10 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
// Set up a MachineFunction for the rest of CodeGen to work on.
- PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+ PM.add(new MachineFunctionAnalysis(*this));
// Ask the target for an isel.
- if (addInstSelector(PM, OptLevel))
+ if (addInstSelector(PM))
return true;
// Print the instruction selected machine code...
@@ -268,21 +267,21 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createExpandISelPseudosPass());
// Pre-ra tail duplication.
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
PM.add(createTailDuplicatePass(true));
printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
}
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
- if (OptLevel != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None)
PM.add(createOptimizePHIsPass());
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
PM.add(createLocalStackSlotAllocationPass());
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
// With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only
// used by tail calls, where the tail calls reuse the incoming stack
@@ -300,7 +299,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
}
// Run pre-ra passes.
- if (addPreRegAlloc(PM, OptLevel))
+ if (addPreRegAlloc(PM))
printAndVerify(PM, "After PreRegAlloc passes");
// Perform register allocation.
@@ -308,7 +307,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printAndVerify(PM, "After Register Allocation");
// Perform stack slot coloring and post-ra machine LICM.
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
// FIXME: Re-enable coloring with register when it's capable of adding
// kill markers.
PM.add(createStackSlotColoringPass(false));
@@ -322,7 +321,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
}
// Run post-ra passes.
- if (addPostRegAlloc(PM, OptLevel))
+ if (addPostRegAlloc(PM))
printAndVerify(PM, "After PostRegAlloc passes");
PM.add(createExpandPostRAPseudosPass());
@@ -333,23 +332,23 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printAndVerify(PM, "After PrologEpilogCodeInserter");
// Run pre-sched2 passes.
- if (addPreSched2(PM, OptLevel))
+ if (addPreSched2(PM))
printAndVerify(PM, "After PreSched2 passes");
// Second pass scheduler.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createPostRAScheduler(OptLevel));
+ if (getOptLevel() != CodeGenOpt::None) {
+ PM.add(createPostRAScheduler(getOptLevel()));
printAndVerify(PM, "After PostRAScheduler");
}
// Branch folding must be run after regalloc and prolog/epilog insertion.
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
printNoVerify(PM, "After BranchFolding");
}
// Tail duplication.
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
PM.add(createTailDuplicatePass(false));
printNoVerify(PM, "After TailDuplicate");
}
@@ -359,16 +358,16 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
//if (PrintGCInfo)
// PM.add(createGCInfoPrinter(dbgs()));
- if (OptLevel != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None) {
PM.add(createCodePlacementOptPass());
printNoVerify(PM, "After CodePlacementOpt");
}
- if (addPreEmitPass(PM, OptLevel))
+ if (addPreEmitPass(PM))
printNoVerify(PM, "After PreEmit passes");
- PM.add(createPTXMFInfoExtract(*this, OptLevel));
- PM.add(createPTXFPRoundingModePass(*this, OptLevel));
+ PM.add(createPTXMFInfoExtract(*this, getOptLevel()));
+ PM.add(createPTXFPRoundingModePass(*this, getOptLevel()));
return false;
}
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 5b7c82b..19f6c0f 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -37,6 +37,7 @@ class PTXTargetMachine : public LLVMTargetMachine {
PTXTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -58,22 +59,18 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPostRegAlloc(PassManagerBase &PM);
// We override this method to supply our own set of codegen passes.
virtual bool addPassesToEmitFile(PassManagerBase &,
formatted_raw_ostream &,
CodeGenFileType,
- CodeGenOpt::Level,
bool = true);
// Emission of machine code through JITCodeEmitter is not supported.
virtual bool addPassesToEmitMachineCode(PassManagerBase &,
JITCodeEmitter &,
- CodeGenOpt::Level,
bool = true) {
return true;
}
@@ -82,14 +79,13 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual bool addPassesToEmitMC(PassManagerBase &,
MCContext *&,
raw_ostream &,
- CodeGenOpt::Level,
bool = true) {
return true;
}
private:
- bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+ bool addCommonCodeGenPasses(PassManagerBase &,
bool DisableVerify, MCContext *&OutCtx);
}; // class PTXTargetMachine
@@ -99,7 +95,8 @@ public:
PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
class PTX64TargetMachine : public PTXTargetMachine {
@@ -107,7 +104,8 @@ public:
PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
}; // class PTX64TargetMachine
} // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index d5c8a9e..7c47051 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -76,7 +76,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
@@ -86,7 +87,7 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
else
RM = Reloc::Static;
}
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6f204cc..3dee406 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -18,7 +18,6 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index b188b90..36d5c41 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CallingConv.h"
@@ -408,6 +407,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setInsertFencesForAtomic(true);
+ setSchedulingPreference(Sched::Hybrid);
+
computeRegisterProperties();
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index f148e9d..b9a6297 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -57,11 +56,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_440) {
- // Disable the hazard recognizer for now, as it doesn't support
- // bottom-up scheduling.
- //const InstrItineraryData *II = TM->getInstrItineraryData();
- //return new PPCHazardRecognizer440(II, DAG);
- return new ScheduleHazardRecognizer();
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new PPCHazardRecognizer440(II, DAG);
}
else {
// Disable the hazard recognizer for now, as it doesn't support
@@ -501,8 +497,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -623,8 +618,7 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 2e90b7a..3ba9260 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -273,6 +273,27 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+unsigned
+PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const unsigned DefaultSafety = 1;
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 32 - FP - DefaultSafety;
+ }
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::VRRCRegClassID:
+ return 32 - DefaultSafety;
+ }
+}
+
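
Worked numbers for the new limit: with a frame pointer in use, the integer classes report 32 - 1 (FP) - 1 (DefaultSafety) = 30 registers to the register-pressure tracker, and 31 without one; the FP and vector classes always report 32 - 1 = 31, while unlisted classes fall back to the default of 0.
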
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 1cc7213..f70a594 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -37,6 +37,9 @@ public:
/// This is used for addressing modes.
virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index f5744b83..de8fca0 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -29,8 +29,9 @@ extern "C" void LLVMInitializePowerPCTarget() {
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
@@ -44,15 +45,17 @@ bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
}
PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
}
@@ -60,22 +63,19 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool PPCTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool PPCTargetMachine::addInstSelector(PassManagerBase &PM) {
// Install an instruction selector.
PM.add(createPPCISelDag(*this));
return false;
}
-bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM) {
// Must run branch selection immediately preceding the asm printer.
PM.add(createPPCBranchSelectionPass());
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
// FIXME: This should be moved to TargetJITInfo!!
if (Subtarget.isPPC64())
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index d06f084..03b27c6 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -42,7 +42,8 @@ class PPCTargetMachine : public LLVMTargetMachine {
public:
PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM, bool is64Bit);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64Bit);
virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const PPCFrameLowering *getFrameLowering() const {
@@ -66,9 +67,9 @@ public:
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
virtual bool getEnableTailMergeDefault() const;
};
@@ -79,7 +80,8 @@ class PPC32TargetMachine : public PPCTargetMachine {
public:
PPC32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// PPC64TargetMachine - PowerPC 64-bit target machine.
@@ -88,7 +90,8 @@ class PPC64TargetMachine : public PPCTargetMachine {
public:
PPC64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index cb2a7df..eda04c3 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -50,9 +50,10 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 3d7b4a4..7dff799 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -27,16 +27,16 @@ extern "C" void LLVMInitializeSparcTarget() {
SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
FrameLowering(Subtarget) {
}
-bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool SparcTargetMachine::addInstSelector(PassManagerBase &PM) {
PM.add(createSparcISelDag(*this));
return false;
}
@@ -44,8 +44,7 @@ bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
/// addPreEmitPass - This pass may be implemented by targets that want to run
/// passes immediately before machine code is emitted. This should return
/// true if -print-machineinstrs should print out the code after the passes.
-bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel){
+bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){
PM.add(createSparcFPMoverPass(*this));
PM.add(createSparcDelaySlotFillerPass(*this));
return true;
@@ -54,13 +53,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
StringRef FS, Reloc::Model RM,
- CodeModel::Model CM)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
}
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
StringRef FS, Reloc::Model RM,
- CodeModel::Model CM)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 3c907dd..63bfa5d 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -35,7 +35,8 @@ class SparcTargetMachine : public LLVMTargetMachine {
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM, bool is64bit);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -54,8 +55,8 @@ public:
virtual const TargetData *getTargetData() const { return &DataLayout; }
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
};
/// SparcV8TargetMachine - Sparc 32-bit target machine
@@ -64,7 +65,8 @@ class SparcV8TargetMachine : public SparcTargetMachine {
public:
SparcV8TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// SparcV9TargetMachine - Sparc 64-bit target machine
@@ -73,7 +75,8 @@ class SparcV9TargetMachine : public SparcTargetMachine {
public:
SparcV9TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // end namespace llvm
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 709dfd2..aa2e014 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -20,6 +20,19 @@ INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
"Target Library Information", false, true)
char TargetLibraryInfo::ID = 0;
+const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
+ {
+ "memset",
+ "memcpy",
+ "memmove",
+ "memset_pattern16",
+ "iprintf",
+ "siprintf",
+ "fiprintf",
+ "fwrite",
+ "fputs"
+ };
+
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
@@ -38,6 +51,17 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
TLI.setUnavailable(LibFunc::memset_pattern16);
}
+ if (T.isMacOSX() && T.getArch() == Triple::x86 &&
+ !T.isMacOSXVersionLT(10, 7)) {
+ // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
+ // we don't care about) have two versions; on recent OSX, the one we want
+ // has a $UNIX2003 suffix. The two implementations are identical except
+ // for the return value in some edge cases. However, we don't want to
+ // generate code that depends on the old symbols.
+ TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ }
+
// iprintf and friends are only available on XCore and TCE.
if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
TLI.setUnavailable(LibFunc::iprintf);
@@ -64,6 +88,7 @@ TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
: ImmutablePass(ID) {
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ CustomNames = TLI.CustomNames;
}
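
Since setAvailableWithName records a per-function symbol, later passes can emit calls under the custom name. A hedged sketch of a consumer, assuming a getName() accessor accompanies the CustomNames map copied above:

    // Sketch: query the effective symbol for fwrite on i386 OS X 10.7+.
    // getName() is assumed to return "fwrite$UNIX2003" when a custom name
    // was registered via setAvailableWithName(), and "fwrite" otherwise.
    if (TLI->has(LibFunc::fwrite)) {
      StringRef FwriteName = TLI->getName(LibFunc::fwrite);
      // ... emit the libcall against FwriteName rather than a literal name.
    }
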
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index daac924..805e16e 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -226,6 +226,14 @@ CodeModel::Model TargetMachine::getCodeModel() const {
return CodeGenInfo->getCodeModel();
}
+/// getOptLevel - Returns the optimization level: None, Less,
+/// Default, or Aggressive.
+CodeGenOpt::Level TargetMachine::getOptLevel() const {
+ if (!CodeGenInfo)
+ return CodeGenOpt::Default;
+ return CodeGenInfo->getOptLevel();
+}
+
bool TargetMachine::getAsmVerbosityDefault() {
return AsmVerbosityDefault;
}
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 8d85b95..6e87efa 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -34,6 +34,12 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
switch (MI->getOpcode()) {
case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+ case X86::VINSERTPSrr:
+ DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
Src2Name = getRegName(MI->getOperand(2).getReg());
DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
@@ -44,34 +50,52 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodeMOVLHPSMask(2, ShuffleMask);
break;
+ case X86::VMOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
case X86::MOVHLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::VMOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
case X86::PSHUFDri:
+ case X86::VPSHUFDri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFDmi:
+ case X86::VPSHUFDmi:
DestName = getRegName(MI->getOperand(0).getReg());
DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::PSHUFHWri:
+ case X86::VPSHUFHWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFHWmi:
+ case X86::VPSHUFHWmi:
DestName = getRegName(MI->getOperand(0).getReg());
DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::PSHUFLWri:
+ case X86::VPSHUFLWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFLWmi:
+ case X86::VPSHUFLWmi:
DestName = getRegName(MI->getOperand(0).getReg());
DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -142,6 +166,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VSHUFPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDrmi:
+ DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::SHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -150,63 +182,107 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VSHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSrmi:
+ DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPDMask(2, ShuffleMask);
+ DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDrm:
- DecodeUNPCKLPDMask(2, ShuffleMask);
+ DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDYrm:
- DecodeUNPCKLPDMask(4, ShuffleMask);
+ DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPSMask(4, ShuffleMask);
+ DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSrm:
- DecodeUNPCKLPSMask(4, ShuffleMask);
+ DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSYrm:
- DecodeUNPCKLPSMask(8, ShuffleMask);
+ DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPDrm:
- DecodeUNPCKHPMask(2, ShuffleMask);
+ DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDrm:
+ DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDYrm:
+ DecodeUNPCKHPMask(MVT::v4f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPSrm:
- DecodeUNPCKHPMask(4, ShuffleMask);
+ DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSrm:
+ DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSYrm:
+ DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::VPERMILPSri:
DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
ShuffleMask);
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 03c3948..a843515 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -385,7 +385,8 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
Triple T(TT);
@@ -429,7 +430,7 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
// 64-bit JIT places everything in the same buffer except external funcs.
CM = is64Bit ? CodeModel::Large : CodeModel::Small;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index aeb3309..f6c9d7b 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -142,29 +142,29 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
}
}
-void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2); // Reads from dest
- ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
- }
-}
+void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-void DecodeUNPCKLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
-}
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
-void DecodeUNPCKLPDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+ for (unsigned s = 0; s < NumLanes; ++s) {
+ unsigned Start = s * NumLaneElts + NumLaneElts/2;
+ unsigned End = s * NumLaneElts + NumLaneElts;
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ }
+ }
}
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -173,16 +173,13 @@ void DecodeUNPCKLPMask(EVT VT,
if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
- unsigned Start = 0;
- unsigned End = NumLaneElts / 2;
for (unsigned s = 0; s < NumLanes; ++s) {
+ unsigned Start = s * NumLaneElts;
+ unsigned End = s * NumLaneElts + NumLaneElts/2;
for (unsigned i = Start; i != End; ++i) {
- ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
}
- // Process the next 128 bits.
- Start += NumLaneElts;
- End += NumLaneElts;
}
}
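
The rewritten decoders iterate per 128-bit lane, which is what makes the 256-bit AVX forms come out right. A worked check against the new loop bounds, as a sketch:

    // For MVT::v8f32: NumElts = 8, NumLanes = 2, NumLaneElts = 4.
    SmallVector<unsigned, 16> Mask;
    DecodeUNPCKLPMask(MVT::v8f32, Mask);
    // Lane 0 interleaves elements 0-1 of both sources, lane 1 elements 4-5:
    // Mask == { 0, 8, 1, 9, 4, 12, 5, 13 }
    Mask.clear();
    DecodeUNPCKHPMask(MVT::v8f32, Mask);
    // Mask == { 2, 10, 3, 11, 6, 14, 7, 15 }
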
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 58193e6..35f6530 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -67,20 +67,15 @@ void DecodePUNPCKHMask(unsigned NElts,
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeUNPCKLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeUNPCKLPDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 3d75de0..3c35763 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2216,6 +2216,75 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case ISD::STORE: {
+ // The DEC64m tablegen pattern is currently not able to match the case where
+ // the EFLAGS on the original DEC are used.
+ // We'll need to improve tablegen to allow flags to be transferred from a
+ // node in the pattern to the result node, probably with a new keyword.
+ // For example, we currently have this:
+ // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ // (implicit EFLAGS)]>;
+ // but we may need something like this:
+ // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ // (transferrable EFLAGS)]>;
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
+ SDValue Chain = StoreNode->getOperand(0);
+ SDValue StoredVal = StoreNode->getOperand(1);
+ SDValue Address = StoreNode->getOperand(2);
+ SDValue Undef = StoreNode->getOperand(3);
+
+ if (StoreNode->getMemOperand()->getSize() != 8 ||
+ Undef->getOpcode() != ISD::UNDEF ||
+ Chain->getOpcode() != ISD::LOAD ||
+ StoredVal->getOpcode() != X86ISD::DEC ||
+ StoredVal.getResNo() != 0 ||
+ StoredVal->getOperand(0).getNode() != Chain.getNode())
+ break;
+
+ //OPC_CheckPredicate, 1, // Predicate_nontemporalstore
+ if (StoreNode->isNonTemporal())
+ break;
+
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode());
+ if (LoadNode->getOperand(1) != Address ||
+ LoadNode->getOperand(2) != Undef)
+ break;
+
+ if (!ISD::isNormalLoad(LoadNode))
+ break;
+
+ if (!ISD::isNormalStore(StoreNode))
+ break;
+
+ // Check that the load chain has only one use (from the store).
+ if (!Chain.hasOneUse())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ SDValue InputChain = LoadNode->getOperand(0);
+
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
+ Base, Scale, Index, Disp, Segment))
+ break;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
+ MemOp[0] = StoreNode->getMemOperand();
+ MemOp[1] = LoadNode->getMemOperand();
+ const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
+ MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m,
+ Node->getDebugLoc(),
+ MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ Result->setMemRefs(MemOp, MemOp + 2);
+
+ ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
+ ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
+
+ return Result;
+ }
}
SDNode *ResNode = SelectCode(Node);
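
In source terms, the new ISD::STORE case folds a load/decrement/store of the same 64-bit location into one memory-destination instruction even when the flags are consumed. A hedged illustration of the kind of code it targets (do_work is a placeholder):

    // Sketch of the motivating pattern:
    extern void do_work(void);
    static long counter;
    void tick(void) {
      if (--counter)      // the branch consumes EFLAGS from the decrement
        do_work();
    }
    // The flag use previously blocked the DEC64m tablegen pattern, forcing
    // a load/dec/store sequence; the manual match above can emit a single
    //   decq counter(%rip)
    // whose EFLAGS result feeds the branch directly.
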
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4e11131..96c6f41 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -35,7 +35,6 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -909,7 +908,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
}
- if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSE41orAVX()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
@@ -981,7 +980,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE42() || Subtarget->hasAVX())
+ if (Subtarget->hasSSE42orAVX())
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
if (!UseSoftFloat && Subtarget->hasAVX()) {
@@ -2846,16 +2845,12 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
@@ -2927,16 +2922,12 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
@@ -3416,6 +3407,41 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
return Mask;
}
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
+ unsigned NumElems = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
+ }
+}
+
+/// isCommutedVSHUFPMask - Return true if swapping the operands would
+/// allow the "vshufpd" or "vshufps" instruction to be used
+/// for 256-bit vectors.
+static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+
+ unsigned NumElems = VT.getVectorNumElements();
+ if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8)))
+ return false;
+
+ SmallVector<int, 8> CommutedMask;
+ for (unsigned i = 0; i < NumElems; ++i)
+ CommutedMask.push_back(Mask[i]);
+
+ CommuteVectorShuffleMask(CommutedMask, VT);
+ return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget):
+ isVSHUFPSYMask(CommutedMask, VT, Subtarget);
+}
+
+
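
Worked example for the commuted check: with NumElems == 4, the mask <4, 1, 6, 3> commutes to <0, 5, 2, 7> (indices below NumElems gain NumElems, the rest lose it); if the commuted mask then satisfies isVSHUFPDYMask, lowering can swap V1 and V2 and still use vshufpd.
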
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// SHUFPS and SHUFPD.
@@ -3551,13 +3577,14 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool V2IsSplat = false) {
+ bool HasAVX2, bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3591,22 +3618,23 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
return true;
}
-bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) {
SmallVector<int, 8> M;
N->getMask(M);
- return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
+ return ::isUNPCKLMask(M, N->getValueType(0), HasAVX2, V2IsSplat);
}
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool V2IsSplat = false) {
+ bool HasAVX2, bool V2IsSplat = false) {
int NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3638,10 +3666,10 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
return true;
}
-bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) {
SmallVector<int, 8> M;
N->getMask(M);
- return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
+ return ::isUNPCKHMask(M, N->getValueType(0), HasAVX2, V2IsSplat);
}
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
@@ -3953,7 +3981,7 @@ static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
const X86Subtarget *Subtarget) {
- if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
+ if (!Subtarget->hasSSE3orAVX())
return false;
// The second vector must be undef
@@ -3981,7 +4009,7 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
const X86Subtarget *Subtarget) {
- if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
+ if (!Subtarget->hasSSE3orAVX())
return false;
// The second vector must be undef
@@ -4216,21 +4244,6 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SVOp->getOperand(0), &MaskVec[0]);
}
-/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
-/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
- for (unsigned i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElems)
- Mask[i] = idx + NumElems;
- else
- Mask[i] = idx - NumElems;
- }
-}
-
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
@@ -4388,23 +4401,30 @@ static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
}
/// getOnesVector - Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32>. For 256-bit types, use two
-/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their
-/// original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
+/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
+/// Then bitcast to their original type, ensuring they get CSE'd.
+static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
+ DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
assert((VT.is128BitVector() || VT.is256BitVector())
&& "Expected a 128-bit or 256-bit vector type");
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- Cst, Cst, Cst, Cst);
-
- if (VT.is256BitVector()) {
- SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
- Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
- Vec = Insert128BitVector(InsV, Vec,
- DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ SDValue Vec;
+ if (VT.getSizeInBits() == 256) {
+ if (HasAVX2) { // AVX2
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else { // AVX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
+ Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Vec = Insert128BitVector(InsV, Vec,
+ DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ }
+ } else {
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
@@ -4623,9 +4643,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
break;
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- DecodeUNPCKHPMask(NumElems, ShuffleMask);
+ DecodeUNPCKHPMask(VT, ShuffleMask);
break;
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLWD:
@@ -4635,8 +4653,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
break;
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
DecodeUNPCKLPMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
@@ -5111,6 +5127,97 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
return SDValue();
}
+/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
+/// a vbroadcast node. We support two patterns:
+/// 1. A splat BUILD_VECTOR which uses a single scalar load.
+/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
+/// a scalar load.
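+/// For example, (build_vector (load p), (load p), (load p), (load p)) matches
+/// the first pattern; a <0,0,0,0> shuffle of (scalar_to_vector (load p))
+/// matches the second.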
+/// The scalar load node is returned when a pattern is found,
+/// or SDValue() otherwise.
+static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) {
+ EVT VT = Op.getValueType();
+ SDValue V = Op;
+
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ // A candidate load to be broadcast.
+ SDValue Ld;
+
+ switch (V.getOpcode()) {
+ default:
+ // Unknown pattern found.
+ return SDValue();
+
+ case ISD::BUILD_VECTOR: {
+ // The BUILD_VECTOR node must be a splat.
+ if (!isSplatVector(V.getNode()))
+ return SDValue();
+
+ Ld = V.getOperand(0);
+
+ // The suspected load node may have several users. Make sure that all
+ // of its users come from the BUILD_VECTOR node.
+ if (!Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
+ return SDValue();
+ break;
+ }
+
+ case ISD::VECTOR_SHUFFLE: {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+
+ // Shuffles must have a splat mask where the first element is
+ // broadcasted.
+ if (!SVOp->isSplat() || SVOp->getMaskElt(0) != 0)
+ return SDValue();
+
+ SDValue Sc = Op.getOperand(0);
+ if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return SDValue();
+
+ Ld = Sc.getOperand(0);
+
+ // The scalar_to_vector node and the suspected
+ // load node must have exactly one user.
+ if (!Sc.hasOneUse() || !Ld.hasOneUse())
+ return SDValue();
+ break;
+ }
+ }
+
+ // The scalar source must be a normal load.
+ if (!ISD::isNormalLoad(Ld.getNode()))
+ return SDValue();
+
+ bool Is256 = VT.getSizeInBits() == 256;
+ bool Is128 = VT.getSizeInBits() == 128;
+ unsigned ScalarSize = Ld.getValueType().getSizeInBits();
+
+ if (hasAVX2) {
+ // VBroadcast to YMM
+ if (Is256 && (ScalarSize == 8 || ScalarSize == 16 ||
+ ScalarSize == 32 || ScalarSize == 64))
+ return Ld;
+
+ // VBroadcast to XMM
+ if (Is128 && (ScalarSize == 8 || ScalarSize == 16 ||
+ ScalarSize == 32 || ScalarSize == 64))
+ return Ld;
+ }
+
+ // VBroadcast to YMM
+ if (Is256 && (ScalarSize == 32 || ScalarSize == 64))
+ return Ld;
+
+ // VBroadcast to XMM
+ if (Is128 && (ScalarSize == 32))
+ return Ld;
+
+ // Unsupported broadcast.
+ return SDValue();
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5131,14 +5238,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
- // vectors or broken into v4i32 operations on 256-bit vectors.
+ // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
+ // vpcmpeqd on 256-bit vectors.
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (Op.getValueType() == MVT::v4i32)
+ if (Op.getValueType() == MVT::v4i32 ||
+ (Op.getValueType() == MVT::v8i32 && Subtarget->hasAVX2()))
return Op;
- return getOnesVector(Op.getValueType(), DAG, dl);
+ return getOnesVector(Op.getValueType(), Subtarget->hasAVX2(), DAG, dl);
}
+ SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2());
+ if (Subtarget->hasAVX() && LD.getNode())
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
+
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
@@ -5380,7 +5493,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return LD;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) {
+ if (getSubtarget()->hasSSE41orAVX()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -5551,7 +5664,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSSE3orAVX()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
BestLoQuad = InputQuads.find_first();
BestHiQuad = InputQuads.find_next(BestLoQuad);
@@ -5624,7 +5737,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSSE3orAVX()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
@@ -5692,8 +5805,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
- (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX())
NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
NewV.getOperand(0),
X86::getShufflePSHUFLWImmediate(NewV.getNode()),
@@ -5721,8 +5833,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
- (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX())
NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
NewV.getOperand(0),
X86::getShufflePSHUFHWImmediate(NewV.getNode()),
@@ -5788,7 +5899,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
- if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) {
+ if (TLI.getSubtarget()->hasSSSE3orAVX()) {
SmallVector<SDValue,16> pshufbMask;
// If all result elements are from one input vector, then only translate
@@ -6455,17 +6566,23 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT) {
+static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+ case MVT::v8i32:
+ if (HasAVX2) return X86ISD::PUNPCKLDQ;
+ // else use fp unit for int unpack.
+ case MVT::v8f32:
case MVT::v4f32: return X86ISD::UNPCKLPS;
+ case MVT::v4i64:
+ if (HasAVX2) return X86ISD::PUNPCKLQDQ;
+ // else use fp unit for int unpack.
+ case MVT::v4f64:
case MVT::v2f64: return X86ISD::UNPCKLPD;
- case MVT::v8i32: // Use fp unit for int unpack.
- case MVT::v8f32: return X86ISD::VUNPCKLPSY;
- case MVT::v4i64: // Use fp unit for int unpack.
- case MVT::v4f64: return X86ISD::VUNPCKLPDY;
+ case MVT::v32i8:
case MVT::v16i8: return X86ISD::PUNPCKLBW;
+ case MVT::v16i16:
case MVT::v8i16: return X86ISD::PUNPCKLWD;
default:
llvm_unreachable("Unknown type for unpckl");
@@ -6473,17 +6590,23 @@ static inline unsigned getUNPCKLOpcode(EVT VT) {
return 0;
}
-static inline unsigned getUNPCKHOpcode(EVT VT) {
+static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKHDQ;
case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+ case MVT::v8i32:
+ if (HasAVX2) return X86ISD::PUNPCKHDQ;
+ // else use fp unit for int unpack.
+ case MVT::v8f32:
case MVT::v4f32: return X86ISD::UNPCKHPS;
+ case MVT::v4i64:
+ if (HasAVX2) return X86ISD::PUNPCKHQDQ;
+ // else use fp unit for int unpack.
+ case MVT::v4f64:
case MVT::v2f64: return X86ISD::UNPCKHPD;
- case MVT::v8i32: // Use fp unit for int unpack.
- case MVT::v8f32: return X86ISD::VUNPCKHPSY;
- case MVT::v4i64: // Use fp unit for int unpack.
- case MVT::v4f64: return X86ISD::VUNPCKHPDY;
+ case MVT::v32i8:
case MVT::v16i8: return X86ISD::PUNPCKHBW;
+ case MVT::v16i16:
case MVT::v8i16: return X86ISD::PUNPCKHWD;
default:
llvm_unreachable("Unknown type for unpckh");
@@ -6507,52 +6630,6 @@ static inline unsigned getVPERMILOpcode(EVT VT) {
return 0;
}
-/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
-/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming
-/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded.
-static bool isVectorBroadcast(SDValue &Op) {
- EVT VT = Op.getValueType();
- bool Is256 = VT.getSizeInBits() == 256;
-
- assert((VT.getSizeInBits() == 128 || Is256) &&
- "Unsupported type for vbroadcast node");
-
- SDValue V = Op;
- if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
-
- if (Is256 && !(V.hasOneUse() &&
- V.getOpcode() == ISD::INSERT_SUBVECTOR &&
- V.getOperand(0).getOpcode() == ISD::UNDEF))
- return false;
-
- if (Is256)
- V = V.getOperand(1);
-
- if (!V.hasOneUse())
- return false;
-
- // Check the source scalar_to_vector type. 256-bit broadcasts are
- // supported for 32/64-bit sizes, while 128-bit ones are only supported
- // for 32-bit scalars.
- if (V.getOpcode() != ISD::SCALAR_TO_VECTOR)
- return false;
-
- unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits();
- if (ScalarSize != 32 && ScalarSize != 64)
- return false;
- if (!Is256 && ScalarSize == 64)
- return false;
-
- V = V.getOperand(0);
- if (!MayFoldLoad(V))
- return false;
-
- // Return the load node
- Op = V;
- return true;
-}
-
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -6578,8 +6655,9 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
return Op;
// Use vbroadcast whenever the splat comes from a foldable load
- if (Subtarget->hasAVX() && isVectorBroadcast(V1))
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1);
+ SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2());
+ if (Subtarget->hasAVX() && LD.getNode())
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
// Handle splats by matching through known shuffle masks
if ((Size == 128 && NumElem <= 4) ||
@@ -6630,6 +6708,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasXMMInt = Subtarget->hasXMMInt();
+ bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
@@ -6659,12 +6738,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
+ DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
+ DAG);
- if (X86::isMOVDDUPMask(SVOp) &&
- (Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
@@ -6672,9 +6752,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVHighToLow(Op, dl, DAG);
// Use to match splats
- if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
+ DAG);
if (X86::isPSHUFDMask(SVOp)) {
// The actual implementation will match the mask in the if above and then
@@ -6696,8 +6777,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
- bool isShift = getSubtarget()->hasXMMInt() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ bool isShift = HasXMMInt && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -6721,7 +6801,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// FIXME: fold these into legal mask.
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp, HasAVX2))
return getMOVLowToHigh(Op, dl, DAG, HasXMMInt);
if (X86::isMOVHLPSMask(SVOp))
@@ -6774,11 +6854,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKLMask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ if (X86::isUNPCKLMask(SVOp, HasAVX2))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
+ DAG);
- if (X86::isUNPCKHMask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+ if (X86::isUNPCKHMask(SVOp, HasAVX2))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
+ DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -6787,9 +6869,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SDValue NewMask = NormalizeMask(SVOp, DAG);
ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
if (NSVOp != SVOp) {
- if (X86::isUNPCKLMask(NSVOp, true)) {
+ if (X86::isUNPCKLMask(NSVOp, HasAVX2, true)) {
return NewMask;
- } else if (X86::isUNPCKHMask(NSVOp, true)) {
+ } else if (X86::isUNPCKHMask(NSVOp, HasAVX2, true)) {
return NewMask;
}
}
@@ -6801,11 +6883,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
- if (X86::isUNPCKLMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1,
+ DAG);
- if (X86::isUNPCKHMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1,
+ DAG);
}
// Normalize the node to match x86 shuffle ops if needed
@@ -6818,7 +6902,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SmallVector<int, 16> M;
SVOp->getMask(M);
- if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()))
return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
X86::getShufflePALIGNRImmediate(SVOp),
DAG);
@@ -6846,9 +6930,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
X86::getShuffleSHUFImmediate(SVOp), DAG);
if (X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
+ DAG);
if (X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
+ DAG);
//===--------------------------------------------------------------------===//
// Generate target specific nodes for 128 or 256-bit shuffles only
@@ -6884,6 +6970,17 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
getShuffleVSHUFPDYImmediate(SVOp), DAG);
+ // Try to swap operands in the node to match x86 shuffle ops
+ if (isCommutedVSHUFPMask(M, VT, Subtarget)) {
+ // Now we need to commute operands.
+ SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG));
+ V1 = SVOp->getOperand(0);
+ V2 = SVOp->getOperand(1);
+ unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp) :
+ getShuffleVSHUFPSYImmediate(SVOp);
+ return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG);
+ }
+
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
// lower it into other known shuffles. FIXME: this isn't true yet, but
@@ -7002,7 +7099,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
- if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSE41orAVX()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
return Res;
@@ -7144,7 +7241,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
return Insert128BitVector(N0, V, Ins128Idx, DAG, dl);
}
- if (Subtarget->hasSSE41() || Subtarget->hasAVX())
+ if (Subtarget->hasSSE41orAVX())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
@@ -8264,8 +8361,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
// climbing the DAG back to the root, and it doesn't seem to be worth the
// effort.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg &&
+ UI->getOpcode() != ISD::SETCC &&
+ UI->getOpcode() != ISD::STORE)
goto default_case;
if (ConstantSDNode *C =
@@ -8408,11 +8507,19 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ uint64_t AndRHSVal = AndRHS->getZExtValue();
SDValue AndLHS = Op0;
- if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+
+ if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
}
+
+ // Use BT if the immediate can't be encoded in a TEST instruction.
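+ // e.g. for (X & (1ULL << 40)) != 0, the mask does not fit a 32-bit TEST
+ // immediate, but BT can test bit 40 directly.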
+ if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
+ LHS = AndLHS;
+ RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType());
+ }
}
if (LHS.getNode()) {
@@ -8632,9 +8739,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX())
+ if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42orAVX())
return SDValue();
- if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX())
+ if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41orAVX())
return SDValue();
// Since SSE has no unsigned integer comparisons, we need to flip the sign
@@ -9464,6 +9571,23 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_hsub_pd_256:
return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ return DAG.getNode(ISD::SHL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ return DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -10261,47 +10385,48 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
- if (Subtarget->hasAVX2()) {
- if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
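+ // There is no byte-granular shift, so shift 16-bit lanes; bits spilling
+ // from each low byte land in the low bits of the byte above and are
+ // cleared by the AND below.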
+ SDValue SHL =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
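+ // As with SHL, shift 16-bit lanes; bits spilling from each high byte land
+ // in the top bits of the byte below and are cleared by the AND below.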
+ SDValue SRL =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
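+ // A signed shift by 7 replicates the sign bit across the byte, which is
+ // exactly what pcmpgtb(0, R) produces: all-ones iff R < 0.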
+ SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
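+ // with m = 0x80 >> a. e.g. for R = 0xF0 (-16) and a = 2:
+ // (0x3C ^ 0x20) - 0x20 = 0xFC, i.e. -4, matching -16 s>> 2.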
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
}
+ }
}
}
@@ -10493,9 +10618,9 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
- SDNode* Node = Op.getNode();
- EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- EVT VT = Node->getValueType(0);
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT VT = Op.getValueType();
+
if (Subtarget->hasXMMInt() && VT.isVector()) {
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
@@ -10506,21 +10631,55 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
switch (VT.getSimpleVT().SimpleTy) {
default:
return SDValue();
- case MVT::v4i32: {
+ case MVT::v4i32:
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
break;
- }
- case MVT::v8i16: {
+ case MVT::v8i16:
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
break;
- }
+ case MVT::v8i32:
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ if (!Subtarget->hasAVX2()) {
+ // AVX alone has no 256-bit integer shifts: split into two 128-bit
+ // halves, operate on each, and concatenate the results.
+ int NumElems = VT.getVectorNumElements();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ int ExtraNumElems = ExtraVT.getVectorNumElements();
+ ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
+ ExtraNumElems/2);
+ SDValue Extra = DAG.getValueType(ExtraVT);
+
+ LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
+ LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
+ }
+ if (VT == MVT::v8i32) {
+ SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d;
+ SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d;
+ } else {
+ SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w;
+ SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w;
+ }
}
SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(SHLIntrinsicsID, MVT::i32),
- Node->getOperand(0), ShAmt);
+ Op.getOperand(0), ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(SRAIntrinsicsID, MVT::i32),
@@ -11033,9 +11192,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
- case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
- case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
- case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
@@ -11111,7 +11268,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
@@ -11235,7 +11391,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX());
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX());
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
@@ -11245,9 +11401,9 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isPSHUFDMask(M, VT) ||
isPSHUFHWMask(M, VT) ||
isPSHUFLWMask(M, VT) ||
- isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) ||
- isUNPCKLMask(M, VT) ||
- isUNPCKHMask(M, VT) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()) ||
+ isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
+ isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
isUNPCKL_v_undef_Mask(M, VT) ||
isUNPCKH_v_undef_Mask(M, VT));
}
@@ -11654,7 +11810,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
- assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ assert(Subtarget->hasSSE42orAVX() &&
"Target must have SSE4.2 or AVX features enabled");
DebugLoc dl = MI->getDebugLoc();
@@ -13808,98 +13964,98 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return R;
EVT VT = N->getValueType(0);
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
- return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// look for psign/blend
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
- if (VT == MVT::v2i64) {
- // Canonicalize pandn to RHS
- if (N0.getOpcode() == X86ISD::ANDNP)
- std::swap(N0, N1);
- // or (and (m, x), (pandn m, y))
- if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
- SDValue Mask = N1.getOperand(0);
- SDValue X = N1.getOperand(1);
- SDValue Y;
- if (N0.getOperand(0) == Mask)
- Y = N0.getOperand(1);
- if (N0.getOperand(1) == Mask)
- Y = N0.getOperand(0);
-
- // Check to see if the mask appeared in both the AND and ANDNP and
- if (!Y.getNode())
- return SDValue();
+ if (VT == MVT::v2i64 || VT == MVT::v4i64) {
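+ // The 256-bit forms of PSIGN and PBLENDVB require AVX2.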
+ if (!Subtarget->hasSSSE3orAVX() ||
+ (VT == MVT::v4i64 && !Subtarget->hasAVX2()))
+ return SDValue();
- // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
- if (Mask.getOpcode() != ISD::BITCAST ||
- X.getOpcode() != ISD::BITCAST ||
- Y.getOpcode() != ISD::BITCAST)
- return SDValue();
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::ANDNP)
+ std::swap(N0, N1);
+ // or (and (m, x), (pandn m, y))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and the ANDNP.
+ if (!Y.getNode())
+ return SDValue();
- // Look through mask bitcast.
- Mask = Mask.getOperand(0);
- EVT MaskVT = Mask.getValueType();
+ // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+ if (Mask.getOpcode() != ISD::BITCAST ||
+ X.getOpcode() != ISD::BITCAST ||
+ Y.getOpcode() != ISD::BITCAST)
+ return SDValue();
- // Validate that the Mask operand is a vector sra node. The sra node
- // will be an intrinsic.
- if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
- return SDValue();
+ // Look through mask bitcast.
+ Mask = Mask.getOperand(0);
+ EVT MaskVT = Mask.getValueType();
- // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
- // there is no psrai.b
- switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- break;
- default: return SDValue();
- }
+ // Validate that the Mask operand is a vector sra node. The sra node
+ // will be an intrinsic.
+ if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
- // Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(2);
- unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
- unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
- if ((SraAmt + 1) != EltBits)
- return SDValue();
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ break;
+ default: return SDValue();
+ }
- DebugLoc DL = N->getDebugLoc();
-
- // Now we know we at least have a plendvb with the mask val. See if
- // we can form a psignb/w/d.
- // psign = x.type == y.type == mask.type && y = sub(0, x);
- X = X.getOperand(0);
- Y = Y.getOperand(0);
- if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
- ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
- X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
- unsigned Opc = 0;
- switch (EltBits) {
- case 8: Opc = X86ISD::PSIGNB; break;
- case 16: Opc = X86ISD::PSIGNW; break;
- case 32: Opc = X86ISD::PSIGND; break;
- default: break;
- }
- if (Opc) {
- SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
- }
- }
- // PBLENDVB only available on SSE 4.1
- if (!(Subtarget->hasSSE41() || Subtarget->hasAVX()))
- return SDValue();
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(2);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
- X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
- Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
- Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
- Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y);
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a pblendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ X = X.getOperand(0);
+ Y = Y.getOperand(0);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && X.getValueType() == Y.getValueType() &&
+ (EltBits == 8 || EltBits == 16 || EltBits == 32)) {
+ SDValue Sign = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X,
+ Mask.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Sign);
}
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41orAVX())
+ return SDValue();
+
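+ // Blend byte-wise: PBLENDVB selects on each mask byte's sign bit, and the
+ // all-sign-bits mask makes that equivalent for any element width.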
+ EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+
+ X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
+ Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
}
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
@@ -14409,8 +14565,7 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
- (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14424,8 +14579,7 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal subs from subs of shuffles.
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
- (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14621,7 +14775,23 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
-static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+/// PerformAddCombine - Do target-specific DAG combines on integer adds.
+static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if (Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
+ isHorizontalBinOp(Op0, Op1, true))
+ return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
+
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -14643,6 +14813,12 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
}
}
+ EVT VT = N->getValueType(0);
+ // Try to synthesize horizontal subs from subs of shuffles.
+ if (Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
+ isHorizontalBinOp(Op0, Op1, false))
+ return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
+
return OptimizeConditionalInDecrement(N, DAG);
}
@@ -14656,8 +14832,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
- case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
- case ISD::SUB: return PerformSubCombine(N, DAG);
+ case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
+ case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
@@ -14687,16 +14863,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKHQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 3b7a14d..ccff3a5 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -172,12 +172,18 @@ namespace llvm {
/// ANDNP - Bitwise Logical AND NOT of Packed FP values.
ANDNP,
- /// PSIGNB/W/D - Copy integer sign.
- PSIGNB, PSIGNW, PSIGND,
+ /// PSIGN - Copy integer sign.
+ PSIGN,
/// BLEND family of opcodes
BLENDV,
+ /// HADD - Integer horizontal add.
+ HADD,
+
+ /// HSUB - Integer horizontal sub.
+ HSUB,
+
/// FHADD - Floating point horizontal add.
FHADD,
@@ -269,12 +275,8 @@ namespace llvm {
MOVSS,
UNPCKLPS,
UNPCKLPD,
- VUNPCKLPSY,
- VUNPCKLPDY,
UNPCKHPS,
UNPCKHPD,
- VUNPCKHPSY,
- VUNPCKHPDY,
PUNPCKLBW,
PUNPCKLWD,
PUNPCKLDQ,
@@ -408,11 +410,13 @@ namespace llvm {
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
- bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+ bool isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2,
+ bool V2IsSplat = false);
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
- bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+ bool isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2,
+ bool V2IsSplat = false);
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 0245e5c..fa1d676 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
namespace llvm {
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 6fd2efd..791bbe6 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -41,6 +41,8 @@ def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
+def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
+def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
@@ -51,14 +53,8 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB",
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignb : SDNode<"X86ISD::PSIGNB",
- SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
-def X86psignw : SDNode<"X86ISD::PSIGNW",
- SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
-def X86psignd : SDNode<"X86ISD::PSIGND",
- SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+def X86psign : SDNode<"X86ISD::PSIGN",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
@@ -136,13 +132,9 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
-def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
-def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
-def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
-def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
@@ -427,12 +419,12 @@ def movl : PatFrag<(ops node:$lhs, node:$rhs),
def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2());
}]>;
def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2());
}]>;
def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 9428fff..24c4a53 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -2903,6 +2902,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (LoadMI->getOpcode()) {
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
+ case X86::AVX2_SETALLONES:
Alignment = 32;
break;
case X86::V_SET0:
@@ -2948,6 +2948,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
case X86::AVX_SETALLONES:
+ case X86::AVX2_SETALLONES:
case X86::FsFLD0SD:
case X86::FsFLD0SS:
case X86::VFsFLD0SD:
@@ -2986,7 +2987,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES ||
+ Opc == X86::AVX2_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
@@ -3555,7 +3557,11 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
{ X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
{ X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+};
+
+static const unsigned ReplaceableInstrsAVX2[][3] = {
+ // PackedSingle PackedDouble PackedInt
{ X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
{ X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
{ X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
@@ -3563,7 +3569,7 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
@@ -3576,11 +3582,23 @@ static const unsigned *lookup(unsigned opcode, unsigned domain) {
return 0;
}
+static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
+ if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
+ return ReplaceableInstrsAVX2[i];
+ return 0;
+}
+
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
- return std::make_pair(domain,
- domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+ bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+ uint16_t validDomains = 0;
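+ // validDomains is a bitmask over {PackedSingle, PackedDouble, PackedInt}:
+ // 0xe allows all three, 0x6 excludes PackedInt, whose 256-bit forms
+ // require AVX2.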
+ if (domain && lookup(MI->getOpcode(), domain))
+ validDomains = 0xe;
+ else if (domain && lookupAVX2(MI->getOpcode(), domain))
+ validDomains = hasAVX2 ? 0xe : 0x6;
+ return std::make_pair(domain, validDomains);
}
void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
@@ -3588,6 +3606,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
const unsigned *table = lookup(MI->getOpcode(), dom);
+ if (!table) { // try the other table
+ assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
+ "256-bit vector operations only available in AVX2");
+ table = lookupAVX2(MI->getOpcode(), dom);
+ }
assert(table && "Cannot change domain");
MI->setDesc(get(table[Domain-1]));
}
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 79ce509..35631d5 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1523,10 +1523,11 @@ def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwde", "cwtl">;
def : MnemonicAlias<"cwd", "cwtd">;
def : MnemonicAlias<"cdq", "cltd">;
-def : MnemonicAlias<"cwde", "cwtl">;
def : MnemonicAlias<"cdqe", "cltq">;
+def : MnemonicAlias<"cqo", "cqto">;
// lret maps to lretl, it is not ambiguous with lretq.
def : MnemonicAlias<"lret", "lretl">;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 6deee4f..7cadac1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -311,13 +311,16 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
// JIT implementation, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+ let Predicates = [HasAVX] in
def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
+ let Predicates = [HasAVX2] in
+ def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
+}
//===----------------------------------------------------------------------===//
@@ -522,6 +525,8 @@ let Predicates = [HasSSE2] in {
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
@@ -2467,21 +2472,21 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
+ def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
+ def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
+ def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
@@ -2493,21 +2498,21 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
+ def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
@@ -3421,47 +3426,6 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))]>;
}
-
-/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
-///
-/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
-/// to collapse (bitconvert VT to VT) into its operand.
-///
-multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit IsCommutable = 0, bit Is2Addr = 1> {
- let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
-}
-
-/// PDI_binop_rm_v4i64 - Simple AVX2 binary operator whose type is v4i64.
-///
-/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
-/// to collapse (bitconvert VT to VT) into its operand.
-///
-multiclass PDI_binop_rm_v4i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, i256mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (OpNode VR256:$src1, (memopv4i64 addr:$src2)))]>;
-}
-
} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
@@ -3473,7 +3437,8 @@ defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
-defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
+ i128mem, 1, 0>, VEX_4V;
defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
@@ -3482,7 +3447,8 @@ defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
i128mem, 0, 0>, VEX_4V;
defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
i128mem, 0, 0>, VEX_4V;
-defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
+ i128mem, 0, 0>, VEX_4V;
// Intrinsic forms
defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
@@ -3527,21 +3493,23 @@ defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
let Predicates = [HasAVX2] in {
defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
- i256mem, 1, 0>, VEX_4V;
+ i256mem, 1, 0>, VEX_4V;
defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
- i256mem, 1, 0>, VEX_4V;
+ i256mem, 1, 0>, VEX_4V;
defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
- i256mem, 1, 0>, VEX_4V;
-defm VPADDQY : PDI_binop_rm_v4i64<0xD4, "vpaddq", add, 1>, VEX_4V;
+ i256mem, 1, 0>, VEX_4V;
+defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
+ i256mem, 1, 0>, VEX_4V;
defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
- i256mem, 1, 0>, VEX_4V;
+ i256mem, 1, 0>, VEX_4V;
defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
- i256mem, 0, 0>, VEX_4V;
+ i256mem, 0, 0>, VEX_4V;
defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
- i256mem, 0, 0>, VEX_4V;
+ i256mem, 0, 0>, VEX_4V;
defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
- i256mem, 0, 0>, VEX_4V;
-defm VPSUBQY : PDI_binop_rm_v4i64<0xFB, "vpsubq", sub, 0>, VEX_4V;
+ i256mem, 0, 0>, VEX_4V;
+defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
+ i256mem, 0, 0>, VEX_4V;
// Intrinsic forms
defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
@@ -3591,7 +3559,8 @@ defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
i128mem, 1>;
defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
i128mem, 1>;
-defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
+ i128mem, 1>;
defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
i128mem, 1>;
defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
@@ -3600,7 +3569,8 @@ defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
i128mem>;
defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
i128mem>;
-defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
+ i128mem>;
// Intrinsic forms
defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
@@ -3676,9 +3646,12 @@ defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
VR128, 0>, VEX_4V;
-defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
-defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
-defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
+defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
+ i128mem, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
+ i128mem, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
+ i128mem, 1, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3735,9 +3708,12 @@ defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
int_x86_avx2_psra_d, int_x86_avx2_psrai_d,
VR256, 0>, VEX_4V;
-defm VPANDY : PDI_binop_rm_v4i64<0xDB, "vpand", and, 1>, VEX_4V;
-defm VPORY : PDI_binop_rm_v4i64<0xEB, "vpor" , or, 1>, VEX_4V;
-defm VPXORY : PDI_binop_rm_v4i64<0xEF, "vpxor", xor, 1>, VEX_4V;
+defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
+ i256mem, 1, 0>, VEX_4V;
+defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
+ i256mem, 1, 0>, VEX_4V;
+defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
+ i256mem, 1, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3794,9 +3770,12 @@ defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
VR128>;
-defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
-defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
-defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
+ i128mem, 1>;
+defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
+ i128mem, 1>;
+defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
+ i128mem, 1>;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3822,51 +3801,51 @@ let ExeDomain = SSEPackedInt in {
let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
+ (VPSLLDQri VR128:$src1, imm:$src2)>;
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ (VPSRLDQri VR128:$src1, imm:$src2)>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// Shift up / down and insert zeros.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
}
let Predicates = [HasAVX2] in {
def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
- (v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
- (v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2),
- (v4i64 (VPSLLDQYri VR256:$src1, imm:$src2))>;
+ (VPSLLDQYri VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2),
- (v4i64 (VPSRLDQYri VR256:$src1, imm:$src2))>;
+ (VPSRLDQYri VR256:$src1, imm:$src2)>;
}
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
+ (PSLLDQri VR128:$src1, imm:$src2)>;
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ (PSRLDQri VR128:$src1, imm:$src2)>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// Shift up / down and insert zeros.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
}
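// The non-_bs patterns above fold the intrinsics' bit-count immediate into
// PSLLDQ/PSRLDQ's byte count via BYTE_imm, presumably an SDNodeXForm defined
// earlier in this file along the lines of:
//   def BYTE_imm : SDNodeXForm<imm, [{
//     return getI32Imm(N->getZExtValue() >> 3);  // bits -> bytes
//   }]>;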
//===---------------------------------------------------------------------===//
@@ -3889,28 +3868,34 @@ let Predicates = [HasAVX] in {
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
(VPCMPEQBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
(VPCMPEQBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
(VPCMPEQWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPCMPEQWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
(VPCMPEQDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPCMPEQDrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
(VPCMPGTBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
(VPCMPGTBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
(VPCMPGTWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPCMPGTWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
(VPCMPGTDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPCMPGTDrm VR128:$src1, addr:$src2)>;
}
@@ -3930,28 +3915,34 @@ let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, VR256:$src2)),
(VPCMPEQBYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, (memop addr:$src2))),
+ def : Pat<(v32i8 (X86pcmpeqb VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)))),
(VPCMPEQBYrm VR256:$src1, addr:$src2)>;
def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, VR256:$src2)),
(VPCMPEQWYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, (memop addr:$src2))),
+ def : Pat<(v16i16 (X86pcmpeqw VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)))),
(VPCMPEQWYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, VR256:$src2)),
(VPCMPEQDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, (memop addr:$src2))),
+ def : Pat<(v8i32 (X86pcmpeqd VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)))),
(VPCMPEQDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, VR256:$src2)),
(VPCMPGTBYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, (memop addr:$src2))),
+ def : Pat<(v32i8 (X86pcmpgtb VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)))),
(VPCMPGTBYrm VR256:$src1, addr:$src2)>;
def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, VR256:$src2)),
(VPCMPGTWYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, (memop addr:$src2))),
+ def : Pat<(v16i16 (X86pcmpgtw VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)))),
(VPCMPGTWYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, VR256:$src2)),
(VPCMPGTDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, (memop addr:$src2))),
+ def : Pat<(v8i32 (X86pcmpgtd VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)))),
(VPCMPGTDYrm VR256:$src1, addr:$src2)>;
}
@@ -3973,28 +3964,34 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasSSE2] in {
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
(PCMPEQBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
(PCMPEQBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
(PCMPEQWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
(PCMPEQWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
(PCMPEQDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
(PCMPEQDrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
(PCMPGTBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
(PCMPGTBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
(PCMPGTWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
(PCMPGTWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
(PCMPGTDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
(PCMPGTDrm VR128:$src1, addr:$src2)>;
}
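// Integer vector loads are canonicalized to v2i64/v4i64, so the memory
// operands must be matched through an explicit bitconvert fragment instead
// of a bare (memop ...). The bc_* fragments come from
// X86InstrFragmentsSIMD.td and presumably look like:
//   def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;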
@@ -4207,19 +4204,8 @@ let Predicates = [HasAVX] in {
bc_v8i16, 0>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
bc_v4i32, 0>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq,
+ bc_v2i64, 0>, VEX_4V;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
bc_v16i8, 0>, VEX_4V;
@@ -4227,19 +4213,8 @@ let Predicates = [HasAVX] in {
bc_v8i16, 0>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
bc_v4i32, 0>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq,
+ bc_v2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
@@ -4249,19 +4224,8 @@ let Predicates = [HasAVX2] in {
bc_v16i16>, VEX_4V;
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq,
bc_v8i32>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1,
- VR256:$src2)))]>, VEX_4V;
- def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1,
- (memopv4i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq,
+ bc_v4i64>, VEX_4V;
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
bc_v32i8>, VEX_4V;
@@ -4269,57 +4233,28 @@ let Predicates = [HasAVX2] in {
bc_v16i16>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq,
bc_v8i32>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1,
- VR256:$src2)))]>, VEX_4V;
- def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1,
- (memopv4i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq,
+ bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
-
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw,
+ bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd,
+ bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq,
+ bc_v4i32>;
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq,
+ bc_v2i64>;
+
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw,
+ bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd,
+ bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq,
+ bc_v4i32>;
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq,
+ bc_v2i64>;
}
} // ExeDomain = SSEPackedInt
@@ -5052,21 +4987,25 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
-let Predicates = [HasAVX],
- ExeDomain = SSEPackedDouble in {
- defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, 0>, TB, XD, VEX_4V;
- defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, 0>, TB, OpSize, VEX_4V;
- defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, 0>, TB, XD, VEX_4V;
- defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, 0>, TB, OpSize, VEX_4V;
-}
-let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
- ExeDomain = SSEPackedDouble in {
+let Predicates = [HasAVX] in {
+ let ExeDomain = SSEPackedSingle in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, 0>, TB, XD, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, 0>, TB, OpSize, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, 0>, TB, OpSize, VEX_4V;
+ }
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in {
+ let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
f128mem>, TB, XD;
+ let ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
f128mem>, TB, OpSize;
}
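// Splitting the blocks gives each def the ExeDomain matching its data type;
// the ExecutionDependencyFix pass (enabled in X86TargetMachine.cpp below)
// uses that tag to rewrite logically equivalent ops into one domain and
// avoid bypass delays, e.g. (illustrative only):
//   andps  %xmm1, %xmm0   ; float domain
//   andpd  %xmm1, %xmm0   ; double domain -- same bits, different domain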
@@ -5106,29 +5045,37 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
}
let Predicates = [HasAVX] in {
- defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
- defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
- defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
- defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
- defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
- defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
- defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
- defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in {
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ }
}
let Constraints = "$src1 = $dst" in {
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ let ExeDomain = SSEPackedSingle in {
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ }
}
//===---------------------------------------------------------------------===//
@@ -5284,11 +5231,11 @@ let isCommutable = 0 in {
int_x86_avx2_pmadd_ub_sw>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
int_x86_avx2_pshuf_b>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv16i8,
+ defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
int_x86_avx2_psign_b>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv8i16,
+ defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
int_x86_avx2_psign_w>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv4i32,
+ defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
int_x86_avx2_psign_d>, VEX_4V;
}
defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
@@ -5331,12 +5278,21 @@ let Predicates = [HasSSSE3] in {
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>;
- def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNBrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNDrr128 VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)),
+ (PHADDWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)),
+ (PHADDDrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)),
+ (PHSUBWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)),
+ (PHSUBDrr128 VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasAVX] in {
@@ -5345,12 +5301,39 @@ let Predicates = [HasAVX] in {
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(VPSHUFBrm128 VR128:$src, addr:$mask)>;
- def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)),
+ (VPHADDWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)),
+ (VPHADDDrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)),
+ (VPHSUBWrr128 VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)),
+ (VPHSUBDrr128 VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v32i8 (X86psign VR256:$src1, VR256:$src2)),
+ (VPSIGNBrr256 VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86psign VR256:$src1, VR256:$src2)),
+ (VPSIGNWrr256 VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)),
+ (VPSIGNDrr256 VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v16i16 (X86hadd VR256:$src1, VR256:$src2)),
+ (VPHADDWrr256 VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86hadd VR256:$src1, VR256:$src2)),
+ (VPHADDDrr256 VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86hsub VR256:$src1, VR256:$src2)),
+ (VPHSUBWrr256 VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86hsub VR256:$src1, VR256:$src2)),
+ (VPHSUBDrr256 VR256:$src1, VR256:$src2)>;
}
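// The per-width X86psignb/w/d selectors collapse into a single X86psign node
// disambiguated by the pattern's result type; likewise X86hadd/X86hsub for
// the horizontal ops. A plausible shape for the shared node (an assumption;
// the real def lives in X86InstrFragmentsSIMD.td):
//   def X86psign : SDNode<"X86ISD::PSIGN", SDTIntBinOp>;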
//===---------------------------------------------------------------------===//
@@ -5837,14 +5820,16 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let Predicates = [HasAVX] in {
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
- def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, OpSize, VEX;
+let ExeDomain = SSEPackedSingle in {
+ let Predicates = [HasAVX] in {
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+ }
+ defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}
-defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
@@ -5965,10 +5950,12 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3))]>, OpSize;
}
-let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let Predicates = [HasAVX] in
- defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+let ExeDomain = SSEPackedSingle in {
+ let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+ let Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+}
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
(VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
@@ -5985,6 +5972,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
PatFrag mem_frag32, PatFrag mem_frag64,
Intrinsic V4F32Int, Intrinsic V2F64Int> {
+let ExeDomain = SSEPackedSingle in {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
@@ -5995,15 +5983,16 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
OpSize;
// Vector intrinsic operation, mem
- def PSm : Ii8<opcps, MRMSrcMem,
+ def PSm : SS4AIi8<opcps, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
- TA, OpSize,
- Requires<[HasSSE41]>;
+ OpSize;
+} // ExeDomain = SSEPackedSingle
+let ExeDomain = SSEPackedDouble in {
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
@@ -6020,44 +6009,14 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
[(set RC:$dst,
(V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
OpSize;
-}
-
-multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
- RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-
- // Vector intrinsic operation, mem
- def PSm_AVX : Ii8<opcps, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TA, OpSize, Requires<[HasSSE41]>;
-
- // Vector intrinsic operation, reg
- def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-
- // Vector intrinsic operation, mem
- def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
+} // ExeDomain = SSEPackedDouble
}
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
+let ExeDomain = GenericDomain in {
// Intrinsic operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
@@ -6103,37 +6062,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
[(set VR128:$dst,
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
OpSize;
-}
-
-multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr> {
- // Intrinsic operation, reg.
- def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, mem.
- def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, reg.
- def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, mem.
- def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+} // ExeDomain = GenericDomain
}
// FP round - roundss, roundps, roundsd, roundpd
@@ -6150,13 +6079,6 @@ let Predicates = [HasAVX] in {
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
-
- // Instructions for the assembler
- defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
- VEX;
- defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
- VEX;
- defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6194,11 +6116,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
+ "ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
+ "ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
OpSize;
}
@@ -6216,11 +6138,15 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedSingle in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+}
+let ExeDomain = SSEPackedDouble in {
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
}
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
@@ -6391,10 +6317,12 @@ let Constraints = "$src1 = $dst" in {
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
}
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (PCMPEQQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (PCMPEQQrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasSSE41] in {
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
+}
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -6470,23 +6398,30 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ }
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
}
+ let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, memopv32i8, i256mem, 0>, VEX_4V;
}
@@ -6502,8 +6437,10 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
+ let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
VR128, memopv16i8, i128mem>;
+ let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
VR128, memopv16i8, i128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
@@ -6511,8 +6448,10 @@ let Constraints = "$src1 = $dst" in {
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
VR128, memopv16i8, i128mem>;
}
+ let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv16i8, i128mem>;
+ let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
VR128, memopv16i8, i128mem>;
}
@@ -6539,16 +6478,20 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
}
let Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
memopv16i8, int_x86_sse41_blendvpd>;
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvps>;
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv16i8, int_x86_sse41_pblendvb>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
memopv32i8, int_x86_avx_blendv_pd_256>;
+} // ExeDomain = SSEPackedDouble
+let ExeDomain = SSEPackedSingle in {
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
memopv32i8, int_x86_avx_blendv_ps_256>;
+} // ExeDomain = SSEPackedSingle
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv16i8, int_x86_sse41_pblendvb>;
}
let Predicates = [HasAVX2] in {
@@ -6612,7 +6555,9 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
}
}
+let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
@@ -6712,10 +6657,12 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
-def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
- (PCMPGTQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
- (PCMPGTQrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasSSE42] in {
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (PCMPGTQrm VR128:$src1, addr:$src2)>;
+}
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
@@ -7164,21 +7111,27 @@ class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int VR128:$src))]>, VEX;
-def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
- int_x86_avx_vbroadcast_ss>;
-def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcast_ss_256>;
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcast_ss>;
+ def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcast_ss_256>;
+}
+let ExeDomain = SSEPackedDouble in
def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
int_x86_avx_vbroadcast_sd_256>;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
-def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
- int_x86_avx2_vbroadcast_ss_ps>;
-def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
- int_x86_avx2_vbroadcast_ss_ps_256>;
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
+ int_x86_avx2_vbroadcast_ss_ps>;
+ def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
+ int_x86_avx2_vbroadcast_ss_ps_256>;
+}
+let ExeDomain = SSEPackedDouble in
def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256>;
+ int_x86_avx2_vbroadcast_sd_pd_256>;
let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
@@ -7187,19 +7140,6 @@ def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
- (VBROADCASTSDrm addr:$src)>;
-def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDrm addr:$src)>;
-
-def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
-def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
@@ -7300,8 +7240,7 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
Intrinsic IntLd, Intrinsic IntLd256,
- Intrinsic IntSt, Intrinsic IntSt256,
- PatFrag pf128, PatFrag pf256> {
+ Intrinsic IntSt, Intrinsic IntSt256> {
def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -7322,18 +7261,18 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
}
+let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
int_x86_avx_maskload_ps,
int_x86_avx_maskload_ps_256,
int_x86_avx_maskstore_ps,
- int_x86_avx_maskstore_ps_256,
- memopv4f32, memopv8f32>;
+ int_x86_avx_maskstore_ps_256>;
+let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
int_x86_avx_maskload_pd,
int_x86_avx_maskload_pd_256,
int_x86_avx_maskstore_pd,
- int_x86_avx_maskstore_pd_256,
- memopv2f64, memopv4f64>;
+ int_x86_avx_maskstore_pd_256>;
//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
@@ -7361,22 +7300,26 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
[(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
}
-defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv4f32, memopv4i32,
- int_x86_avx_vpermilvar_ps,
- int_x86_avx_vpermil_ps>;
-defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv8f32, memopv8i32,
- int_x86_avx_vpermilvar_ps_256,
- int_x86_avx_vpermil_ps_256>;
-defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- memopv2f64, memopv2i64,
- int_x86_avx_vpermilvar_pd,
- int_x86_avx_vpermil_pd>;
-defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- memopv4f64, memopv4i64,
- int_x86_avx_vpermilvar_pd_256,
- int_x86_avx_vpermil_pd_256>;
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+ defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
+}
def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
@@ -7549,6 +7492,40 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
int_x86_avx2_pbroadcastq_128,
int_x86_avx2_pbroadcastq_256>;
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBrm addr:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBYrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDrm addr:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDYrm addr:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQYrm addr:$src)>;
+}
+
+// AVX1 broadcast patterns
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSDrm addr:$src)>;
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSDrm addr:$src)>;
+
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+
//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//
@@ -7569,6 +7546,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
}
defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>;
+let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
@@ -7588,6 +7566,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
VEX_W;
+let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
VEX_W;
@@ -7643,8 +7622,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
//
multiclass avx2_pmovmask<string OpcodeStr,
Intrinsic IntLd128, Intrinsic IntLd256,
- Intrinsic IntSt128, Intrinsic IntSt256,
- PatFrag pf128, PatFrag pf256> {
+ Intrinsic IntSt128, Intrinsic IntSt256> {
def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -7667,124 +7645,49 @@ defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
int_x86_avx2_maskload_d,
int_x86_avx2_maskload_d_256,
int_x86_avx2_maskstore_d,
- int_x86_avx2_maskstore_d_256,
- memopv4i32, memopv8i32>;
+ int_x86_avx2_maskstore_d_256>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskload_q,
int_x86_avx2_maskload_q_256,
int_x86_avx2_maskstore_q,
- int_x86_avx2_maskstore_q_256,
- memopv2i64, memopv4i64>, VEX_W;
+ int_x86_avx2_maskstore_q_256>, VEX_W;
//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
-multiclass avx2_var_shift<bits<8> opc, string OpcodeStr,
- Intrinsic Int128, Intrinsic Int256> {
+multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128, ValueType vt256> {
def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V;
- def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
- VEX_4V;
- def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V;
- def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, i256mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, (bitconvert (memopv4i64 addr:$src2))))]>,
+ (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
VEX_4V;
-}
-
-multiclass avx2_var_shift_i64<bits<8> opc, string OpcodeStr,
- Intrinsic Int128, Intrinsic Int256> {
- def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V;
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int128 VR128:$src1, (memopv2i64 addr:$src2)))]>,
+ (vt128 (OpNode VR128:$src1,
+ (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>,
VEX_4V;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V;
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
+ VEX_4V;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (Int256 VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ (vt256 (OpNode VR256:$src1,
+ (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
VEX_4V;
}
-defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", int_x86_avx2_psllv_d,
- int_x86_avx2_psllv_d_256>;
-defm VPSLLVQ : avx2_var_shift_i64<0x47, "vpsllvq", int_x86_avx2_psllv_q,
- int_x86_avx2_psllv_q_256>, VEX_W;
-defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", int_x86_avx2_psrlv_d,
- int_x86_avx2_psrlv_d_256>;
-defm VPSRLVQ : avx2_var_shift_i64<0x45, "vpsrlvq", int_x86_avx2_psrlv_q,
- int_x86_avx2_psrlv_q_256>, VEX_W;
-defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", int_x86_avx2_psrav_d,
- int_x86_avx2_psrav_d_256>;
-
-let Predicates = [HasAVX2] in {
- def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
- (VPSLLVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))),
- (VPSLLVQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (srl (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
- (VPSRLVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (v2i64 VR128:$src2))),
- (VPSRLVQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
- (VPSRAVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i32 (shl (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
- (VPSLLVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))),
- (VPSLLVQYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (srl (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
- (VPSRLVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (v4i64 VR256:$src2))),
- (VPSRLVQYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
- (VPSRAVDYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(v4i32 (shl (v4i32 VR128:$src1),
- (v4i32 (bitconvert (memopv2i64 addr:$src2))))),
- (VPSLLVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))),
- (VPSLLVQrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (srl (v4i32 VR128:$src1),
- (v4i32 (bitconvert (memopv2i64 addr:$src2))))),
- (VPSRLVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))),
- (VPSRLVQrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (sra (v4i32 VR128:$src1),
- (v4i32 (bitconvert (memopv2i64 addr:$src2))))),
- (VPSRAVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i32 (shl (v8i32 VR256:$src1),
- (v8i32 (bitconvert (memopv4i64 addr:$src2))))),
- (VPSLLVDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))),
- (VPSLLVQYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (srl (v8i32 VR256:$src1),
- (v8i32 (bitconvert (memopv4i64 addr:$src2))))),
- (VPSRLVDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))),
- (VPSRLVQYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (sra (v8i32 VR256:$src1),
- (v8i32 (bitconvert (memopv4i64 addr:$src2))))),
- (VPSRAVDYrm VR256:$src1, addr:$src2)>;
-}
+defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
+defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
+defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
+defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
+defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
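// With the multiclass parameterized on a target-independent SDNode plus the
// two vector types, generic shl/srl/sra DAG nodes select these directly and
// the hand-written Pats above become redundant. For example, VPSLLVDrr's
// pattern instantiates (per the multiclass body) to:
//   [(set VR128:$dst, (v4i32 (shl VR128:$src1, (v4i32 VR128:$src2))))]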
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 328cf67..81ee665 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -376,6 +376,7 @@ ReSimplify:
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
+ case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 763fb43..e93f8e9 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -190,6 +190,10 @@ public:
bool hasAVX2() const { return HasAVX2; }
bool hasXMM() const { return hasSSE1() || hasAVX(); }
bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
+ bool hasSSE3orAVX() const { return hasSSE3() || hasAVX(); }
+ bool hasSSSE3orAVX() const { return hasSSSE3() || hasAVX(); }
+ bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); }
+ bool hasSSE42orAVX() const { return hasSSE42() || hasAVX(); }
bool hasAES() const { return HasAES; }
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
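// The new *orAVX helpers let callers gate on a feature level without
// repeating the "|| hasAVX()" disjunction; a hypothetical call site:
//   if (Subtarget->hasSSE41orAVX())
//     return LowerToPEXTR(Op, DAG);  // LowerToPEXTR is illustrative only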
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 4d4d7c0..1c9f3bd 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -31,8 +31,9 @@ extern "C" void LLVMInitializeX86Target() {
X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, false),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
"n8:16:32-S128" :
@@ -51,8 +52,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, true),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, true),
DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
"n8:16:32:64-S128"),
InstrInfo(*this),
@@ -66,8 +68,9 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
@@ -102,16 +105,15 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
static cl::opt<bool>
UseVZeroUpper("x86-use-vzeroupper",
cl::desc("Minimize AVX to SSE transition penalty"),
- cl::init(false));
+ cl::init(true));
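// Now on by default; it can still be switched off through the normal cl::opt
// plumbing, e.g. (sketch):
//   llc -mattr=+avx -x86-use-vzeroupper=false test.ll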
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86TargetMachine::addInstSelector(PassManagerBase &PM) {
// Install an instruction selector.
- PM.add(createX86ISelDag(*this, OptLevel));
+ PM.add(createX86ISelDag(*this, getOptLevel()));
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
if (!Subtarget.is64Bit())
@@ -120,33 +122,21 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
return false;
}
-bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM) {
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
-bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM) {
PM.add(createX86FloatingPointStackifierPass());
return true; // -print-machineinstr should print after this.
}
-bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM) {
bool ShouldPrint = false;
- if (OptLevel != CodeGenOpt::None) {
- if (Subtarget.hasXMMInt()) {
- PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
- ShouldPrint = true;
- }
- if (Subtarget.hasAVX2()) {
- // FIXME this should be turned on for just AVX, but the pass doesn't check
- // that instructions are valid before replacing them and there are AVX2
- // integer instructions in the table.
- PM.add(createExecutionDependencyFixPass(&X86::VR256RegClass));
- ShouldPrint = true;
- }
+ if (getOptLevel() != CodeGenOpt::None && Subtarget.hasXMMInt()) {
+ PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ ShouldPrint = true;
}
if (Subtarget.hasAVX() && UseVZeroUpper) {
@@ -158,7 +148,6 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
}
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
PM.add(createX86JITCodeEmitterPass(*this, JCE));
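With the optimization level now stored on the TargetMachine (via
MCCodeGenInfo), the pass-pipeline hooks drop their CodeGenOpt::Level
parameter and call getOptLevel() instead. The same migration applies to any
target; sketched here for a hypothetical out-of-tree backend:

    // FooTargetMachine and createFooBranchRelaxationPass are
    // hypothetical stand-ins for the pattern shown in the diff above.
    bool FooTargetMachine::addPreEmitPass(PassManagerBase &PM) {
      if (getOptLevel() == CodeGenOpt::None)
        return false;                 // nothing added; don't print after
      PM.add(createFooBranchRelaxationPass());
      return true;                    // -print-machineinstr prints after this
    }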
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index d1569aa..64be458 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -40,6 +40,7 @@ public:
X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const {
@@ -66,11 +67,11 @@ public:
}
// Set up the pass pipeline.
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreRegAlloc(PassManagerBase &PM);
+ virtual bool addPostRegAlloc(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
};
@@ -85,7 +86,8 @@ class X86_32TargetMachine : public X86TargetMachine {
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
@@ -112,7 +114,8 @@ class X86_64TargetMachine : public X86TargetMachine {
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
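Callers now choose the level once, at construction time. Assuming the
TargetRegistry factory was updated in the same series to forward the new
parameter, creating a machine for a given triple string TripleStr looks
roughly like:

    std::string Error;
    const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, Error);
    TargetMachine *TM =
        TheTarget->createTargetMachine(TripleStr, "generic", "",
                                       Reloc::Default, CodeModel::Default,
                                       CodeGenOpt::Aggressive);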
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 276e841..7d5fcce 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -61,9 +61,10 @@ static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
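The MCCodeGenInfo factories change in lock-step across all targets because
they are invoked through the TargetRegistry, which now supplies the extra
argument. The registration itself is untouched; a sketch of the usual
pattern, per the LLVMInitialize*TargetMC convention of this era:

    extern "C" void LLVMInitializeXCoreTargetMC() {
      // ... other MC registrations elided ...
      TargetRegistry::RegisterMCCodeGenInfo(TheXCoreTarget,
                                            createXCoreMCCodeGenInfo);
    }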
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index fdc5d35..eec3674 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -21,8 +21,9 @@ using namespace llvm;
///
XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
@@ -32,8 +33,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
TSInfo(*this) {
}
-bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) {
PM.add(createXCoreISelDag(*this));
return false;
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 83d09d6..3f2644d 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -34,7 +34,8 @@ class XCoreTargetMachine : public LLVMTargetMachine {
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const XCoreFrameLowering *getFrameLowering() const {
@@ -55,7 +56,7 @@ public:
virtual const TargetData *getTargetData() const { return &DataLayout; }
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addInstSelector(PassManagerBase &PM);
};
} // end namespace llvm
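For reference, the enum being threaded through all of these constructors,
as defined in include/llvm/Target/TargetMachine.h around this time:

    namespace CodeGenOpt {
      enum Level {
        None,        // -O0
        Less,        // -O1
        Default,     // -O2, -Os
        Aggressive   // -O3
      };
    }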