Diffstat (limited to 'lib/Target')
87 files changed, 2184 insertions(+), 1513 deletions(-)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7a7267a..9315348 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -28,7 +28,6 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/BranchProbability.h"
@@ -47,7 +46,7 @@ EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                cl::desc("Enable ARM 2-addr to 3-addr conv"));
 
 static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden,
+WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
            cl::desc("Widen ARM vmovs to vmovd when possible"));
 
 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
@@ -710,8 +709,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   unsigned Align = MFI.getObjectAlignment(FI);
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(MachinePointerInfo(
-                                       PseudoSourceValue::getFixedStack(FI)),
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                             MachineMemOperand::MOStore,
                             MFI.getObjectSize(FI),
                             Align);
@@ -862,7 +860,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   unsigned Align = MFI.getObjectAlignment(FI);
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(
-                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                    MachinePointerInfo::getFixedStack(FI),
                     MachineMemOperand::MOLoad,
                     MFI.getObjectSize(FI),
                     Align);
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fb7d96a..fc464ea 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -824,7 +824,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       MI.eraseFromParent();
       return true;
     }
-    case ARM::Int_eh_sjlj_dispatchsetup: {
+    case ARM::eh_sjlj_dispatchsetup: {
       MachineFunction &MF = *MI.getParent()->getParent();
       const ARMBaseInstrInfo *AII =
         static_cast<const ARMBaseInstrInfo*>(TII);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 4df084f..9bae422 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -37,7 +37,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -197,8 +196,6 @@ class ARMFastISel : public FastISel {
 
     // Call handling routines.
   private:
-    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
-                        unsigned &ResultReg);
     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
     bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                          SmallVectorImpl<unsigned> &ArgRegs,
@@ -687,6 +684,8 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
   return 0;
 }
 
+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
 unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
   // Don't handle dynamic allocas.
   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -1115,7 +1114,7 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
   // Create the base instruction, then add the operands.
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                     TII.get(StrOpc))
-                            .addReg(SrcReg, getKillRegState(true));
+                            .addReg(SrcReg);
   AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
   return true;
 }
@@ -1304,6 +1303,8 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
   int Imm = 0;
   bool UseImm = false;
   bool isNegativeImm = false;
+  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
+  // Thus, Src1Value may be a ConstantInt, but we're missing it.
   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
     if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
         SrcVT == MVT::i1) {
@@ -1669,12 +1670,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
   if (isFloat && !Subtarget->hasVFP2())
     return false;
 
-  unsigned Op1 = getRegForValue(I->getOperand(0));
-  if (Op1 == 0) return false;
-
-  unsigned Op2 = getRegForValue(I->getOperand(1));
-  if (Op2 == 0) return false;
-
   unsigned Opc;
   bool is64bit = VT == MVT::f64 || VT == MVT::i64;
   switch (ISDOpcode) {
@@ -1689,6 +1684,12 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
       Opc = is64bit ? ARM::VMULD : ARM::VMULS;
       break;
   }
+  unsigned Op1 = getRegForValue(I->getOperand(0));
+  if (Op1 == 0) return false;
+
+  unsigned Op2 = getRegForValue(I->getOperand(1));
+  if (Op2 == 0) return false;
+
   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(Opc), ResultReg)
@@ -1699,18 +1700,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
 
 // Call Handling Code
 
-bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
-                                 EVT SrcVT, unsigned &ResultReg) {
-  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
-                           Src, /*TODO: Kill=*/false);
-
-  if (RR != 0) {
-    ResultReg = RR;
-    return true;
-  } else
-    return false;
-}
-
 // This is largely taken directly from CCAssignFnForNode - we don't support
 // varargs in FastISel so that part has been removed.
 // TODO: We may not support all of this.
@@ -2119,9 +2108,6 @@ bool ARMFastISel::SelectCall(const Instruction *I,
     if (IntrMemName && e-i <= 2)
       break;
 
-    unsigned Arg = getRegForValue(*i);
-    if (Arg == 0)
-      return false;
     ISD::ArgFlagsTy Flags;
     unsigned AttrInd = i - CS.arg_begin() + 1;
     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -2141,6 +2127,11 @@ bool ARMFastISel::SelectCall(const Instruction *I,
     if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
         ArgVT != MVT::i1)
       return false;
+
+    unsigned Arg = getRegForValue(*i);
+    if (Arg == 0)
+      return false;
+
     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
     Flags.setOrigAlign(OriginalAlignment);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b55ef70..8c4c06f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -40,7 +40,6 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Target/TargetOptions.h"
@@ -687,7 +686,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (Subtarget->isTargetDarwin()) {
     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
-    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
   }
 
@@ -864,7 +862,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
-  case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
 
   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
 
@@ -912,6 +909,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
+  case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
   case ARMISD::VDUP:          return "ARMISD::VDUP";
   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
   case ARMISD::VEXT:          return "ARMISD::VEXT";
@@ -2212,14 +2210,6 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
 }
 
 SDValue
-ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
-  const {
-  DebugLoc dl = Op.getDebugLoc();
-  return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
-                     Op.getOperand(0), Op.getOperand(1));
-}
-
-SDValue
 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   SDValue Val = DAG.getConstant(0, MVT::i32);
@@ -3986,6 +3976,16 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
       return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
     }
+
+    // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
+    if (VT == MVT::v2f32 || VT == MVT::v4f32) {
+      ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0));
+      int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF());
+      if (ImmVal != -1) {
+        SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+        return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
+      }
+    }
   }
 }
@@ -5014,7 +5014,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
   case ISD::EH_SJLJ_SETJMP:      return LowerEH_SJLJ_SETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP:     return LowerEH_SJLJ_LONGJMP(Op, DAG);
-  case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN:  return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                                 Subtarget);
   case ISD::BITCAST:             return ExpandBITCAST(Op.getNode(), DAG);
@@ -5556,52 +5555,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
   return BB;
 }
 
-/// EmitBasePointerRecalculation - For functions using a base pointer, we
-/// rematerialize it (via the frame pointer).
-void ARMTargetLowering::
-EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
-                             MachineBasicBlock *DispatchBB) const {
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
-  MachineFunction &MF = *MI->getParent()->getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
-
-  if (!RI.hasBasePointer(MF)) return;
-
-  MachineBasicBlock::iterator MBBI = MI;
-
-  int32_t NumBytes = AFI->getFramePtrSpillOffset();
-  unsigned FramePtr = RI.getFrameRegister(MF);
-  assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
-         "Base pointer without frame pointer?");
-
-  if (AFI->isThumb2Function())
-    llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
-                                 FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
-  else if (AFI->isThumbFunction())
-    llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
-                                    FramePtr, -NumBytes, *AII, RI);
-  else
-    llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
-                                  FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
-
-  if (!RI.needsStackRealignment(MF)) return;
-
-  // If there's dynamic realignment, adjust for it.
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  unsigned MaxAlign = MFI->getMaxAlignment();
-  assert(!AFI->isThumb1OnlyFunction());
-
-  // Emit bic r6, r6, MaxAlign
-  unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
-  AddDefaultCC(
-    AddDefaultPred(
-      BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
-      .addReg(ARM::R6, RegState::Kill)
-      .addImm(MaxAlign - 1)));
-}
-
 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
 /// registers the function context.
 void ARMTargetLowering::
@@ -5636,8 +5589,6 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
     MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                              MachineMemOperand::MOStore, 4, 4);
 
-  EmitBasePointerRecalculation(MI, MBB, DispatchBB);
-
   // Load the address of the dispatch MBB into the jump buffer.
   if (isThumb2) {
     // Incoming value: jbuf
@@ -5811,6 +5762,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
                              MachineMemOperand::MOLoad |
                              MachineMemOperand::MOVolatile, 4, 4);
 
+  BuildMI(DispatchBB, dl, TII->get(ARM::eh_sjlj_dispatchsetup));
+
   unsigned NumLPads = LPadList.size();
   if (Subtarget->isThumb2()) {
     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index be6a530..b8dc4bf 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -81,7 +81,6 @@ namespace llvm {
 
       EH_SJLJ_SETJMP,        // SjLj exception handling setjmp.
       EH_SJLJ_LONGJMP,       // SjLj exception handling longjmp.
-      EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
 
      TC_RETURN,              // Tail call return pseudo.
 
@@ -146,6 +145,9 @@ namespace llvm {
       VMOVIMM,
       VMVNIMM,
 
+      // Vector move f32 immediate:
+      VMOVFPIMM,
+
       // Vector duplicate:
       VDUP,
       VDUPLANE,
@@ -407,7 +409,6 @@ namespace llvm {
                              ISD::ArgFlagsTy Flags) const;
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                     const ARMSubtarget *Subtarget) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -517,9 +518,6 @@ namespace llvm {
                                       bool signExtend,
                                       ARMCC::CondCodes Cond) const;
 
-    void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
-                                      MachineBasicBlock *DispatchBB) const;
-
     void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
                                 MachineBasicBlock *DispatchBB, int FI) const;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 06ee2c8..6940156 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -2041,9 +2041,26 @@ multiclass VFPDT64InstAlias<string opc, string asm, dag Result> {
   def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
   defm : VFPDT64ReqInstAlias<opc, asm, Result>;
 }
+multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> {
+  def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
+  def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
+  def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
+  def D   : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
+}
+// VFPDT64NoF64ReqInstAlias plus plain ".64"
+multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> {
+  def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+  defm : VFPDT64NoF64ReqInstAlias<opc, asm, Result>;
+}
 multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
   defm : VFPDT8InstAlias<opc, asm, Result>;
   defm : VFPDT16InstAlias<opc, asm, Result>;
   defm : VFPDT32InstAlias<opc, asm, Result>;
   defm : VFPDT64InstAlias<opc, asm, Result>;
 }
+multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> {
+  defm : VFPDT8InstAlias<opc, asm, Result>;
+  defm : VFPDT16InstAlias<opc, asm, Result>;
+  defm : VFPDT32InstAlias<opc, asm, Result>;
+  defm : VFPDT64NoF64InstAlias<opc, asm, Result>;
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 770703c..be03924 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,8 +58,6 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
                                                  SDTCisInt<2>]>;
 def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
 def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
@@ -143,9 +141,6 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
                                SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
 def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
                                SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
-                                     SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
-
 def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
                            [SDNPHasChain]>;
@@ -475,6 +470,7 @@ def shift_so_reg_reg : Operand<i32>,    // reg reg imm
   let EncoderMethod = "getSORegRegOpValue";
   let PrintMethod = "printSORegRegOperand";
   let DecoderMethod = "DecodeSORegRegOperand";
+  let ParserMatchClass = ShiftedRegAsmOperand;
   let MIOperandInfo = (ops GPR, GPR, i32imm);
 }
 
@@ -485,6 +481,7 @@ def shift_so_reg_imm : Operand<i32>,    // reg reg imm
   let EncoderMethod = "getSORegImmOpValue";
   let PrintMethod = "printSORegImmOperand";
   let DecoderMethod = "DecodeSORegImmOperand";
+  let ParserMatchClass = ShiftedImmAsmOperand;
   let MIOperandInfo = (ops GPR, i32imm);
 }
 
@@ -1555,7 +1552,7 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
 }
 
 // Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These psuedos use a hand-written selection code).
+// (These pseudos use a hand-written selection code).
 let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
   def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
                               (ins GPR:$addr, GPR:$src1, GPR:$src2),
@@ -4673,11 +4670,8 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
 // This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
 // handled when the pseudo is expanded (which happens before any passes
 // that need the instruction size).
-let isBarrier = 1, hasSideEffects = 1 in
-def Int_eh_sjlj_dispatchsetup :
- PseudoInst<(outs), (ins GPR:$src), NoItinerary,
-            [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
-              Requires<[IsDarwin]>;
+let isBarrier = 1 in
+def eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
@@ -5023,3 +5017,43 @@ def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
 def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
                         (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                         cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                         cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                         cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                         cc_out:$s)>;
+// shifter instructions also support a two-operand form.
+def : ARMInstAlias<"asr${s}${p} $Rm, $imm",
+                   (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rm, $imm",
+                   (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rm, $imm",
+                   (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rm, $imm",
+                   (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"asr${s}${p} $Rn, $Rm",
+                   (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                    cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm",
+                   (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                    cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm",
+                   (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                    cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
+                   (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                    cc_out:$s)>;
+
+
+// 'mul' instruction can be specified with only two operands.
+def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
+                   (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 49cc254..f2ca963 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -39,6 +39,10 @@ def nImmVMOVI32 : Operand<i32> {
   let PrintMethod = "printNEONModImmOperand";
   let ParserMatchClass = nImmVMOVI32AsmOperand;
 }
+def nImmVMOVF32 : Operand<i32> {
+  let PrintMethod = "printFPImmOperand";
+  let ParserMatchClass = FPImmOperand;
+}
 def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
 def nImmSplatI64 : Operand<i32> {
   let PrintMethod = "printNEONModImmOperand";
@@ -173,6 +177,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
 def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
 def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
 
 def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                          SDTCisVT<2, i32>]>;
@@ -4464,6 +4469,10 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
                 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
 def : InstAlias<"vmov${p} $Vd, $Vm",
                 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
+                              (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
+                              (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
 
 // VMOV : Vector Move (Immediate)
 
@@ -4513,6 +4522,15 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                          (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                          "vmov", "i64", "$Vd, $SIMM", "",
                          [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
+                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+                         "vmov", "f32", "$Vd, $SIMM", "",
+                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
+                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+                         "vmov", "f32", "$Vd, $SIMM", "",
+                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
 } // isReMaterializable
 
 // VMOV : Vector Get Lane (move scalar to ARM core register)
@@ -4801,6 +4819,7 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;
 
 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+let DecoderMethod = "DecodeVCVTD" in {
 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                         v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4809,7 +4828,9 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+}
 
+let DecoderMethod = "DecodeVCVTQ" in {
 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                         v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4818,6 +4839,7 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+}
 
 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -5218,6 +5240,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
                 (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
 defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
                 (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
+                                   rGPR:$Rm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, pred:$p)>;
 
 // Load two D registers.
 defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
@@ -5237,6 +5272,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
                 (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
 defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
                 (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
+                                   rGPR:$Rm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, pred:$p)>;
 
 // Load three D registers.
 defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
@@ -5260,6 +5308,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
 defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
                 (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg,
+                                    addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg,
+                                     addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg,
+                                     addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg,
+                                     addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
 
 // Load four D registers.
@@ -5284,6 +5345,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
 defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
                 (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg,
+                                    addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg,
+                                     addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg,
+                                     addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
+                (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg,
+                                     addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
 
 // VST1 requires a size suffix, but also accepts type specific variants.
 // Store one D register.
@@ -5304,6 +5378,19 @@ defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
                 (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
 defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
                 (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+                (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
+                                   VecListOneD:$Vd, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+                (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
+                                    VecListOneD:$Vd, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+                (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
+                                    VecListOneD:$Vd, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+                (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
+                                    VecListOneD:$Vd, pred:$p)>;
 
 // Store two D registers.
 defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
@@ -5323,6 +5410,19 @@ defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
                 (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
 defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
                 (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
+// with writeback, register stride
+defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+                (VST1q8wb_register zero_reg, addrmode6:$Rn,
+                                   rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+                (VST1q16wb_register zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+                (VST1q32wb_register zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
+defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
+                (VST1q64wb_register zero_reg, addrmode6:$Rn,
+                                    rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
 
 // FIXME: The three and four register VST1 instructions haven't been moved
 // to the VecList* encoding yet, so we can't do assembly parsing support
@@ -5346,3 +5446,19 @@ defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
 //                (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
 //defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
 //                (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
+
+
+// VTRN instructions data type suffix aliases for more-specific types.
+defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm",
+                           (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
+                           (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
+                           (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm",
+                           (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>;
+defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
+                           (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>;
+defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
+                           (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 03077c0..6129fa3 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -4084,3 +4084,8 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
 // for isel.
 def : t2InstAlias<"mov${p} $Rd, $imm",
                   (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+
+
+// Wide 'mul' encoding can be specified with only two operands.
+def : t2InstAlias<"mul${p} $Rn, $Rm",
+                  (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 488c508..e420135 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1172,3 +1172,35 @@ defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr",
                         (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
 defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr",
                         (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+
+// VMUL has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
+                    (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm",
+                    (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VADD has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm",
+                    (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm",
+                    (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VSUB has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm",
+                    (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm",
+                    (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+
+// VMOV can accept optional .f32/.f64 suffix.
+def : VFP2InstAlias<"vmov${p}.f32 $Rt, $Sn",
+                    (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f32 $Sn, $Rt",
+                    (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+
+def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
+                    (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
+                    (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
+
+// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
+// VMOVD does.
+def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
+                    (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index cf1432d..6cbb24b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -38,8 +38,9 @@ extern "C" void LLVMInitializeARMTarget() {
 ///
 ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
                                            StringRef CPU, StringRef FS,
-                                           Reloc::Model RM, CodeModel::Model CM)
-  : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+                                           Reloc::Model RM, CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+  : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
     Subtarget(TT, CPU, FS),
     JITInfo(),
     InstrItins(Subtarget.getInstrItineraryData()) {
@@ -50,8 +51,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
 
 ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
                                    StringRef CPU, StringRef FS,
-                                   Reloc::Model RM, CodeModel::Model CM)
-  : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget),
+                                   Reloc::Model RM, CodeModel::Model CM,
+                                   CodeGenOpt::Level OL)
+  : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget),
     DataLayout(Subtarget.isAPCS_ABI() ?
                std::string("e-p:32:32-f64:32:64-i64:32:64-"
                            "v128:32:128-v64:32:64-n32-S32") :
@@ -71,8 +73,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
 
 ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
                                        StringRef CPU, StringRef FS,
-                                       Reloc::Model RM, CodeModel::Model CM)
-  : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM),
+                                       Reloc::Model RM, CodeModel::Model CM,
+                                       CodeGenOpt::Level OL)
+  : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL),
    InstrInfo(Subtarget.hasThumb2() ?
              ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) :
              ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -95,34 +98,30 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
                  : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
 }
 
-bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel) {
-  if (OptLevel != CodeGenOpt::None && EnableGlobalMerge)
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM) {
+  if (getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
     PM.add(createGlobalMergePass(getTargetLowering()));
 
   return false;
 }
 
-bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
-                                           CodeGenOpt::Level OptLevel) {
-  PM.add(createARMISelDag(*this, OptLevel));
+bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM) {
+  PM.add(createARMISelDag(*this, getOptLevel()));
   return false;
 }
 
-bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
-                                          CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
-  if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+  if (getOptLevel() != CodeGenOpt::None && !Subtarget.isThumb1Only())
     PM.add(createARMLoadStoreOptimizationPass(true));
-  if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
+  if (getOptLevel() != CodeGenOpt::None && Subtarget.isCortexA9())
     PM.add(createMLxExpansionPass());
 
   return true;
 }
 
-bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
-                                        CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
-  if (OptLevel != CodeGenOpt::None) {
+  if (getOptLevel() != CodeGenOpt::None) {
     if (!Subtarget.isThumb1Only())
       PM.add(createARMLoadStoreOptimizationPass());
     if (Subtarget.hasNEON())
@@ -133,7 +132,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
   // proper scheduling.
   PM.add(createARMExpandPseudoPass());
 
-  if (OptLevel != CodeGenOpt::None) {
+  if (getOptLevel() != CodeGenOpt::None) {
     if (!Subtarget.isThumb1Only())
       PM.add(createIfConverterPass());
   }
@@ -143,8 +142,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
   return true;
 }
 
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
-                                          CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) {
   if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
     PM.add(createThumb2SizeReductionPass());
 
@@ -153,7 +151,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
 }
 
 bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
-                                          CodeGenOpt::Level OptLevel,
                                           JITCodeEmitter &JCE) {
   // Machine code emitter pass for ARM.
   PM.add(createARMJITCodeEmitterPass(*this, JCE));
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c8c601c..a1f517b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,7 +41,8 @@ private:
 public:
   ARMBaseTargetMachine(const Target &T, StringRef TT,
                        StringRef CPU, StringRef FS,
-                       Reloc::Model RM, CodeModel::Model CM);
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
 
   virtual       ARMJITInfo   *getJITInfo()             { return &JITInfo; }
   virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -50,13 +51,12 @@ public:
   }
 
   // Pass Pipeline Configuration
-  virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-  virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-  virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-  virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-  virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-  virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
-                              JITCodeEmitter &MCE);
+  virtual bool addPreISel(PassManagerBase &PM);
+  virtual bool addInstSelector(PassManagerBase &PM);
+  virtual bool addPreRegAlloc(PassManagerBase &PM);
+  virtual bool addPreSched2(PassManagerBase &PM);
+  virtual bool addPreEmitPass(PassManagerBase &PM);
+  virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);
 };
 
 /// ARMTargetMachine - ARM target machine.
@@ -71,7 +71,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
 public:
   ARMTargetMachine(const Target &T, StringRef TT,
                    StringRef CPU, StringRef FS,
-                   Reloc::Model RM, CodeModel::Model CM);
+                   Reloc::Model RM, CodeModel::Model CM,
+                   CodeGenOpt::Level OL);
 
   virtual const ARMRegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
@@ -111,7 +112,8 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
 public:
   ThumbTargetMachine(const Target &T, StringRef TT,
                      StringRef CPU, StringRef FS,
-                     Reloc::Model RM, CodeModel::Model CM);
+                     Reloc::Model RM, CodeModel::Model CM,
+                     CodeGenOpt::Level OL);
 
   /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
   virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 1d66d12..bb83e5e 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1946,18 +1946,15 @@ void ARMOperand::print(raw_ostream &OS) const {
     break;
   case k_ShiftedRegister:
     OS << "<so_reg_reg "
-       << RegShiftedReg.SrcReg
-       << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
-       << ", " << RegShiftedReg.ShiftReg << ", "
-       << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
-       << ">";
+       << RegShiftedReg.SrcReg << " "
+       << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
+       << " " << RegShiftedReg.ShiftReg << ">";
     break;
   case k_ShiftedImmediate:
     OS << "<so_reg_imm "
-       << RegShiftedImm.SrcReg
-       << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
-       << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
-       << ">";
+       << RegShiftedImm.SrcReg << " "
+       << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
+       << " #" << RegShiftedImm.ShiftImm << ">";
     break;
   case k_RotateImmediate:
     OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -2366,7 +2363,7 @@ static unsigned getDRegFromQReg(unsigned QReg) {
   case ARM::Q6:  return ARM::D12;
   case ARM::Q7:  return ARM::D14;
   case ARM::Q8:  return ARM::D16;
-  case ARM::Q9:  return ARM::D19;
+  case ARM::Q9:  return ARM::D18;
   case ARM::Q10: return ARM::D20;
   case ARM::Q11: return ARM::D22;
   case ARM::Q12: return ARM::D24;
@@ -2420,7 +2417,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   while (Parser.getTok().is(AsmToken::Comma) ||
          Parser.getTok().is(AsmToken::Minus)) {
     if (Parser.getTok().is(AsmToken::Minus)) {
-      Parser.Lex(); // Eat the comma.
+      Parser.Lex(); // Eat the minus.
       SMLoc EndLoc = Parser.getTok().getLoc();
       int EndReg = tryParseRegister();
       if (EndReg == -1)
@@ -2487,10 +2484,31 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 // parse a vector register list
 ARMAsmParser::OperandMatchResultTy ARMAsmParser::
 parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  if(Parser.getTok().isNot(AsmToken::LCurly))
+  SMLoc S = Parser.getTok().getLoc();
+  // As an extension (to match gas), support a plain D register or Q register
+  // (without enclosing curly braces) as a single or double entry list,
+  // respectively.
+  if (Parser.getTok().is(AsmToken::Identifier)) {
+    int Reg = tryParseRegister();
+    if (Reg == -1)
+      return MatchOperand_NoMatch;
+    SMLoc E = Parser.getTok().getLoc();
+    if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+      Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E));
+      return MatchOperand_Success;
+    }
+    if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+      Reg = getDRegFromQReg(Reg);
+      Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E));
+      return MatchOperand_Success;
+    }
+    Error(S, "vector register expected");
+    return MatchOperand_ParseFail;
+  }
+
+  if (Parser.getTok().isNot(AsmToken::LCurly))
     return MatchOperand_NoMatch;
 
-  SMLoc S = Parser.getTok().getLoc();
   Parser.Lex(); // Eat '{' token.
   SMLoc RegLoc = Parser.getTok().getLoc();
 
@@ -2509,7 +2527,39 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
     ++Count;
   }
 
-  while (Parser.getTok().is(AsmToken::Comma)) {
+  while (Parser.getTok().is(AsmToken::Comma) ||
+         Parser.getTok().is(AsmToken::Minus)) {
+    if (Parser.getTok().is(AsmToken::Minus)) {
+      Parser.Lex(); // Eat the minus.
+      SMLoc EndLoc = Parser.getTok().getLoc();
+      int EndReg = tryParseRegister();
+      if (EndReg == -1) {
+        Error(EndLoc, "register expected");
+        return MatchOperand_ParseFail;
+      }
+      // Allow Q regs and just interpret them as the two D sub-registers.
+      if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+        EndReg = getDRegFromQReg(EndReg) + 1;
+      // If the register is the same as the start reg, there's nothing
+      // more to do.
+      if (Reg == EndReg)
+        continue;
+      // The register must be in the same register class as the first.
+      if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+        Error(EndLoc, "invalid register in register list");
+        return MatchOperand_ParseFail;
+      }
+      // Ranges must go from low to high.
+      if (Reg > EndReg) {
+        Error(EndLoc, "bad range in register list");
+        return MatchOperand_ParseFail;
+      }
+
+      // Add all the registers in the range to the register list.
+      Count += EndReg - Reg;
+      Reg = EndReg;
+      continue;
+    }
     Parser.Lex(); // Eat the comma.
     RegLoc = Parser.getTok().getLoc();
     int OldReg = Reg;
@@ -3538,9 +3588,12 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   }
 
   // If we have a '#', it's an immediate offset, else assume it's a register
-  // offset.
-  if (Parser.getTok().is(AsmToken::Hash)) {
-    Parser.Lex(); // Eat the '#'.
+  // offset. Be friendly and also accept a plain integer (without a leading
+  // hash) for gas compatibility.
+  if (Parser.getTok().is(AsmToken::Hash) ||
+      Parser.getTok().is(AsmToken::Integer)) {
+    if (Parser.getTok().is(AsmToken::Hash))
+      Parser.Lex(); // Eat the '#'.
     E = Parser.getTok().getLoc();
 
     bool isNegative = getParser().getTok().is(AsmToken::Minus);
@@ -4098,6 +4151,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
       // remove the cc_out operand.
       (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
        !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+       !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
       !inITBlock() ||
       (static_cast<ARMOperand*>(Operands[3])->getReg() !=
        static_cast<ARMOperand*>(Operands[5])->getReg() &&
@@ -4105,6 +4159,20 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
        static_cast<ARMOperand*>(Operands[4])->getReg())))
     return true;
 
+  // Also check the 'mul' syntax variant that doesn't specify an explicit
+  // destination register.
+  if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
+      static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+      static_cast<ARMOperand*>(Operands[3])->isReg() &&
+      static_cast<ARMOperand*>(Operands[4])->isReg() &&
+      // If the registers aren't low regs or the cc_out operand is zero
+      // outside of an IT block, we have to use the 32-bit encoding, so
+      // remove the cc_out operand.
+      (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+       !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+       !inITBlock()))
+    return true;
+
   // Register-register 'add/sub' for thumb does not have a cc_out operand
@@ -4542,12 +4610,37 @@ processInstruction(MCInst &Inst,
                    const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   switch (Inst.getOpcode()) {
   // Handle the MOV complex aliases.
+  case ARM::ASRr:
+  case ARM::LSRr:
+  case ARM::LSLr:
+  case ARM::RORr: {
+    ARM_AM::ShiftOpc ShiftTy;
+    switch(Inst.getOpcode()) {
+    default: llvm_unreachable("unexpected opcode!");
+    case ARM::ASRr: ShiftTy = ARM_AM::asr; break;
+    case ARM::LSRr: ShiftTy = ARM_AM::lsr; break;
+    case ARM::LSLr: ShiftTy = ARM_AM::lsl; break;
+    case ARM::RORr: ShiftTy = ARM_AM::ror; break;
+    }
+    // A shift by zero is a plain MOVr, not a MOVsi.
+    unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0);
+    MCInst TmpInst;
+    TmpInst.setOpcode(ARM::MOVsr);
+    TmpInst.addOperand(Inst.getOperand(0)); // Rd
+    TmpInst.addOperand(Inst.getOperand(1)); // Rn
+    TmpInst.addOperand(Inst.getOperand(2)); // Rm
+    TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+    TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+    TmpInst.addOperand(Inst.getOperand(4));
+    TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+    Inst = TmpInst;
+    return true;
+  }
   case ARM::ASRi:
   case ARM::LSRi:
   case ARM::LSLi:
   case ARM::RORi: {
     ARM_AM::ShiftOpc ShiftTy;
-    unsigned Amt = Inst.getOperand(2).getImm();
     switch(Inst.getOpcode()) {
     default: llvm_unreachable("unexpected opcode!");
     case ARM::ASRi: ShiftTy = ARM_AM::asr; break;
@@ -4556,6 +4649,7 @@ processInstruction(MCInst &Inst,
     case ARM::RORi: ShiftTy = ARM_AM::ror; break;
     }
     // A shift by zero is a plain MOVr, not a MOVsi.
+    unsigned Amt = Inst.getOperand(2).getImm();
     unsigned Opc = Amt == 0 ?
       ARM::MOVr : ARM::MOVsi;
     unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
     MCInst TmpInst;
@@ -4570,6 +4664,19 @@ processInstruction(MCInst &Inst,
     Inst = TmpInst;
     return true;
   }
+  case ARM::RRXi: {
+    unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0);
+    MCInst TmpInst;
+    TmpInst.setOpcode(ARM::MOVsi);
+    TmpInst.addOperand(Inst.getOperand(0)); // Rd
+    TmpInst.addOperand(Inst.getOperand(1)); // Rn
+    TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+    TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+    TmpInst.addOperand(Inst.getOperand(3));
+    TmpInst.addOperand(Inst.getOperand(4)); // cc_out
+    Inst = TmpInst;
+    return true;
+  }
   case ARM::t2LDMIA_UPD: {
     // If this is a load of a single register, then we should use
     // a post-indexed LDR instruction instead, per the ARM ARM.
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index baa55f2..511932e 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -62,8 +62,8 @@ add_llvm_library_dependencies(LLVMARMCodeGen
   LLVMTarget
   )
 
-# workaround for hanging compilation on MSVC10
-if( MSVC_VERSION EQUAL 1600 )
+# workaround for hanging compilation on MSVC9, 10
+if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
   set_property(
     SOURCE ARMISelLowering.cpp
     PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 0b9b5d0..ad250ab 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -179,8 +179,6 @@ static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn,
                                uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
-                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val,
@@ -251,6 +249,11 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn,
+                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
+                                uint64_t Address, const void *Decoder);
+
 
 static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
                                uint64_t Address, const void *Decoder);
@@ -1921,12 +1924,6 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
 }
 
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
-                                         uint64_t Address, const void *Decoder) {
-  Inst.addOperand(MCOperand::CreateImm(64 - Val));
-  return MCDisassembler::Success;
-}
-
 static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -4085,3 +4082,60 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn,
 
   return S;
 }
+
+static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn,
+                                uint64_t Address, const void *Decoder) {
+  unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+  unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+  unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+  unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+  DecodeStatus S = MCDisassembler::Success;
+
+  // VMOVv2f32 is ambiguous with these decodings.
+  if (!(imm & 0x38) && cmode == 0xF) {
+    Inst.setOpcode(ARM::VMOVv2f32);
+    return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+  }
+
+  if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+  if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+  return S;
+}
+
+static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
+                                uint64_t Address, const void *Decoder) {
+  unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+  unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+  unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+  unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+  DecodeStatus S = MCDisassembler::Success;
+
+  // VMOVv4f32 is ambiguous with these decodings.
+  if (!(imm & 0x38) && cmode == 0xF) {
+    Inst.setOpcode(ARM::VMOVv4f32);
+    return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+  }
+
+  if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+  if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+  return S;
+}
+
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1bc585b..62d04c4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -60,7 +60,7 @@ public:
 // ARMFixupKinds.h.
 //
 // Name                      Offset (bits) Size (bits)     Flags
-{ "fixup_arm_ldst_pcrel_12", 1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_ldst_pcrel_12", 0,            32,  MCFixupKindInfo::FKF_IsPCRel },
 { "fixup_t2_ldst_pcrel_12",  0,            32,  MCFixupKindInfo::FKF_IsPCRel |
                                    MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
 { "fixup_arm_pcrel_10",      0,            32,  MCFixupKindInfo::FKF_IsPCRel },
@@ -68,7 +68,7 @@ public:
                                    MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
 { "fixup_thumb_adr_pcrel_10",0,             8,  MCFixupKindInfo::FKF_IsPCRel |
                                    MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12",  1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_adr_pcrel_12",  0,            32,  MCFixupKindInfo::FKF_IsPCRel },
 { "fixup_t2_adr_pcrel_12",   0,            32,  MCFixupKindInfo::FKF_IsPCRel |
                                    MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
 { "fixup_arm_condbranch",    0,            24,  MCFixupKindInfo::FKF_IsPCRel },
@@ -138,7 +138,7 @@ bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
   const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
   const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
   const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
-  const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
+  const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
   if (isThumb()) {
     const uint16_t nopEncoding = hasNOP() ?
      Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 6042b11..e86f48e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -129,14 +129,15 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
 }
 
 static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
-                                             CodeModel::Model CM) {
+                                             CodeModel::Model CM,
+                                             CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
   if (RM == Reloc::Default) {
     Triple TheTriple(TT);
     // Default relocation model on Darwin is PIC, not DynamicNoPIC.
     RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
   }
-  X->InitMCCodeGenInfo(RM, CM);
+  X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 218311d..de33bd6 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -18,7 +18,6 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/ADT/SmallVector.h"
 #include "Thumb1InstrInfo.h"
 
@@ -60,8 +59,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(
-              MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                               MachineMemOperand::MOStore,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
@@ -89,8 +87,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(
-              MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                               MachineMemOperand::MOLoad,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index cf040c8..7ec3c0e 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -20,7 +20,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CommandLine.h"
 
@@ -130,8 +129,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(
-              MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                               MachineMemOperand::MOStore,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
@@ -158,8 +156,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(
-              MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                               MachineMemOperand::MOLoad,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 06e812b..8bce52c 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -3604,7 +3604,6 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
 bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                          formatted_raw_ostream &o,
                                          CodeGenFileType FileType,
-                                         CodeGenOpt::Level OptLevel,
                                          bool DisableVerify) {
   if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
 
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index 4f1ca97..ca346af 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -22,13 +22,13 @@ namespace llvm {
 
 struct CTargetMachine : public TargetMachine {
   CTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
-                 Reloc::Model RM, CodeModel::Model CM)
+                 Reloc::Model RM, CodeModel::Model CM,
+                 CodeGenOpt::Level OL)
     : TargetMachine(T, TT, CPU, FS) {}
 
   virtual bool addPassesToEmitFile(PassManagerBase &PM,
                                    formatted_raw_ostream &Out,
                                    CodeGenFileType FileType,
-                                   CodeGenOpt::Level OptLevel,
                                    bool DisableVerify);
 
   virtual const TargetData *getTargetData() const { return 0; }
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
index d5af2a8..5ce14c9 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -62,11 +62,12 @@ static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
 }
 
 static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
-                                             CodeModel::Model CM) {
+                                             CodeModel::Model CM,
+                                             CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
   // For the time being, use static relocations, since there's really no
   // support for PIC yet.
-  X->InitMCCodeGenInfo(Reloc::Static, CM);
+  X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
   return X;
 }
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 99837df..a851be3 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -22,7 +22,6 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 93a7f6e..6940316 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -34,8 +34,9 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
 
 SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
                                    StringRef CPU, StringRef FS,
-                                   Reloc::Model RM, CodeModel::Model CM)
-  : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+                                   Reloc::Model RM, CodeModel::Model CM,
+                                   CodeGenOpt::Level OL)
+  : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
     Subtarget(TT, CPU, FS),
     DataLayout(Subtarget.getTargetDataString()),
     InstrInfo(*this),
@@ -49,8 +50,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
 // Pass Pipeline Configuration
 //===----------------------------------------------------------------------===//
 
-bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
-                                       CodeGenOpt::Level OptLevel) {
+bool SPUTargetMachine::addInstSelector(PassManagerBase &PM) {
   // Install an instruction selector.
   PM.add(createSPUISelDag(*this));
   return false;
@@ -58,7 +58,7 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
 
 // passes to run just before printing the assembly
 bool SPUTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+addPreEmitPass(PassManagerBase &PM) {
   // load the TCE instruction scheduler, if available via
   // loaded plugins
   typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index fffe77c..909f12e 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -40,7 +40,8 @@ class SPUTargetMachine : public LLVMTargetMachine {
 public:
   SPUTargetMachine(const Target &T, StringRef TT,
                    StringRef CPU, StringRef FS,
-                   Reloc::Model RM, CodeModel::Model CM);
+                   Reloc::Model RM, CodeModel::Model CM,
+                   CodeGenOpt::Level OL);
 
   /// Return the subtarget implementation object
   virtual const SPUSubtarget *getSubtargetImpl() const {
@@ -81,9 +82,8 @@ public:
   }
 
   // Pass Pipeline Configuration
-  virtual bool addInstSelector(PassManagerBase &PM,
-                               CodeGenOpt::Level OptLevel);
-  virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
+  virtual bool addInstSelector(PassManagerBase &PM);
+  virtual bool addPreEmitPass(PassManagerBase &);
 };
 
 } // end namespace llvm
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 394ea2b..efeb989 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -2065,7 +2065,6 @@ char CppWriter::ID = 0;
 bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                            formatted_raw_ostream &o,
                                            CodeGenFileType FileType,
-                                           CodeGenOpt::Level OptLevel,
                                            bool DisableVerify) {
   if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
   PM.add(new CppWriter(o));
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 287e537..a3613b4 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -24,13 +24,13 @@ class formatted_raw_ostream;
 
 struct CPPTargetMachine : public TargetMachine {
   CPPTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
-                   Reloc::Model RM, CodeModel::Model CM)
+                   Reloc::Model RM, CodeModel::Model CM,
+                   CodeGenOpt::Level OL)
     : TargetMachine(T, TT, CPU, FS) {}
 
   virtual bool addPassesToEmitFile(PassManagerBase &PM,
                                    formatted_raw_ostream &Out,
                                    CodeGenFileType FileType,
-                                   CodeGenOpt::Level OptLevel,
                                    bool DisableVerify);
 
   virtual const TargetData *getTargetData() const { return 0; }
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 7bff53e..4ad7bd6 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -34,8 +34,9 @@ extern "C" void LLVMInitializeMBlazeTarget() {
 MBlazeTargetMachine::
 MBlazeTargetMachine(const Target &T, StringRef TT,
                     StringRef CPU, StringRef FS,
-                    Reloc::Model RM, CodeModel::Model CM):
-  LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+                    Reloc::Model RM, CodeModel::Model CM,
+                    CodeGenOpt::Level OL):
+  LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
   Subtarget(TT, CPU, FS),
   DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
   InstrInfo(*this),
@@ -46,8 +47,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
 
 // Install an instruction selector pass using
 // the ISelDag to gen MBlaze code.
-bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM) { PM.add(createMBlazeISelDag(*this)); return false; } @@ -55,8 +55,7 @@ bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM, // Implemented by targets that want to run passes immediately before // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. -bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM) { PM.add(createMBlazeDelaySlotFillerPass(*this)); return true; } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index c1bc08a..1c1aa53 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -43,7 +43,8 @@ namespace llvm { public: MBlazeTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -77,8 +78,8 @@ namespace llvm { } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt); - virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); }; } // End llvm namespace diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index 43ae281..a3a5cf4 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -62,13 +62,14 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) RM = Reloc::Static; if (CM == CodeModel::Default) CM = CodeModel::Small; - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index fda70b8..0d532e3 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -51,9 +51,10 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU, } static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 9daeb2a..5c94137 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -29,7 +29,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp 
b/lib/Target/MSP430/MSP430InstrInfo.cpp index ffd4318..81f766e 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -43,8 +42,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); @@ -72,8 +70,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 4dd8933..fe185fb 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -28,8 +28,9 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), @@ -37,15 +38,13 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, FrameLowering(Subtarget) { } -bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM) { // Install an instruction selector. - PM.add(createMSP430ISelDag(*this, OptLevel)); + PM.add(createMSP430ISelDag(*this, getOptLevel())); return false; } -bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM) { // Must run branch selection immediately preceding the asm printer. PM.add(createMSP430BranchSelectionPass()); return false; diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index eb483dc..4fb060f 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -40,7 +40,8 @@ class MSP430TargetMachine : public LLVMTargetMachine { public: MSP430TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; @@ -61,8 +62,8 @@ public: return &TSInfo; } - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); }; // MSP430TargetMachine. 
} // end namespace llvm diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 53656d4d..ac9cfc0 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_target(MipsCodeGen MipsISelLowering.cpp MipsFrameLowering.cpp MipsMCInstLower.cpp - MipsMCSymbolRefExpr.cpp MipsRegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 4f017d0..7bc5fe4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -58,6 +58,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: break; + case FK_GPRel_4: case FK_Data_4: Value &= 0xffffffff; break; @@ -68,6 +69,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Mips::fixup_Mips_PC16: Value &= 0x0000ffff; break; + case Mips::fixup_Mips_HI16: + Value >>= 16; + break; } return Value; @@ -104,15 +108,17 @@ public: llvm_unreachable("Unknown fixup kind!"); case Mips::fixup_Mips_GOT16: // This will be fixed up at link time break; + case FK_GPRel_4: case FK_Data_4: case Mips::fixup_Mips_26: case Mips::fixup_Mips_LO16: case Mips::fixup_Mips_PC16: + case Mips::fixup_Mips_HI16: // For each byte of the fragment that the fixup touches, mask in // the fixup value. The Value has been "split up" into the appropriate // bitfields above. for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + Data[Offset + i] += uint8_t((Value >> (i * 8)) & 0xff); break; } } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 1115fec..0c3cbb3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -173,11 +173,21 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } else if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); MCExpr::ExprKind Kind = Expr->getKind(); + unsigned Ret = 0; + + if (Kind == MCExpr::Binary) { + const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr); + Expr = BE->getLHS(); + Kind = Expr->getKind(); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS()); + assert((Kind == MCExpr::SymbolRef) && CE && + "Binary expression must be sym+const."); + Ret = CE->getValue(); + } + if (Kind == MCExpr::SymbolRef) { - Mips::Fixups FixupKind = Mips::fixup_Mips_NONE; - MCSymbolRefExpr::VariantKind SymRefKind = - cast<MCSymbolRefExpr>(Expr)->getKind(); - switch(SymRefKind) { + Mips::Fixups FixupKind; + switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { case MCSymbolRefExpr::VK_Mips_GPREL: FixupKind = Mips::fixup_Mips_GPREL16; break; @@ -206,12 +216,12 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, FixupKind = Mips::fixup_Mips_TPREL_LO; break; default: - return 0; + return Ret; } // switch Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); } // if SymbolRef // All of the information is in the fixup.
- return 0; + return Ret; } llvm_unreachable("Unable to encode MCOperand!"); // Not reached @@ -234,15 +244,22 @@ MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, unsigned MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { - // FIXME: implement - return 0; + assert(MI.getOperand(OpNo).isImm()); + unsigned szEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); + return szEncoding - 1; } +// FIXME: should be called getMSBEncoding +// unsigned MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { - // FIXME: implement - return 0; + assert(MI.getOperand(OpNo-1).isImm()); + assert(MI.getOperand(OpNo).isImm()); + unsigned pos = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups); + unsigned sz = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); + + return pos + sz - 1; } #include "MipsGenMCCodeEmitter.inc" diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index e6040e4..1fec88a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -63,11 +63,12 @@ static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) RM = Reloc::PIC_; - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 3c97241..b0fb4fa 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -175,6 +175,7 @@ def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>; def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>; /// Jump and Branch Instructions +def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>; def JAL64 : JumpLink64<0x03, "jal">; def JALR64 : JumpLinkReg64<0x00, 0x09, "jalr">; def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; @@ -231,7 +232,24 @@ let Predicates = [IsN64] in { } // hi/lo relocs -def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>; +def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; +def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>; +def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>; +def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>; + +def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>; +def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>; +def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>; +def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>; + +def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)), + (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>; +def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)), + (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>; +def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)), + (DADDiu CPU64Regs:$hi, tjumptable:$lo)>; +def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)), + (DADDiu CPU64Regs:$hi, tconstpool:$lo)>; defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64, ZERO_64>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 186a5e3..d27e3ab 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ 
b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -18,7 +18,6 @@ #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsMCInstLower.h" -#include "MipsMCSymbolRefExpr.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/ADT/SmallString.h" @@ -79,12 +78,19 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Enclose unaligned load or store with .macro & .nomacro directives. if (isUnalignedLoadStore(Opc)) { - MCInst Directive; - Directive.setOpcode(Mips::MACRO); - OutStreamer.EmitInstruction(Directive); - OutStreamer.EmitInstruction(TmpInst0); - Directive.setOpcode(Mips::NOMACRO); - OutStreamer.EmitInstruction(Directive); + if (OutStreamer.hasRawTextSupport()) { + MCInst Directive; + Directive.setOpcode(Mips::MACRO); + OutStreamer.EmitInstruction(Directive); + OutStreamer.EmitInstruction(TmpInst0); + Directive.setOpcode(Mips::NOMACRO); + OutStreamer.EmitInstruction(Directive); + } else { + MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts); + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I + != MCInsts.end(); ++I) + OutStreamer.EmitInstruction(*I); + } return; } @@ -92,8 +98,8 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Lower CPLOAD and CPRESTORE if (Opc == Mips::CPLOAD) { MCInstLowering.LowerCPLOAD(MI, MCInsts); - for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); - I != MCInsts.end(); ++I) + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I + != MCInsts.end(); ++I) OutStreamer.EmitInstruction(*I); return; } @@ -102,7 +108,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInstLowering.LowerCPRESTORE(MI, TmpInst0); OutStreamer.EmitInstruction(TmpInst0); return; - } + } } OutStreamer.EmitInstruction(TmpInst0); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 16461ff..f0c6626 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -27,9 +27,11 @@ class MachineBasicBlock; class Module; class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { - const MipsSubtarget *Subtarget; - + public: + + const MipsSubtarget *Subtarget; + explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 19bb1a5..36aef99 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -152,6 +152,9 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { bool ATUsed; unsigned GP = STI.isABI_N64() ? Mips::GP_64 : Mips::GP; unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9; + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi; @@ -169,13 +172,14 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MFI->setStackSize(StackSize); BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); // Emit instructions that set $gp using the value of $t9. // O32 uses the directive .cpload while N32/64 requires three instructions to // do this. // TODO: Do not emit these instructions if no instructions use $gp.
if (isPIC && STI.isABI_O32()) - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)) + BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::CPLOAD)) .addReg(RegInfo->getPICCallReg()); else if (STI.isABI_N64() || (isPIC && STI.isABI_N32())) { // lui $28,%hi(%neg(%gp_rel(fname))) @@ -189,8 +193,6 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); } - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); - // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; @@ -199,10 +201,8 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MachineLocation DstML, SrcML; // Adjust stack : addi sp, sp, (-imm) - ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB, - MBBI); - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(NewReg).addImm(NewImm); + ATUsed = expandRegLargeImmPair(SP, -StackSize, NewReg, NewImm, MBB, MBBI); + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm); // FIXME: change this when mips goes MC. if (ATUsed) @@ -262,14 +262,13 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP) - .addReg(Mips::SP).addReg(Mips::ZERO); + BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); // emit ".cfi_def_cfa_register $fp" MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); - DstML = MachineLocation(Mips::FP); + DstML = MachineLocation(FP); SrcML = MachineLocation(MachineLocation::VirtualFP); Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); } @@ -293,6 +292,11 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, const MipsInstrInfo &TII = *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; // Get the number of bytes from FrameInfo unsigned StackSize = MFI->getStackSize(); @@ -310,16 +314,13 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, --I; // Insert instruction "move $sp, $fp" at this location. - BuildMI(MBB, I, dl, TII.get(Mips::ADDu), Mips::SP) - .addReg(Mips::FP).addReg(Mips::ZERO); + BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); } // adjust stack : insert addi sp, sp, (imm) if (StackSize) { - ATUsed = expandRegLargeImmPair(Mips::SP, StackSize, NewReg, NewImm, MBB, - MBBI); - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(NewReg).addImm(NewImm); + ATUsed = expandRegLargeImmPair(SP, StackSize, NewReg, NewImm, MBB, MBBI); + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm); // FIXME: change this when mips goes MC. if (ATUsed) @@ -331,13 +332,15 @@ void MipsFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineRegisterInfo& MRI = MF.getRegInfo(); + unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA; + unsigned FP = STI.isABI_N64() ?
Mips::FP_64 : Mips::FP; // FIXME: remove this code if register allocator can correctly mark // $fp and $ra used or unused. // Mark $fp and $ra as used or unused. if (hasFP(MF)) - MRI.setPhysRegUsed(Mips::FP); + MRI.setPhysRegUsed(FP); // The register allocator might determine $ra is used after seeing // instruction "jr $ra", but we do not want PrologEpilogInserter to insert @@ -345,7 +348,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // To correct this, $ra is explicitly marked unused if there is no // function call. if (MF.getFrameInfo()->hasCalls()) - MRI.setPhysRegUsed(Mips::RA); + MRI.setPhysRegUsed(RA); else - MRI.setPhysRegUnused(Mips::RA); + MRI.setPhysRegUnused(RA); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b595f03..b5a15cf 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -127,9 +127,11 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); @@ -1506,7 +1508,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { // %hi/%lo relocation SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_HI); @@ -1517,16 +1519,17 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); } - SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true, - MipsII::MO_GOT); - BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, BAGOTOffset); - SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true, - MipsII::MO_ABS_LO); - SDValue Load = DAG.getLoad(MVT::i32, dl, + EVT ValTy = Op.getValueType(); + unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = IsN64 ? 
MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag); + BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, BAGOTOffset); + SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag); + SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset, MachinePointerInfo(), false, false, false, 0); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset); - return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, BALOOffset); + return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo); } SDValue MipsTargetLowering:: @@ -1649,16 +1652,19 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_GOT); - CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP); - SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), + EVT ValTy = Op.getValueType(); + unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), + N->getOffset(), GOTFlag); + CP = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, CP); + SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), false, false, false, 0); - SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_ABS_LO); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); - ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); + SDValue CPLo = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), + N->getOffset(), OFSTFlag); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, CPLo); + ResNode = DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo); } return ResNode; @@ -2063,6 +2069,7 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, bool IsRegLoc = VA.isRegLoc(); unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. unsigned LocMemOffset = 0; + unsigned MemCpySize = ByValSize; if (!IsRegLoc) LocMemOffset = VA.getLocMemOffset(); @@ -2082,9 +2089,13 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, RegsToPass.push_back(std::make_pair(*Reg, LoadVal)); } + // Return if the struct has been fully copied. + if (!(MemCpySize = ByValSize - Offset)) + return; + // If there is an argument register available, copy the remainder of the // byval argument with sub-doubleword loads and shifts. - if ((Reg != RegEnd) && (ByValSize != Offset)) { + if (Reg != RegEnd) { assert((ByValSize < Offset + 8) && "Size of the remainder should be smaller than 8-byte."); SDValue Val; @@ -2119,19 +2130,18 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, } } - unsigned MemCpySize = ByValSize - Offset; - if (MemCpySize) { - // Create a fixed object on stack at offset LocMemOffset and copy - // remainder of byval arg to it with memcpy. 
- SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(MemCpySize, PtrTy), Alignment, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - } + assert(MemCpySize && "MemCpySize must not be zero."); + + // Create a fixed object on stack at offset LocMemOffset and copy + // remainder of byval arg to it with memcpy. + SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); + ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); } /// LowerCall - functions arguments are copied from virtual regs to diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 5dca9b6..0ae94ab 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -145,7 +145,9 @@ def brtarget : Operand<OtherVT> { let EncoderMethod = "getBranchTargetOpValue"; let OperandType = "OPERAND_PCREL"; } -def calltarget : Operand<i32>; +def calltarget : Operand<iPTR> { + let EncoderMethod = "getJumpTargetOpValue"; +} def calltarget64: Operand<i64>; def simm16 : Operand<i32>; def simm16_64 : Operand<i64>; @@ -378,6 +380,22 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC, let isPseudo = Pseudo; } +// Memory Load/Store +let canFoldAsLoad = 1 in +class LoadX<bits<6> op, RegisterClass RC, + Operand MemOpnd>: + FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), + "", + [], IILoad> { +} + +class StoreX<bits<6> op, RegisterClass RC, + Operand MemOpnd>: + FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), + "", + [], IIStore> { +} + // 32-bit load. multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, bit Pseudo = 0> { @@ -396,6 +414,13 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, Requires<[IsN64]>; } +// 32-bit load. +multiclass LoadX32<bits<6> op> { + def #NAME# : LoadX<op, CPURegs, mem>, + Requires<[NotN64]>; + def _P8 : LoadX<op, CPURegs, mem64>, + Requires<[IsN64]>; +} // 32-bit store. multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode, bit Pseudo = 0> { @@ -414,6 +439,14 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, Requires<[IsN64]>; } +// 32-bit store. 
+multiclass StoreX32<bits<6> op> { + def #NAME# : StoreX<op, CPURegs, mem>, + Requires<[NotN64]>; + def _P8 : StoreX<op, CPURegs, mem64>, + Requires<[IsN64]>; +} + // Conditional Branch class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16), @@ -458,10 +491,11 @@ class JumpFJ<bits<6> op, string instr_asm>: FJ<op, (outs), (ins jmptarget:$target), !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>; -let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in -class JumpFR<bits<6> op, bits<6> func, string instr_asm>: - FR<op, func, (outs), (ins CPURegs:$rs), - !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> { +let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1, + isIndirectBranch = 1 in +class JumpFR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>: + FR<op, func, (outs), (ins RC:$rs), + !strconcat(instr_asm, "\t$rs"), [(brind RC:$rs)], IIBranch> { let rt = 0; let rd = 0; let shamt = 0; @@ -760,6 +794,12 @@ defm ULW : LoadM32<0x23, "ulw", load_u, 1>; defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>; defm USW : StoreM32<0x2b, "usw", store_u, 1>; +/// Primitives for unaligned +defm LWL : LoadX32<0x22>; +defm LWR : LoadX32<0x26>; +defm SWL : StoreX32<0x2A>; +defm SWR : StoreX32<0x2E>; + let hasSideEffects = 1 in def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)], NoItinerary, FrmOther> @@ -779,8 +819,7 @@ def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>; /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; -let isIndirectBranch = 1 in - def JR : JumpFR<0x00, 0x08, "jr">; +def JR : JumpFR<0x00, 0x08, "jr", CPURegs>; def JAL : JumpLink<0x03, "jal">; def JALR : JumpLinkReg<0x00, 0x09, "jalr">; def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; @@ -898,20 +937,20 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), // hi/lo relocs def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; +def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; +def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; + def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; +def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; +def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; + def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), (ADDiu CPURegs:$hi, tblockaddress:$lo)>; - -def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; -def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), (ADDiu CPURegs:$hi, tjumptable:$lo)>; - -def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; -def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), (ADDiu CPURegs:$hi, tconstpool:$lo)>; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 1fab52c..6fc2af1 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" + using namespace llvm; MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf, @@ -55,34 +56,34 @@ 
MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, } switch (MOTy) { - case MachineOperand::MO_MachineBasicBlock: - Symbol = MO.getMBB()->getSymbol(); - break; - - case MachineOperand::MO_GlobalAddress: - Symbol = Mang->getSymbol(MO.getGlobal()); - break; - - case MachineOperand::MO_BlockAddress: - Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); - break; - - case MachineOperand::MO_ExternalSymbol: - Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); - break; - - case MachineOperand::MO_JumpTableIndex: - Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); - break; - - case MachineOperand::MO_ConstantPoolIndex: - Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); - if (MO.getOffset()) - Offset += MO.getOffset(); - break; - - default: - llvm_unreachable("<unknown operand type>"); + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = Mang->getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); + if (MO.getOffset()) + Offset += MO.getOffset(); + break; + + default: + llvm_unreachable("<unknown operand type>"); } const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); @@ -145,8 +146,8 @@ void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) { OutMI.addOperand(MCOperand::CreateImm(MO.getImm())); } - -MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const { +MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, + unsigned offset) const { MachineOperandType MOTy = MO.getType(); switch (MOTy) { @@ -158,14 +159,14 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const { if (MO.isImplicit()) break; return MCOperand::CreateReg(MO.getReg()); case MachineOperand::MO_Immediate: - return MCOperand::CreateImm(MO.getImm()); + return MCOperand::CreateImm(MO.getImm() + offset); case MachineOperand::MO_MachineBasicBlock: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_BlockAddress: - return LowerSymbolOperand(MO, MOTy, 0); + return LowerSymbolOperand(MO, MOTy, offset); } return MCOperand(); @@ -182,3 +183,116 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.addOperand(MCOp); } } + +void MipsMCInstLower::LowerUnalignedLoadStore(const MachineInstr *MI, + SmallVector<MCInst, + 4>& MCInsts) { + unsigned Opc = MI->getOpcode(); + MCInst instr1, instr2, instr3, move; + + bool two_instructions = false; + + assert(MI->getNumOperands() == 3); + assert(MI->getOperand(0).isReg()); + assert(MI->getOperand(1).isReg()); + + MCOperand target = LowerOperand(MI->getOperand(0)); + MCOperand base = LowerOperand(MI->getOperand(1)); + MCOperand atReg = MCOperand::CreateReg(Mips::AT); + MCOperand zeroReg = MCOperand::CreateReg(Mips::ZERO); + + MachineOperand unloweredName = MI->getOperand(2); + MCOperand name = LowerOperand(unloweredName); + + move.setOpcode(Mips::ADDu); + move.addOperand(target); + move.addOperand(atReg); + 
move.addOperand(zeroReg); + + switch (Opc) { + case Mips::ULW: { + // FIXME: only works for little endian right now + MCOperand adj_name = LowerOperand(unloweredName, 3); + if (base.getReg() == (target.getReg())) { + instr1.setOpcode(Mips::LWL); + instr1.addOperand(atReg); + instr1.addOperand(base); + instr1.addOperand(adj_name); + instr2.setOpcode(Mips::LWR); + instr2.addOperand(atReg); + instr2.addOperand(base); + instr2.addOperand(name); + instr3 = move; + } else { + two_instructions = true; + instr1.setOpcode(Mips::LWL); + instr1.addOperand(target); + instr1.addOperand(base); + instr1.addOperand(adj_name); + instr2.setOpcode(Mips::LWR); + instr2.addOperand(target); + instr2.addOperand(base); + instr2.addOperand(name); + } + break; + } + case Mips::ULHu: { + // FIXME: only works for little endian right now + MCOperand adj_name = LowerOperand(unloweredName, 1); + instr1.setOpcode(Mips::LBu); + instr1.addOperand(atReg); + instr1.addOperand(base); + instr1.addOperand(adj_name); + instr2.setOpcode(Mips::LBu); + instr2.addOperand(target); + instr2.addOperand(base); + instr2.addOperand(name); + instr3.setOpcode(Mips::INS); + instr3.addOperand(target); + instr3.addOperand(atReg); + instr3.addOperand(MCOperand::CreateImm(0x8)); + instr3.addOperand(MCOperand::CreateImm(0x18)); + break; + } + + case Mips::USW: { + // FIXME: only works for little endian right now + assert (base.getReg() != target.getReg()); + two_instructions = true; + MCOperand adj_name = LowerOperand(unloweredName, 3); + instr1.setOpcode(Mips::SWL); + instr1.addOperand(target); + instr1.addOperand(base); + instr1.addOperand(adj_name); + instr2.setOpcode(Mips::SWR); + instr2.addOperand(target); + instr2.addOperand(base); + instr2.addOperand(name); + break; + } + case Mips::USH: { + MCOperand adj_name = LowerOperand(unloweredName, 1); + instr1.setOpcode(Mips::SB); + instr1.addOperand(target); + instr1.addOperand(base); + instr1.addOperand(name); + instr2.setOpcode(Mips::SRL); + instr2.addOperand(atReg); + instr2.addOperand(target); + instr2.addOperand(MCOperand::CreateImm(8)); + instr3.setOpcode(Mips::SB); + instr3.addOperand(atReg); + instr3.addOperand(base); + instr3.addOperand(adj_name); + break; + } + default: + // FIXME: need to add others + assert(0 && "unaligned instruction not processed"); + } + + MCInsts.push_back(instr1); + MCInsts.push_back(instr2); + if (!two_instructions) MCInsts.push_back(instr3); +} + diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 3a24da2..98e37e4 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -37,10 +37,12 @@ public: void Lower(const MachineInstr *MI, MCInst &OutMI) const; void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI); + void LowerUnalignedLoadStore(const MachineInstr *MI, + SmallVector<MCInst, 4>& MCInsts); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; - MCOperand LowerOperand(const MachineOperand& MO) const; + MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; }; } diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp deleted file mode 100644 index a0a242c..0000000 --- a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed 
under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "mipsmcsymbolrefexpr" -#include "MipsMCSymbolRefExpr.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" -using namespace llvm; - -const MipsMCSymbolRefExpr* -MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol, - int Offset, MCContext &Ctx) { - return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset); -} - -void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { - switch (Kind) { - default: assert(0 && "Invalid kind!"); - case VK_Mips_None: break; - case VK_Mips_GPREL: OS << "%gp_rel("; break; - case VK_Mips_GOT_CALL: OS << "%call16("; break; - case VK_Mips_GOT: OS << "%got("; break; - case VK_Mips_ABS_HI: OS << "%hi("; break; - case VK_Mips_ABS_LO: OS << "%lo("; break; - case VK_Mips_TLSGD: OS << "%tlsgd("; break; - case VK_Mips_GOTTPREL: OS << "%gottprel("; break; - case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; - case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; - case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; - case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; - case VK_Mips_GOT_DISP: OS << "%got_disp("; break; - case VK_Mips_GOT_PAGE: OS << "%got_page("; break; - case VK_Mips_GOT_OFST: OS << "%got_ofst("; break; - } - - OS << *Symbol; - - if (Offset) { - if (Offset > 0) - OS << '+'; - OS << Offset; - } - - if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO) - OS << ")))"; - else if (Kind != VK_Mips_None) - OS << ')'; -} - -bool -MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - return false; -} - -void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const { - Asm->getOrCreateSymbolData(*Symbol); -} - -const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const { - return Symbol->isDefined() ? &Symbol->getSection() : NULL; -} - diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h deleted file mode 100644 index 55e85a7..0000000 --- a/lib/Target/Mips/MipsMCSymbolRefExpr.h +++ /dev/null @@ -1,67 +0,0 @@ -//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef MIPSMCSYMBOLREFEXPR_H -#define MIPSMCSYMBOLREFEXPR_H -#include "llvm/MC/MCExpr.h" - -namespace llvm { - -class MipsMCSymbolRefExpr : public MCTargetExpr { -public: - enum VariantKind { - VK_Mips_None, - VK_Mips_GPREL, - VK_Mips_GOT_CALL, - VK_Mips_GOT, - VK_Mips_ABS_HI, - VK_Mips_ABS_LO, - VK_Mips_TLSGD, - VK_Mips_GOTTPREL, - VK_Mips_TPREL_HI, - VK_Mips_TPREL_LO, - VK_Mips_GPOFF_HI, - VK_Mips_GPOFF_LO, - VK_Mips_GOT_DISP, - VK_Mips_GOT_PAGE, - VK_Mips_GOT_OFST - }; - -private: - const VariantKind Kind; - const MCSymbol *Symbol; - int Offset; - - explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol, - int _Offset) - : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {} - -public: - static const MipsMCSymbolRefExpr *Create(VariantKind Kind, - const MCSymbol *Symbol, int Offset, - MCContext &Ctx); - - void PrintImpl(raw_ostream &OS) const; - bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - void AddValueSymbols(MCAssembler *) const; - const MCSection *FindAssociatedSection() const; - - static bool classof(const MCExpr *E) { - return E->getKind() == MCExpr::Target; - } - - static bool classof(const MipsMCSymbolRefExpr *) { return true; } - - int getOffset() const { return Offset; } - void setOffset(int O) { Offset = O; } -}; -} // end namespace llvm - -#endif diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 5331f09..06c4a66 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -285,7 +285,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) - FrameReg = Mips::SP; + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); @@ -334,8 +334,10 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned MipsRegisterInfo:: getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + bool IsN64 = Subtarget.isABI_N64(); - return TFI->hasFP(MF) ? Mips::FP : Mips::SP; + return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) : + (IsN64 ? Mips::SP_64 : Mips::SP); } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 6480da3..5d6b24f 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -36,8 +36,9 @@ MipsTargetMachine:: MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool isLittle): - LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS, isLittle), DataLayout(isLittle ? (Subtarget.isABI_N64() ? 
@@ -54,31 +55,35 @@ MipsTargetMachine(const Target &T, StringRef TT, MipsebTargetMachine:: MipsebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {} + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {} MipselTargetMachine:: MipselTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {} + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {} Mips64ebTargetMachine:: Mips64ebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {} + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {} Mips64elTargetMachine:: Mips64elTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {} + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsTargetMachine:: -addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +addInstSelector(PassManagerBase &PM) { PM.add(createMipsISelDag(*this)); return false; @@ -88,14 +93,14 @@ addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. bool MipsTargetMachine:: -addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +addPreEmitPass(PassManagerBase &PM) { PM.add(createMipsDelaySlotFillerPass(*this)); return true; } bool MipsTargetMachine:: -addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { +addPreRegAlloc(PassManagerBase &PM) { // Do not restore $gp if target is Mips64. // In N32/64, $gp is a callee-saved register. if (!Subtarget.hasMips64()) @@ -104,14 +109,13 @@ addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { } bool MipsTargetMachine:: -addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { +addPostRegAlloc(PassManagerBase &PM) { PM.add(createMipsExpandPseudoPass(*this)); return true; } bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { + JITCodeEmitter &JCE) { // Machine code emitter pass for Mips. 
PM.add(createMipsJITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 118ed10..e40d9e2 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -40,6 +40,7 @@ namespace llvm { MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool isLittle); virtual const MipsInstrInfo *getInstrInfo() const @@ -67,15 +68,11 @@ namespace llvm { } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPreRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); + virtual bool addPreRegAlloc(PassManagerBase &PM); + virtual bool addPostRegAlloc(PassManagerBase &); virtual bool addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); }; @@ -86,7 +83,8 @@ class MipsebTargetMachine : public MipsTargetMachine { public: MipsebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// MipselTargetMachine - Mips32 little endian target machine. @@ -95,7 +93,8 @@ class MipselTargetMachine : public MipsTargetMachine { public: MipselTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// Mips64ebTargetMachine - Mips64 big endian target machine. @@ -104,7 +103,8 @@ class Mips64ebTargetMachine : public MipsTargetMachine { public: Mips64ebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// Mips64elTargetMachine - Mips64 little endian target machine. 
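The four Mips target-machine variants above change in lockstep: each constructor grows a trailing CodeGenOpt::Level and forwards it through MipsTargetMachine to LLVMTargetMachine, so the optimization level is fixed once at construction. A caller-side sketch of what that implies, assuming the Target::createTargetMachine factory signature at this revision (the triple, CPU, and feature strings below are illustrative only, not part of this patch):

  // The opt level now rides along with the relocation and code models when
  // the target machine is created; it is not re-supplied to individual hooks.
  std::string Error;
  const Target *TheTarget =
      TargetRegistry::lookupTarget("mipsel-unknown-linux", Error);
  TargetMachine *TM =
      TheTarget->createTargetMachine("mipsel-unknown-linux", "mips32", "",
                                     Reloc::PIC_, CodeModel::Default,
                                     CodeGenOpt::Default);
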
@@ -113,7 +113,8 @@ class Mips64elTargetMachine : public MipsTargetMachine { public: Mips64elTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; } // End llvm namespace diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp index a5af3b8..09f86b5 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -52,9 +52,10 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, } static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 50dd417..292ea5e 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -88,8 +88,9 @@ namespace { PTXTargetMachine::PTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), DataLayout(is64Bit ? DataLayout64 : DataLayout32), Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), @@ -100,39 +101,38 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) { + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { } PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) { + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { } -bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - PM.add(createPTXISelDag(*this, OptLevel)); +bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) { + PM.add(createPTXISelDag(*this, getOptLevel())); return false; } -bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM) { // PTXMFInfoExtract must run after register allocation! - //PM.add(createPTXMFInfoExtract(*this, OptLevel)); + //PM.add(createPTXMFInfoExtract(*this)); return false; } bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // This is mostly based on LLVMTargetMachine::addPassesToEmitFile // Add common CodeGen passes.
MCContext *Context = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + if (addCommonCodeGenPasses(PM, DisableVerify, Context)) return true; assert(Context != 0 && "Failed to get MCContext"); @@ -192,7 +192,6 @@ bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, } bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, bool DisableVerify, MCContext *&OutContext) { // Add standard LLVM codegen passes. @@ -214,7 +213,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createVerifierPass()); // Run loop strength reduction before anything else. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createLoopStrengthReducePass(getTargetLowering())); //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } @@ -228,12 +227,12 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // The lower invoke pass may create unreachable code. Remove it. PM.add(createUnreachableBlockEliminationPass()); - if (OptLevel != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) PM.add(createCodeGenPreparePass(getTargetLowering())); PM.add(createStackProtectorPass(getTargetLowering())); - addPreISel(PM, OptLevel); + addPreISel(PM); //PM.add(createPrintFunctionPass("\n\n" // "*** Final LLVM Code input to ISel ***\n", @@ -255,10 +254,10 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. // Set up a MachineFunction for the rest of CodeGen to work on. - PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + PM.add(new MachineFunctionAnalysis(*this)); // Ask the target for an isel. - if (addInstSelector(PM, OptLevel)) + if (addInstSelector(PM)) return true; // Print the instruction selected machine code... @@ -268,21 +267,21 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createExpandISelPseudosPass()); // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createTailDuplicatePass(true)); printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); } // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - if (OptLevel != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) PM.add(createOptimizePHIsPass()); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. PM.add(createLocalStackSlotAllocationPass()); - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack @@ -300,7 +299,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, } // Run pre-ra passes. - if (addPreRegAlloc(PM, OptLevel)) + if (addPreRegAlloc(PM)) printAndVerify(PM, "After PreRegAlloc passes"); // Perform register allocation. @@ -308,7 +307,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After Register Allocation"); // Perform stack slot coloring and post-ra machine LICM. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { // FIXME: Re-enable coloring with register when it's capable of adding // kill markers. 
PM.add(createStackSlotColoringPass(false)); @@ -322,7 +321,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, } // Run post-ra passes. - if (addPostRegAlloc(PM, OptLevel)) + if (addPostRegAlloc(PM)) printAndVerify(PM, "After PostRegAlloc passes"); PM.add(createExpandPostRAPseudosPass()); @@ -333,23 +332,23 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After PrologEpilogCodeInserter"); // Run pre-sched2 passes. - if (addPreSched2(PM, OptLevel)) + if (addPreSched2(PM)) printAndVerify(PM, "After PreSched2 passes"); // Second pass scheduler. - if (OptLevel != CodeGenOpt::None) { - PM.add(createPostRAScheduler(OptLevel)); + if (getOptLevel() != CodeGenOpt::None) { + PM.add(createPostRAScheduler(getOptLevel())); printAndVerify(PM, "After PostRAScheduler"); } // Branch folding must be run after regalloc and prolog/epilog insertion. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); printNoVerify(PM, "After BranchFolding"); } // Tail duplication. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createTailDuplicatePass(false)); printNoVerify(PM, "After TailDuplicate"); } @@ -359,16 +358,16 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, //if (PrintGCInfo) // PM.add(createGCInfoPrinter(dbgs())); - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createCodePlacementOptPass()); printNoVerify(PM, "After CodePlacementOpt"); } - if (addPreEmitPass(PM, OptLevel)) + if (addPreEmitPass(PM)) printNoVerify(PM, "After PreEmit passes"); - PM.add(createPTXMFInfoExtract(*this, OptLevel)); - PM.add(createPTXFPRoundingModePass(*this, OptLevel)); + PM.add(createPTXMFInfoExtract(*this, getOptLevel())); + PM.add(createPTXFPRoundingModePass(*this, getOptLevel())); return false; } diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 5b7c82b..19f6c0f 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -37,6 +37,7 @@ class PTXTargetMachine : public LLVMTargetMachine { PTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -58,22 +59,18 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual bool addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPostRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPostRegAlloc(PassManagerBase &PM); // We override this method to supply our own set of codegen passes. virtual bool addPassesToEmitFile(PassManagerBase &, formatted_raw_ostream &, CodeGenFileType, - CodeGenOpt::Level, bool = true); // Emission of machine code through JITCodeEmitter is not supported. 
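The PTX pipeline above now reads the level from getOptLevel() at each decision point rather than from a threaded parameter. For orientation, a sketch of how a command-line driver commonly maps -O flags onto the four levels (illustrative assumption, not taken from this patch):

#include <cassert>

enum class Level { None, Less, Default, Aggressive };

// Hypothetical driver mapping; the exact behavior is outside this patch.
Level fromOptFlag(unsigned O) {
  switch (O) {
  case 0: return Level::None;       // -O0
  case 1: return Level::Less;       // -O1
  case 3: return Level::Aggressive; // -O3
  default: return Level::Default;   // -O2 and anything else
  }
}

int main() {
  assert(fromOptFlag(0) == Level::None);
  assert(fromOptFlag(2) == Level::Default);
}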
virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, - CodeGenOpt::Level, bool = true) { return true; } @@ -82,14 +79,13 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, - CodeGenOpt::Level, bool = true) { return true; } private: - bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, + bool addCommonCodeGenPasses(PassManagerBase &, bool DisableVerify, MCContext *&OutCtx); }; // class PTXTargetMachine @@ -99,7 +95,8 @@ public: PTX32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { @@ -107,7 +104,8 @@ public: PTX64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; // class PTX64TargetMachine } // namespace llvm diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index d5c8a9e..7c47051 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -76,7 +76,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { @@ -86,7 +87,7 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, else RM = Reloc::Static; } - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6f204cc..3dee406 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -18,7 +18,6 @@ #include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b188b90..36d5c41 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CallingConv.h" @@ -408,6 +407,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setInsertFencesForAtomic(true); + setSchedulingPreference(Sched::Hybrid); + computeRegisterProperties(); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index f148e9d..b9a6297 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -57,11 +56,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); if (Directive == PPC::DIR_440) { - // Disable the hazard recognizer for now, as it doesn't support - // bottom-up scheduling. - //const InstrItineraryData *II = TM->getInstrItineraryData(); - //return new PPCHazardRecognizer440(II, DAG); - return new ScheduleHazardRecognizer(); + const InstrItineraryData *II = TM->getInstrItineraryData(); + return new PPCHazardRecognizer440(II, DAG); } else { // Disable the hazard recognizer for now, as it doesn't support @@ -501,8 +497,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); @@ -623,8 +618,7 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2e90b7a..3ba9260 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -273,6 +273,27 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +unsigned +PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const unsigned DefaultSafety = 1; + + switch (RC->getID()) { + default: + return 0; + case PPC::G8RCRegClassID: + case PPC::GPRCRegClassID: { + unsigned FP = TFI->hasFP(MF) ? 1 : 0; + return 32 - FP - DefaultSafety; + } + case PPC::F8RCRegClassID: + case PPC::F4RCRegClassID: + case PPC::VRRCRegClassID: + return 32 - DefaultSafety; + } +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 1cc7213..f70a594 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -37,6 +37,9 @@ public: /// This is used for addressing modes. virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index f5744b83..de8fca0 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -29,8 +29,9 @@ extern "C" void LLVMInitializePowerPCTarget() { PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), @@ -44,15 +45,17 @@ bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) { + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) { + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { } @@ -60,22 +63,19 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool PPCTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool PPCTargetMachine::addInstSelector(PassManagerBase &PM) { // Install an instruction selector. PM.add(createPPCISelDag(*this)); return false; } -bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM) { // Must run branch selection immediately preceding the asm printer. PM.add(createPPCBranchSelectionPass()); return false; } bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // FIXME: This should be moved to TargetJITInfo!! 
if (Subtarget.isPPC64()) diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index d06f084..03b27c6 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -42,7 +42,8 @@ class PPCTargetMachine : public LLVMTargetMachine { public: PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM, bool is64Bit); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit); virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameLowering *getFrameLowering() const { @@ -66,9 +67,9 @@ public: } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); virtual bool getEnableTailMergeDefault() const; }; @@ -79,7 +80,8 @@ class PPC32TargetMachine : public PPCTargetMachine { public: PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// PPC64TargetMachine - PowerPC 64-bit target machine. @@ -88,7 +90,8 @@ class PPC64TargetMachine : public PPCTargetMachine { public: PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; } // end namespace llvm diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index cb2a7df..eda04c3 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -50,9 +50,10 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, } static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 3d7b4a4..7dff799 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -27,16 +27,16 @@ extern "C" void LLVMInitializeSparcTarget() { SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), FrameLowering(Subtarget) { } -bool SparcTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool SparcTargetMachine::addInstSelector(PassManagerBase &PM) { PM.add(createSparcISelDag(*this)); return false; } @@ -44,8 +44,7 @@ bool SparcTargetMachine::addInstSelector(PassManagerBase &PM, /// addPreEmitPass - This pass may be implemented by targets that want to run /// passes immediately before machine code 
is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. -bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel){ +bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){ PM.add(createSparcFPMoverPass(*this)); PM.add(createSparcDelaySlotFillerPass(*this)); return true; @@ -54,13 +53,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, - CodeModel::Model CM) - : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) { + CodeModel::Model CM, + CodeGenOpt::Level OL) + : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { } SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, - CodeModel::Model CM) - : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) { + CodeModel::Model CM, + CodeGenOpt::Level OL) + : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 3c907dd..63bfa5d 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -35,7 +35,8 @@ class SparcTargetMachine : public LLVMTargetMachine { public: SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM, bool is64bit); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { @@ -54,8 +55,8 @@ public: virtual const TargetData *getTargetData() const { return &DataLayout; } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); }; /// SparcV8TargetMachine - Sparc 32-bit target machine @@ -64,7 +65,8 @@ class SparcV8TargetMachine : public SparcTargetMachine { public: SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// SparcV9TargetMachine - Sparc 64-bit target machine @@ -73,7 +75,8 @@ class SparcV9TargetMachine : public SparcTargetMachine { public: SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; } // end namespace llvm diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 709dfd2..aa2e014 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -20,6 +20,19 @@ INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo", "Target Library Information", false, true) char TargetLibraryInfo::ID = 0; +const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = + { + "memset", + "memcpy", + "memmove", + "memset_pattern16", + "iprintf", + "siprintf", + "fiprintf", + "fwrite", + "fputs" + }; + /// initialize - Initialize the set of available library functions based on the /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. 
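The StandardNames table above pairs with a per-instance CustomNames override map: the initialize() hunk below uses setAvailableWithName so that, for example, fwrite keeps its usual optimizer semantics but is emitted under the platform's preferred symbol. A minimal sketch of that lookup scheme (illustrative, not the LLVM implementation):

#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> customNames;
  // On x86-32 OS X >= 10.7, prefer the $UNIX2003 variants (see below).
  customNames["fwrite"] = "fwrite$UNIX2003";
  customNames["fputs"] = "fputs$UNIX2003";

  auto emittedName = [&](const std::string &libFunc) {
    auto it = customNames.find(libFunc);
    return it == customNames.end() ? libFunc : it->second;
  };
  std::cout << emittedName("fwrite") << "\n"; // fwrite$UNIX2003
  std::cout << emittedName("memcpy") << "\n"; // memcpy (no override)
}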
@@ -38,6 +51,17 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) { TLI.setUnavailable(LibFunc::memset_pattern16); } + if (T.isMacOSX() && T.getArch() == Triple::x86 && + !T.isMacOSXVersionLT(10, 7)) { + // x86-32 OSX has a scheme where fwrite and fputs (and some other functions + // we don't care about) have two versions; on recent OSX, the one we want + // has a $UNIX2003 suffix. The two implementations are identical except + // for the return value in some edge cases. However, we don't want to + // generate code that depends on the old symbols. + TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003"); + TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003"); + } + // iprintf and friends are only available on XCore and TCE. if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) { TLI.setUnavailable(LibFunc::iprintf); @@ -64,6 +88,7 @@ TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) { TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) : ImmutablePass(ID) { memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); + CustomNames = TLI.CustomNames; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index daac924..805e16e 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -226,6 +226,14 @@ CodeModel::Model TargetMachine::getCodeModel() const { return CodeGenInfo->getCodeModel(); } +/// getOptLevel - Returns the optimization level: None, Less, +/// Default, or Aggressive. +CodeGenOpt::Level TargetMachine::getOptLevel() const { + if (!CodeGenInfo) + return CodeGenOpt::Default; + return CodeGenInfo->getOptLevel(); +} + bool TargetMachine::getAsmVerbosityDefault() { return AsmVerbosityDefault; } diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 8d85b95..6e87efa 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -34,6 +34,12 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, switch (MI->getOpcode()) { case X86::INSERTPSrr: + Src1Name = getRegName(MI->getOperand(0).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); + break; + case X86::VINSERTPSrr: + DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg()); DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); @@ -44,34 +50,52 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(0).getReg()); DecodeMOVLHPSMask(2, ShuffleMask); break; + case X86::VMOVLHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVLHPSMask(2, ShuffleMask); + break; case X86::MOVHLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(0).getReg()); DecodeMOVHLPSMask(2, ShuffleMask); break; + case X86::VMOVHLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVHLPSMask(2, ShuffleMask); + break; case X86::PSHUFDri: + case X86::VPSHUFDri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. 
case X86::PSHUFDmi: + case X86::VPSHUFDmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; case X86::PSHUFHWri: + case X86::VPSHUFHWri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFHWmi: + case X86::VPSHUFHWmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; case X86::PSHUFLWri: + case X86::VPSHUFLWri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFLWmi: + case X86::VPSHUFLWmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -142,6 +166,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VSHUFPDrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPDrmi: + DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::SHUFPSrri: Src2Name = getRegName(MI->getOperand(2).getReg()); @@ -150,63 +182,107 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VSHUFPSrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPSrmi: + DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::UNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPDrm: - DecodeUNPCKLPDMask(2, ShuffleMask); + DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDrm: - DecodeUNPCKLPDMask(2, ShuffleMask); + DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPDYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDYrm: - DecodeUNPCKLPDMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPSrm: - DecodeUNPCKLPSMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPSrm: - DecodeUNPCKLPSMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPSYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
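The PSHUF* comment printing around here decodes a 2-bits-per-element immediate. A standalone worked example of that decoding (illustrative, not from the patch): imm 0x1B selects elements 3,2,1,0, so pshufd with 0x1B reverses a 4 x 32-bit vector.

#include <cstdio>

int main() {
  unsigned Imm = 0x1B; // binary 00 01 10 11, read two bits per element
  unsigned Mask[4];
  for (unsigned i = 0; i != 4; ++i)
    Mask[i] = (Imm >> (2 * i)) & 0x3;
  std::printf("%u %u %u %u\n", Mask[0], Mask[1], Mask[2], Mask[3]); // 3 2 1 0
}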
case X86::VUNPCKLPSYrm: - DecodeUNPCKLPSMask(8, ShuffleMask); + DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPDrm: - DecodeUNPCKHPMask(2, ShuffleMask); + DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKHPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPDrm: + DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VUNPCKHPDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPDYrm: + DecodeUNPCKHPMask(MVT::v4f64, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::UNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPSrm: - DecodeUNPCKHPMask(4, ShuffleMask); + DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPSrm: + DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VUNPCKHPSYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPSYrm: + DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::VPERMILPSri: DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), ShuffleMask); diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 03c3948..a843515 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -385,7 +385,8 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); Triple T(TT); @@ -429,7 +430,7 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, // 64-bit JIT places everything in the same buffer except external funcs. CM = is64Bit ?
CodeModel::Large : CodeModel::Small; - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index aeb3309..f6c9d7b 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -142,29 +142,29 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, } } -void DecodeUNPCKHPMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i+NElts/2); // Reads from dest - ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src - } -} +void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); -void DecodeUNPCKLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); -} + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits() / 128; + if (NumLanes == 0 ) NumLanes = 1; // Handle MMX + unsigned NumLaneElts = NumElts / NumLanes; -void DecodeUNPCKLPDMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); + for (unsigned s = 0; s < NumLanes; ++s) { + unsigned Start = s * NumLaneElts + NumLaneElts/2; + unsigned End = s * NumLaneElts + NumLaneElts; + for (unsigned i = Start; i != End; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i+NumElts); // Reads from src/src2 + } + } } /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -173,16 +173,13 @@ void DecodeUNPCKLPMask(EVT VT, if (NumLanes == 0 ) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; - unsigned Start = 0; - unsigned End = NumLaneElts / 2; for (unsigned s = 0; s < NumLanes; ++s) { + unsigned Start = s * NumLaneElts; + unsigned End = s * NumLaneElts + NumLaneElts/2; for (unsigned i = Start; i != End; ++i) { - ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2 + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i+NumElts); // Reads from src/src2 } - // Process the next 128 bits. - Start += NumLaneElts; - End += NumLaneElts; } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 58193e6..35f6530 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -67,20 +67,15 @@ void DecodePUNPCKHMask(unsigned NElts, void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); -void DecodeUNPCKHPMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodeUNPCKLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodeUNPCKLPDMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); +/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd +/// etc. 
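A standalone mirror of the lane-aware decode introduced above (illustrative): for a 256-bit <8 x float> vunpcklps, each 128-bit lane interleaves its own low elements, so the expected mask is 0,8,1,9,4,12,5,13 rather than the 0,8,1,9,2,10,3,11 a lane-oblivious interleave would produce.

#include <iostream>
#include <vector>

std::vector<unsigned> decodeUNPCKLP(unsigned NumElts, unsigned NumLanes) {
  std::vector<unsigned> Mask;
  unsigned NumLaneElts = NumElts / NumLanes;
  for (unsigned s = 0; s < NumLanes; ++s) {
    unsigned Start = s * NumLaneElts;
    unsigned End = Start + NumLaneElts / 2;
    for (unsigned i = Start; i != End; ++i) {
      Mask.push_back(i);           // reads from dest/src1
      Mask.push_back(i + NumElts); // reads from src/src2
    }
  }
  return Mask;
}

int main() {
  for (unsigned m : decodeUNPCKLP(/*v8f32*/ 8, /*two 128-bit lanes*/ 2))
    std::cout << m << ' '; // 0 8 1 9 4 12 5 13
  std::cout << '\n';
}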
VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); // DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3d75de0..3c35763 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2216,6 +2216,75 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } break; } + case ISD::STORE: { + // The DEC64m tablegen pattern is currently not able to match the case where + // the EFLAGS on the original DEC are used. + // We'll need to improve tablegen to allow flags to be transferred from a + // node in the pattern to the result node, probably with a new keyword. + // For example, we have this: + // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", + // [(store (add (loadi64 addr:$dst), -1), addr:$dst), + // (implicit EFLAGS)]>; + // but maybe we need something like this: + // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", + // [(store (add (loadi64 addr:$dst), -1), addr:$dst), + // (transferrable EFLAGS)]>; + StoreSDNode *StoreNode = cast<StoreSDNode>(Node); + SDValue Chain = StoreNode->getOperand(0); + SDValue StoredVal = StoreNode->getOperand(1); + SDValue Address = StoreNode->getOperand(2); + SDValue Undef = StoreNode->getOperand(3); + + if (StoreNode->getMemOperand()->getSize() != 8 || + Undef->getOpcode() != ISD::UNDEF || + Chain->getOpcode() != ISD::LOAD || + StoredVal->getOpcode() != X86ISD::DEC || + StoredVal.getResNo() != 0 || + StoredVal->getOperand(0).getNode() != Chain.getNode()) + break; + + //OPC_CheckPredicate, 1, // Predicate_nontemporalstore + if (StoreNode->isNonTemporal()) + break; + + LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode()); + if (LoadNode->getOperand(1) != Address || + LoadNode->getOperand(2) != Undef) + break; + + if (!ISD::isNormalLoad(LoadNode)) + break; + + if (!ISD::isNormalStore(StoreNode)) + break; + + // Check that the load chain has only one use (from the store). + if (!Chain.hasOneUse()) + break; + + // Merge the input chains if they are not intra-pattern references.
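For context, the source-level shape this new ISD::STORE case targets (illustrative C++, not part of the patch; the function name is hypothetical):

// On x86-64, the load / add -1 / store-back sequence below can be folded
// into a single read-modify-write instruction, "decq (%rdi)", once all of
// the checks above pass.
extern "C" void decrement_in_place(long *p) { // hypothetical example
  *p = *p - 1; // load, add -1, store to the same address
}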
+ SDValue InputChain = LoadNode->getOperand(0); + + SDValue Base, Scale, Index, Disp, Segment; + if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), + Base, Scale, Index, Disp, Segment)) + break; + + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2); + MemOp[0] = StoreNode->getMemOperand(); + MemOp[1] = LoadNode->getMemOperand(); + const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; + MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m, + Node->getDebugLoc(), + MVT::i32, MVT::Other, Ops, + array_lengthof(Ops)); + Result->setMemRefs(MemOp, MemOp + 2); + + ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); + ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); + + return Result; + } } SDNode *ResNode = SelectCode(Node); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4e11131..96c6f41 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -35,7 +35,6 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -909,7 +908,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); } - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { + if (Subtarget->hasSSE41orAVX()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); @@ -981,7 +980,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42() || Subtarget->hasAVX()) + if (Subtarget->hasSSE42orAVX()) setOperationAction(ISD::SETCC, MVT::v2i64, Custom); if (!UseSoftFloat && Subtarget->hasAVX()) { @@ -2846,16 +2845,12 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -2927,16 +2922,12 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -3416,6 +3407,41 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) { return Mask; } +/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming +/// the two vector operands have swapped position. 
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { + unsigned NumElems = VT.getVectorNumElements(); + for (unsigned i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx < 0) + continue; + else if (idx < (int)NumElems) + Mask[i] = idx + NumElems; + else + Mask[i] = idx - NumElems; + } +} + +/// isCommutedVSHUFPMask - Return true if swapping the operands allows the use +/// of the "vshufpd" or "vshufps" instruction +/// for 256-bit vectors +static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT, + const X86Subtarget *Subtarget) { + + unsigned NumElems = VT.getVectorNumElements(); + if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8))) + return false; + + SmallVector<int, 8> CommutedMask; + for (unsigned i = 0; i < NumElems; ++i) + CommutedMask.push_back(Mask[i]); + + CommuteVectorShuffleMask(CommutedMask, VT); + return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget): + isVSHUFPSYMask(CommutedMask, VT, Subtarget); +} + + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 128-bit /// SHUFPS and SHUFPD. @@ -3551,13 +3577,14 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool V2IsSplat = false) { + bool HasAVX2, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3591,22 +3618,23 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { SmallVector<int, 8> M; N->getMask(M); - return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat); + return ::isUNPCKLMask(M, N->getValueType(0), HasAVX2, V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool V2IsSplat = false) { + bool HasAVX2, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths.
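A worked check of CommuteVectorShuffleMask above (standalone, illustrative): with NumElems == 4, indices referring to the first operand move to the second and vice versa, so <0,5,2,7> becomes <4,1,6,3>; negative (undef) entries are left alone.

#include <cstdio>

int main() {
  const unsigned NumElems = 4;
  int Mask[NumElems] = {0, 5, 2, 7};
  for (unsigned i = 0; i != NumElems; ++i) {
    int idx = Mask[i];
    if (idx < 0)
      continue; // undef sentinel stays as-is
    Mask[i] = idx < (int)NumElems ? idx + NumElems : idx - NumElems;
  }
  std::printf("%d %d %d %d\n", Mask[0], Mask[1], Mask[2], Mask[3]); // 4 1 6 3
}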
AVX defines UNPCK* to operate @@ -3638,10 +3666,10 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { SmallVector<int, 8> M; N->getMask(M); - return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat); + return ::isUNPCKHMask(M, N->getValueType(0), HasAVX2, V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form @@ -3953,7 +3981,7 @@ static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7> bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget) { - if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) + if (!Subtarget->hasSSE3orAVX()) return false; // The second vector must be undef @@ -3981,7 +4009,7 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, /// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6> bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget) { - if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) + if (!Subtarget->hasSSE3orAVX()) return false; // The second vector must be undef @@ -4216,21 +4244,6 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, SVOp->getOperand(0), &MaskVec[0]); } -/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming -/// the two vector operands have swapped position. -static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - for (unsigned i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElems) - Mask[i] = idx + NumElems; - else - Mask[i] = idx - NumElems; - } -} - /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper @@ -4388,23 +4401,30 @@ static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG, } /// getOnesVector - Returns a vector of specified type with all bits set. -/// Always build ones vectors as <4 x i32>. For 256-bit types, use two -/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their -/// original type, ensuring they get CSE'd. +/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with +/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately. +/// Then bitcast to their original type, ensuring they get CSE'd.
+static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG, + DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); assert((VT.is128BitVector() || VT.is256BitVector()) && "Expected a 128-bit or 256-bit vector type"); SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - Cst, Cst, Cst, Cst); - - if (VT.is256BitVector()) { - SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32), - Vec, DAG.getConstant(0, MVT::i32), DAG, dl); - Vec = Insert128BitVector(InsV, Vec, - DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl); + SDValue Vec; + if (VT.getSizeInBits() == 256) { + if (HasAVX2) { // AVX2 + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + } else { // AVX + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32), + Vec, DAG.getConstant(0, MVT::i32), DAG, dl); + Vec = Insert128BitVector(InsV, Vec, + DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl); + } + } else { + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } return DAG.getNode(ISD::BITCAST, dl, VT, Vec); @@ -4623,9 +4643,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: - DecodeUNPCKHPMask(NumElems, ShuffleMask); + DecodeUNPCKHPMask(VT, ShuffleMask); break; case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: @@ -4635,8 +4653,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: DecodeUNPCKLPMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: @@ -5111,6 +5127,97 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, return SDValue(); } +/// isVectorBroadcast - Check if the node chain is suitable to be xformed to +/// a vbroadcast node. We support two patterns: +/// 1. A splat BUILD_VECTOR which uses a single scalar load. +/// 2. A splat shuffle which uses a scalar_to_vector node which comes from +/// a scalar load. +/// The scalar load node is returned when a pattern is found, +/// or SDValue() otherwise. +static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) { + EVT VT = Op.getValueType(); + SDValue V = Op; + + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + + //A suspected load to be broadcasted. + SDValue Ld; + + switch (V.getOpcode()) { + default: + // Unknown pattern found. + return SDValue(); + + case ISD::BUILD_VECTOR: { + // The BUILD_VECTOR node must be a splat. + if (!isSplatVector(V.getNode())) + return SDValue(); + + Ld = V.getOperand(0); + + // The suspected load node has several users. Make sure that all + // of its users are from the BUILD_VECTOR node. + if (!Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) + return SDValue(); + break; + } + + case ISD::VECTOR_SHUFFLE: { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + + // Shuffles must have a splat mask where the first element is + // broadcasted. 
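Source-level illustration of pattern 1 accepted by isVectorBroadcast in this region (hypothetical function, not from the patch): every lane holds the same scalar, and that scalar comes straight from a load, so on AVX the whole splat can be folded into a single vbroadcast from memory.

extern "C" void splat_from_load(const float *src, float *dst) {
  float s = *src;             // the single scalar load to broadcast
  for (int i = 0; i < 8; ++i) // a splat BUILD_VECTOR, in effect
    dst[i] = s;
}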
+ if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0) + return SDValue(); + + SDValue Sc = Op.getOperand(0); + if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR) + return SDValue(); + + Ld = Sc.getOperand(0); + + // The scalar_to_vector node and the suspected + // load node must have exactly one user. + if (!Sc.hasOneUse() || !Ld.hasOneUse()) + return SDValue(); + break; + } + } + + // The scalar source must be a normal load. + if (!ISD::isNormalLoad(Ld.getNode())) + return SDValue(); + + bool Is256 = VT.getSizeInBits() == 256; + bool Is128 = VT.getSizeInBits() == 128; + unsigned ScalarSize = Ld.getValueType().getSizeInBits(); + + if (hasAVX2) { + // VBroadcast to YMM + if (Is256 && (ScalarSize == 8 || ScalarSize == 16 || + ScalarSize == 32 || ScalarSize == 64 )) + return Ld; + + // VBroadcast to XMM + if (Is128 && (ScalarSize == 8 || ScalarSize == 32 || + ScalarSize == 16 || ScalarSize == 64 )) + return Ld; + } + + // VBroadcast to YMM + if (Is256 && (ScalarSize == 32 || ScalarSize == 64)) + return Ld; + + // VBroadcast to XMM + if (Is128 && (ScalarSize == 32)) + return Ld; + + + // Unsupported broadcast. + return SDValue(); +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5131,14 +5238,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } // Vectors containing all ones can be matched by pcmpeqd on 128-bit width - // vectors or broken into v4i32 operations on 256-bit vectors. + // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use + // vpcmpeqd on 256-bit vectors. if (ISD::isBuildVectorAllOnes(Op.getNode())) { - if (Op.getValueType() == MVT::v4i32) + if (Op.getValueType() == MVT::v4i32 || + (Op.getValueType() == MVT::v8i32 && Subtarget->hasAVX2())) return Op; - return getOnesVector(Op.getValueType(), DAG, dl); + return getOnesVector(Op.getValueType(), Subtarget->hasAVX2(), DAG, dl); } + SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2()); + if (Subtarget->hasAVX() && LD.getNode()) + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; @@ -5380,7 +5493,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return LD; // For SSE 4.1, use insertps to put the high elements into the low element. - if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) { + if (getSubtarget()->hasSSE41orAVX()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); @@ -5551,7 +5664,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // quads, disable the next transformation since it does not help SSSE3. bool V1Used = InputQuads[0] || InputQuads[1]; bool V2Used = InputQuads[2] || InputQuads[3]; - if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { + if (Subtarget->hasSSSE3orAVX()) { if (InputQuads.count() == 2 && V1Used && V2Used) { BestLoQuad = InputQuads.find_first(); BestHiQuad = InputQuads.find_next(BestLoQuad); @@ -5624,7 +5737,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. 
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { + if (Subtarget->hasSSSE3orAVX()) { SmallVector<SDValue,16> pshufbMask; // If we have elements from both input vectors, set the high bit of the @@ -5692,8 +5805,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && - (Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX()) NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16, NewV.getOperand(0), X86::getShufflePSHUFLWImmediate(NewV.getNode()), @@ -5721,8 +5833,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && - (Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX()) NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16, NewV.getOperand(0), X86::getShufflePSHUFHWImmediate(NewV.getNode()), @@ -5788,7 +5899,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } // If SSSE3, use 1 pshufb instruction per vector with elements in the result. - if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) { + if (TLI.getSubtarget()->hasSSSE3orAVX()) { SmallVector<SDValue,16> pshufbMask; // If all result elements are from one input vector, then only translate @@ -6455,17 +6566,23 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT) { +static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; + case MVT::v8i32: + if (HasAVX2) return X86ISD::PUNPCKLDQ; + // else use fp unit for int unpack. + case MVT::v8f32: case MVT::v4f32: return X86ISD::UNPCKLPS; + case MVT::v4i64: + if (HasAVX2) return X86ISD::PUNPCKLQDQ; + // else use fp unit for int unpack. + case MVT::v4f64: case MVT::v2f64: return X86ISD::UNPCKLPD; - case MVT::v8i32: // Use fp unit for int unpack. - case MVT::v8f32: return X86ISD::VUNPCKLPSY; - case MVT::v4i64: // Use fp unit for int unpack. - case MVT::v4f64: return X86ISD::VUNPCKLPDY; + case MVT::v32i8: case MVT::v16i8: return X86ISD::PUNPCKLBW; + case MVT::v16i16: case MVT::v8i16: return X86ISD::PUNPCKLWD; default: llvm_unreachable("Unknown type for unpckl"); @@ -6473,17 +6590,23 @@ static inline unsigned getUNPCKLOpcode(EVT VT) { return 0; } -static inline unsigned getUNPCKHOpcode(EVT VT) { +static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKHDQ; case MVT::v2i64: return X86ISD::PUNPCKHQDQ; + case MVT::v8i32: + if (HasAVX2) return X86ISD::PUNPCKHDQ; + // else use fp unit for int unpack. + case MVT::v8f32: case MVT::v4f32: return X86ISD::UNPCKHPS; + case MVT::v4i64: + if (HasAVX2) return X86ISD::PUNPCKHQDQ; + // else use fp unit for int unpack. + case MVT::v4f64: case MVT::v2f64: return X86ISD::UNPCKHPD; - case MVT::v8i32: // Use fp unit for int unpack. - case MVT::v8f32: return X86ISD::VUNPCKHPSY; - case MVT::v4i64: // Use fp unit for int unpack. 
- case MVT::v4f64: return X86ISD::VUNPCKHPDY; + case MVT::v32i8: case MVT::v16i8: return X86ISD::PUNPCKHBW; + case MVT::v16i16: case MVT::v8i16: return X86ISD::PUNPCKHWD; default: llvm_unreachable("Unknown type for unpckh"); @@ -6507,52 +6630,6 @@ static inline unsigned getVPERMILOpcode(EVT VT) { return 0; } -/// isVectorBroadcast - Check if the node chain is suitable to be xformed to -/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming -/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded. -static bool isVectorBroadcast(SDValue &Op) { - EVT VT = Op.getValueType(); - bool Is256 = VT.getSizeInBits() == 256; - - assert((VT.getSizeInBits() == 128 || Is256) && - "Unsupported type for vbroadcast node"); - - SDValue V = Op; - if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); - - if (Is256 && !(V.hasOneUse() && - V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(0).getOpcode() == ISD::UNDEF)) - return false; - - if (Is256) - V = V.getOperand(1); - - if (!V.hasOneUse()) - return false; - - // Check the source scalar_to_vector type. 256-bit broadcasts are - // supported for 32/64-bit sizes, while 128-bit ones are only supported - // for 32-bit scalars. - if (V.getOpcode() != ISD::SCALAR_TO_VECTOR) - return false; - - unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits(); - if (ScalarSize != 32 && ScalarSize != 64) - return false; - if (!Is256 && ScalarSize == 64) - return false; - - V = V.getOperand(0); - if (!MayFoldLoad(V)) - return false; - - // Return the load node - Op = V; - return true; -} - static SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, @@ -6578,8 +6655,9 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, return Op; // Use vbroadcast whenever the splat comes from a foldable load - if (Subtarget->hasAVX() && isVectorBroadcast(V1)) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1); + SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2()); + if (Subtarget->hasAVX() && LD.getNode()) + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); // Handle splats by matching through known shuffle masks if ((Size == 128 && NumElem <= 4) || @@ -6630,6 +6708,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool V1IsSplat = false; bool V2IsSplat = false; bool HasXMMInt = Subtarget->hasXMMInt(); + bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); @@ -6659,12 +6738,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. 
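The two opcode selectors above rely on deliberate case fall-through: with AVX2 the 256-bit integer types get real integer unpacks, otherwise they drop into the floating-point cases, trading a domain-crossing penalty for legality. A condensed standalone restatement of that shape (the enums here are mine, for illustration only):

#include <cassert>

enum Opc { PUNPCKLDQ, PUNPCKLQDQ, UNPCKLPS, UNPCKLPD };
enum VT  { v4i32, v2i64, v8i32, v8f32, v4f32, v4i64, v4f64, v2f64 };

Opc getUnpcklOpc(VT vt, bool hasAVX2) {
  switch (vt) {
  case v4i32: return PUNPCKLDQ;
  case v2i64: return PUNPCKLQDQ;
  case v8i32:
    if (hasAVX2) return PUNPCKLDQ;
    // fall through: no 256-bit integer unpack before AVX2
  case v8f32:
  case v4f32: return UNPCKLPS;
  case v4i64:
    if (hasAVX2) return PUNPCKLQDQ;
    // fall through, same reasoning for the 64-bit lanes
  case v4f64:
  case v2f64: return UNPCKLPD;
  }
  assert(0 && "unknown type");
  return UNPCKLPS;
}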
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); - if (X86::isMOVDDUPMask(SVOp) && - (Subtarget->hasSSE3() || Subtarget->hasAVX()) && + if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() && V2IsUndef && RelaxedMayFoldVectorLoad(V1)) return getMOVDDup(Op, dl, V1, DAG); @@ -6672,9 +6752,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVHighToLow(Op, dl, DAG); // Use to match splats - if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef && + if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then @@ -6696,8 +6777,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = getSubtarget()->hasXMMInt() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + bool isShift = HasXMMInt && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -6721,7 +6801,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. 
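Why the mask predicates now take HasAVX2: the YMM forms of VPUNPCKL/H shuffle each 128-bit lane independently, so a 256-bit mask only qualifies if every lane unpacks its own lane's elements. A simplified standalone check capturing that per-lane rule (my own helper, with undef mask elements omitted for brevity):

#include <cstdio>
#include <vector>

bool isUnpackLowMask(const std::vector<int> &Mask, int EltBits) {
  int N = Mask.size();            // elements in the whole vector
  int LaneElts = 128 / EltBits;   // elements per 128-bit lane
  for (int l = 0; l < N; l += LaneElts)      // check each lane on its own
    for (int i = 0; i < LaneElts / 2; ++i) {
      if (Mask[l + 2*i]     != l + i)     return false; // src1, same lane
      if (Mask[l + 2*i + 1] != l + i + N) return false; // src2, same lane
    }
  return true;
}

int main() {
  // v8i32 vpunpckldq shuffles lanes as <0,8,1,9 | 4,12,5,13>.
  std::vector<int> M = {0, 8, 1, 9, 4, 12, 5, 13};
  std::printf("%d\n", isUnpackLowMask(M, 32)); // prints 1
}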
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp, HasAVX2)) return getMOVLowToHigh(Op, dl, DAG, HasXMMInt); if (X86::isMOVHLPSMask(SVOp)) @@ -6774,11 +6854,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKLMask(SVOp, HasAVX2)) + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2, + DAG); - if (X86::isUNPCKHMask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKHMask(SVOp, HasAVX2)) + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2, + DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -6787,9 +6869,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewMask = NormalizeMask(SVOp, DAG); ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask); if (NSVOp != SVOp) { - if (X86::isUNPCKLMask(NSVOp, true)) { + if (X86::isUNPCKLMask(NSVOp, HasAVX2, true)) { return NewMask; - } else if (X86::isUNPCKHMask(NSVOp, true)) { + } else if (X86::isUNPCKHMask(NSVOp, HasAVX2, true)) { return NewMask; } } @@ -6801,11 +6883,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); - if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKLMask(NewSVOp, HasAVX2)) + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1, + DAG); - if (X86::isUNPCKHMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKHMask(NewSVOp, HasAVX2)) + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1, + DAG); } // Normalize the node to match x86 shuffle ops if needed @@ -6818,7 +6902,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SmallVector<int, 16> M; SVOp->getMask(M); - if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX())) return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, X86::getShufflePALIGNRImmediate(SVOp), DAG); @@ -6846,9 +6930,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { X86::getShuffleSHUFImmediate(SVOp), DAG); if (X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); //===--------------------------------------------------------------------===// // Generate target specific nodes for 128 or 256-bit shuffles only @@ -6884,6 +6970,17 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, getShuffleVSHUFPDYImmediate(SVOp), DAG); + // Try to swap operands in the node to match x86 shuffle ops + if (isCommutedVSHUFPMask(M, VT, Subtarget)) { + // Now we need to commute operands. 
+ SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG)); + V1 = SVOp->getOperand(0); + V2 = SVOp->getOperand(1); + unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp): + getShuffleVSHUFPSYImmediate(SVOp); + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG); + } + //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. FIXME: this isn't true yet, but @@ -7002,7 +7099,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { + if (Subtarget->hasSSE41orAVX()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); if (Res.getNode()) return Res; @@ -7144,7 +7241,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { return Insert128BitVector(N0, V, Ins128Idx, DAG, dl); } - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) + if (Subtarget->hasSSE41orAVX()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); if (EltVT == MVT::i8) @@ -8264,8 +8361,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, // climbing the DAG back to the root, and it doesn't seem to be worth the // effort. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && + UI->getOpcode() != ISD::SETCC && + UI->getOpcode() != ISD::STORE) goto default_case; if (ConstantSDNode *C = @@ -8408,11 +8507,19 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, } } else if (Op1.getOpcode() == ISD::Constant) { ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1); + uint64_t AndRHSVal = AndRHS->getZExtValue(); SDValue AndLHS = Op0; - if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { + + if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); } + + // Use BT if the immediate can't be encoded in a TEST instruction. + if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) { + LHS = AndLHS; + RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType()); + } } if (LHS.getNode()) { @@ -8632,9 +8739,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). 
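The LowerToBT change above exists because TEST only encodes a 32-bit immediate: a 64-bit single-bit mask such as 1<<40 cannot be tested directly, but BT with the bit index can. (For a power of two, the Log2_64_Ceil used above is just the exact log.) A standalone model of the decision, with names of my own choosing:

#include <cstdint>
#include <cstdio>

bool testBit(uint64_t x, uint64_t mask) {
  bool fitsTest  = mask <= 0xffffffffu;           // isUInt<32>(mask)
  bool singleBit = mask && !(mask & (mask - 1));  // isPowerOf2_64(mask)
  if (!fitsTest && singleBit) {
    unsigned idx = __builtin_ctzll(mask);  // bit index (GCC/Clang builtin)
    return (x >> idx) & 1;                 // what BT leaves in CF
  }
  return (x & mask) != 0;                  // what TEST computes
}

int main() {
  std::printf("%d\n", (int)testBit(1ull << 40, 1ull << 40)); // 1, BT path
}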
- if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX()) + if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42orAVX()) return SDValue(); - if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX()) + if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41orAVX()) return SDValue(); // Since SSE has no unsigned integer comparisons, we need to flip the sign @@ -9464,6 +9571,23 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_hsub_pd_256: return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psllv_d: + case Intrinsic::x86_avx2_psllv_q: + case Intrinsic::x86_avx2_psllv_d_256: + case Intrinsic::x86_avx2_psllv_q_256: + return DAG.getNode(ISD::SHL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psrlv_d: + case Intrinsic::x86_avx2_psrlv_q: + case Intrinsic::x86_avx2_psrlv_d_256: + case Intrinsic::x86_avx2_psrlv_q_256: + return DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psrav_d: + case Intrinsic::x86_avx2_psrav_d_256: + return DAG.getNode(ISD::SRA, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. @@ -10261,47 +10385,48 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { return Res; } - if (Subtarget->hasAVX2()) { - if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); + if (Subtarget->hasAVX2() && VT == MVT::v32i8) { + if (Op.getOpcode() == ISD::SHL) { + // Make a large shift. 
+ SDValue SHL =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
+ }
 }
 }
@@ -10493,9 +10618,9 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
 SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
 DebugLoc dl = Op.getDebugLoc();
- SDNode* Node = Op.getNode();
- EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- EVT VT = Node->getValueType(0);
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT VT = Op.getValueType();
+
 if (Subtarget->hasXMMInt() && VT.isVector()) {
 unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits();
@@ -10506,21 +10631,55 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG)
 switch (VT.getSimpleVT().SimpleTy) {
 default: return SDValue();
- case MVT::v4i32: {
+ case MVT::v4i32:
 SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
 SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
 break;
- }
- case MVT::v8i16: {
+ case MVT::v8i16:
 SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
 SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
 break;
- }
+ case MVT::v8i32:
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ if (!Subtarget->hasAVX2()) {
+ // needs to be split
+ int NumElems = VT.getVectorNumElements();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ int ExtraNumElems = ExtraVT.getVectorNumElements();
+ ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
+ ExtraNumElems/2);
+ SDValue Extra = DAG.getValueType(ExtraVT);
+
+ LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
+ LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
+ }
+ if (VT == MVT::v8i32) {
+ SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d;
+
SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d; + } else { + SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w; + SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w; + } } SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(SHLIntrinsicsID, MVT::i32), - Node->getOperand(0), ShAmt); + Op.getOperand(0), ShAmt); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(SRAIntrinsicsID, MVT::i32), @@ -11033,9 +11192,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PINSRW: return "X86ISD::PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::ANDNP: return "X86ISD::ANDNP"; - case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; - case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; - case X86ISD::PSIGND: return "X86ISD::PSIGND"; + case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; @@ -11111,7 +11268,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; @@ -11235,7 +11391,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()); + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()); // FIXME: pshufb, blends, shifts. 
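The v32i8 shift lowering a few hunks up leans on two classic tricks, since x86 has neither byte shifts nor a psrai.b: logical shifts are done on 16-bit lanes with the bits that leak across byte boundaries masked off, and the arithmetic shift is rebuilt from the logical one via ((x >>u a) ^ m) - m with m = 0x80 >> a. A standalone byte-level check of both identities (plain C++, mirroring the constants used in the patch):

#include <cstdint>
#include <cstdio>

int main() {
  unsigned a = 2;
  // Trick 1: shift a whole 16-bit lane, then mask each byte.
  uint16_t lane = 0x01F0;              // bytes 0xF0 (low) and 0x01 (high)
  uint16_t w = lane << a;              // psllw result: 0x07C0
  uint8_t mask = (uint8_t)(-1u << a);  // the build_vector constant: 0xFC
  uint8_t hiB = (w >> 8) & mask;       // 0x04 == uint8_t(0x01 << 2)
  uint8_t loB = (w & 0xFF) & mask;     // 0xC0 == uint8_t(0xF0 << 2)

  // Trick 2: arithmetic shift from logical shift; xor/sub sign-extends.
  uint8_t x  = 0xF0;                   // -16 as a signed byte
  uint8_t lo = x >> a;                 // 0x3C, the logical shift
  uint8_t m  = 0x80u >> a;             // 0x20, where the sign bit landed
  int8_t  r  = (int8_t)(uint8_t)((lo ^ m) - m);  // -4 == -16 >> 2

  std::printf("%02x %02x %d\n", (unsigned)hiB, (unsigned)loB, (int)r);
}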
return (VT.getVectorNumElements() == 2 || @@ -11245,9 +11401,9 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isPSHUFDMask(M, VT) || isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || - isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) || - isUNPCKLMask(M, VT) || - isUNPCKHMask(M, VT) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()) || + isUNPCKLMask(M, VT, Subtarget->hasAVX2()) || + isUNPCKHMask(M, VT, Subtarget->hasAVX2()) || isUNPCKL_v_undef_Mask(M, VT) || isUNPCKH_v_undef_Mask(M, VT)); } @@ -11654,7 +11810,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) && + assert(Subtarget->hasSSE42orAVX() && "Target must have SSE4.2 or AVX features enabled"); DebugLoc dl = MI->getDebugLoc(); @@ -13808,98 +13964,98 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return R; EVT VT = N->getValueType(0); - if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64) - return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // look for psign/blend - if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { - if (VT == MVT::v2i64) { - // Canonicalize pandn to RHS - if (N0.getOpcode() == X86ISD::ANDNP) - std::swap(N0, N1); - // or (and (m, x), (pandn m, y)) - if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { - SDValue Mask = N1.getOperand(0); - SDValue X = N1.getOperand(1); - SDValue Y; - if (N0.getOperand(0) == Mask) - Y = N0.getOperand(1); - if (N0.getOperand(1) == Mask) - Y = N0.getOperand(0); - - // Check to see if the mask appeared in both the AND and ANDNP and - if (!Y.getNode()) - return SDValue(); + if (VT == MVT::v2i64 || VT == MVT::v4i64) { + if (!Subtarget->hasSSSE3orAVX() || + (VT == MVT::v4i64 && !Subtarget->hasAVX2())) + return SDValue(); - // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. - if (Mask.getOpcode() != ISD::BITCAST || - X.getOpcode() != ISD::BITCAST || - Y.getOpcode() != ISD::BITCAST) - return SDValue(); + // Canonicalize pandn to RHS + if (N0.getOpcode() == X86ISD::ANDNP) + std::swap(N0, N1); + // or (and (m, x), (pandn m, y)) + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { + SDValue Mask = N1.getOperand(0); + SDValue X = N1.getOperand(1); + SDValue Y; + if (N0.getOperand(0) == Mask) + Y = N0.getOperand(1); + if (N0.getOperand(1) == Mask) + Y = N0.getOperand(0); + + // Check to see if the mask appeared in both the AND and ANDNP and + if (!Y.getNode()) + return SDValue(); - // Look through mask bitcast. - Mask = Mask.getOperand(0); - EVT MaskVT = Mask.getValueType(); + // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. + if (Mask.getOpcode() != ISD::BITCAST || + X.getOpcode() != ISD::BITCAST || + Y.getOpcode() != ISD::BITCAST) + return SDValue(); - // Validate that the Mask operand is a vector sra node. The sra node - // will be an intrinsic. - if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN) - return SDValue(); + // Look through mask bitcast. 
+ Mask = Mask.getOperand(0); + EVT MaskVT = Mask.getValueType(); - // FIXME: what to do for bytes, since there is a psignb/pblendvb, but - // there is no psrai.b - switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { - case Intrinsic::x86_sse2_psrai_w: - case Intrinsic::x86_sse2_psrai_d: - break; - default: return SDValue(); - } + // Validate that the Mask operand is a vector sra node. The sra node + // will be an intrinsic. + if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN) + return SDValue(); - // Check that the SRA is all signbits. - SDValue SraC = Mask.getOperand(2); - unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); - unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); - if ((SraAmt + 1) != EltBits) - return SDValue(); + // FIXME: what to do for bytes, since there is a psignb/pblendvb, but + // there is no psrai.b + switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + break; + default: return SDValue(); + } - DebugLoc DL = N->getDebugLoc(); - - // Now we know we at least have a plendvb with the mask val. See if - // we can form a psignb/w/d. - // psign = x.type == y.type == mask.type && y = sub(0, x); - X = X.getOperand(0); - Y = Y.getOperand(0); - if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && - ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && - X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){ - unsigned Opc = 0; - switch (EltBits) { - case 8: Opc = X86ISD::PSIGNB; break; - case 16: Opc = X86ISD::PSIGNW; break; - case 32: Opc = X86ISD::PSIGND; break; - default: break; - } - if (Opc) { - SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1)); - return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign); - } - } - // PBLENDVB only available on SSE 4.1 - if (!(Subtarget->hasSSE41() || Subtarget->hasAVX())) - return SDValue(); + // Check that the SRA is all signbits. + SDValue SraC = Mask.getOperand(2); + unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + if ((SraAmt + 1) != EltBits) + return SDValue(); - X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); - Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y); - Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); - Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y); - return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); + DebugLoc DL = N->getDebugLoc(); + + // Now we know we at least have a plendvb with the mask val. See if + // we can form a psignb/w/d. + // psign = x.type == y.type == mask.type && y = sub(0, x); + X = X.getOperand(0); + Y = Y.getOperand(0); + if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && + ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && + X.getValueType() == MaskVT && X.getValueType() == Y.getValueType() && + (EltBits == 8 || EltBits == 16 || EltBits == 32)) { + SDValue Sign = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, + Mask.getOperand(1)); + return DAG.getNode(ISD::BITCAST, DL, VT, Sign); } + // PBLENDVB only available on SSE 4.1 + if (!Subtarget->hasSSE41orAVX()) + return SDValue(); + + EVT BlendVT = (VT == MVT::v4i64) ? 
MVT::v32i8 : MVT::v16i8;
+
+ X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
+ Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
 }
 }

+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
 // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
 std::swap(N0, N1);
@@ -14409,8 +14565,7 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
 SDValue RHS = N->getOperand(1);

 // Try to synthesize horizontal adds from adds of shuffles.
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
- (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
 isHorizontalBinOp(LHS, RHS, true))
 return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
 return SDValue();
@@ -14424,8 +14579,7 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
 SDValue RHS = N->getOperand(1);

 // Try to synthesize horizontal subs from subs of shuffles.
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
- (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
 isHorizontalBinOp(LHS, RHS, false))
 return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
 return SDValue();
@@ -14621,7 +14775,23 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
 DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
 }

-static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+/// PerformADDCombine - Do target-specific dag combines on integer adds.
+static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
+ isHorizontalBinOp(Op0, Op1, true))
+ return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
+
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
 SDValue Op0 = N->getOperand(0);
 SDValue Op1 = N->getOperand(1);
@@ -14643,6 +14813,12 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
 }
 }

+ // Try to synthesize horizontal subs from subs of shuffles.
+ EVT VT = N->getValueType(0); + if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && + isHorizontalBinOp(Op0, Op1, false)) + return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1); + return OptimizeConditionalInDecrement(N, DAG); } @@ -14656,8 +14832,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); - case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG); - case ISD::SUB: return PerformSubCombine(N, DAG); + case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); + case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: @@ -14687,16 +14863,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 3b7a14d..ccff3a5 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -172,12 +172,18 @@ namespace llvm { /// ANDNP - Bitwise Logical AND NOT of Packed FP values. ANDNP, - /// PSIGNB/W/D - Copy integer sign. - PSIGNB, PSIGNW, PSIGND, + /// PSIGN - Copy integer sign. + PSIGN, /// BLEND family of opcodes BLENDV, + /// HADD - Integer horizontal add. + HADD, + + /// HSUB - Integer horizontal sub. + HSUB, + /// FHADD - Floating point horizontal add. FHADD, @@ -269,12 +275,8 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, - VUNPCKLPSY, - VUNPCKLPDY, UNPCKHPS, UNPCKHPD, - VUNPCKHPSY, - VUNPCKHPDY, PUNPCKLBW, PUNPCKLWD, PUNPCKLDQ, @@ -408,11 +410,13 @@ namespace llvm { /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, + bool V2IsSplat = false); /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. - bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, + bool V2IsSplat = false); /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 0245e5c..fa1d676 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" namespace llvm { diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 6fd2efd..791bbe6 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -41,6 +41,8 @@ def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; +def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; +def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; @@ -51,14 +53,8 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB", def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86psignb : SDNode<"X86ISD::PSIGNB", - SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>>; -def X86psignw : SDNode<"X86ISD::PSIGNW", - SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>>; -def X86psignd : SDNode<"X86ISD::PSIGND", - SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, +def X86psign : SDNode<"X86ISD::PSIGN", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; @@ -136,13 +132,9 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; -def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>; -def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>; def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; -def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>; -def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>; def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; @@ -427,12 +419,12 @@ def movl : PatFrag<(ops node:$lhs, node:$rhs), def unpckl : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2()); }]>; def unpckh : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2()); }]>; def pshufd : PatFrag<(ops node:$lhs, node:$rhs), diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 9428fff..24c4a53 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/PseudoSourceValue.h" 
#include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -2903,6 +2902,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (LoadMI->getOpcode()) { case X86::AVX_SET0PSY: case X86::AVX_SET0PDY: + case X86::AVX2_SETALLONES: Alignment = 32; break; case X86::V_SET0: @@ -2948,6 +2948,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::AVX_SET0PSY: case X86::AVX_SET0PDY: case X86::AVX_SETALLONES: + case X86::AVX2_SETALLONES: case X86::FsFLD0SD: case X86::FsFLD0SS: case X86::VFsFLD0SD: @@ -2986,7 +2987,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES); + bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES || + Opc == X86::AVX2_SETALLONES); const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -3555,7 +3557,11 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr }, { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr }, { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, - { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }, + { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr } +}; + +static const unsigned ReplaceableInstrsAVX2[][3] = { + //PackedSingle PackedDouble PackedInt { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm }, { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr }, { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm }, @@ -3563,7 +3569,7 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm }, { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, - { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, + { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr } }; // FIXME: Some shuffle and unpack instructions have equivalents in different @@ -3576,11 +3582,23 @@ static const unsigned *lookup(unsigned opcode, unsigned domain) { return 0; } +static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) { + for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) + if (ReplaceableInstrsAVX2[i][domain-1] == opcode) + return ReplaceableInstrsAVX2[i]; + return 0; +} + std::pair<uint16_t, uint16_t> X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const { uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; - return std::make_pair(domain, - domain && lookup(MI->getOpcode(), domain) ? 0xe : 0); + bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2(); + uint16_t validDomains = 0; + if (domain && lookup(MI->getOpcode(), domain)) + validDomains = 0xe; + else if (domain && lookupAVX2(MI->getOpcode(), domain)) + validDomains = hasAVX2 ? 
0xe : 0x6; + return std::make_pair(domain, validDomains); } void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { @@ -3588,6 +3606,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); const unsigned *table = lookup(MI->getOpcode(), dom); + if (!table) { // try the other table + assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) && + "256-bit vector operations only available in AVX2"); + table = lookupAVX2(MI->getOpcode(), dom); + } assert(table && "Cannot change domain"); MI->setDesc(get(table[Domain-1])); } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 79ce509..35631d5 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1523,10 +1523,11 @@ def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>; def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>; def : MnemonicAlias<"cbw", "cbtw">; +def : MnemonicAlias<"cwde", "cwtl">; def : MnemonicAlias<"cwd", "cwtd">; def : MnemonicAlias<"cdq", "cltd">; -def : MnemonicAlias<"cwde", "cwtl">; def : MnemonicAlias<"cdqe", "cltq">; +def : MnemonicAlias<"cqo", "cqto">; // lret maps to lretl, it is not ambiguous with lretq. def : MnemonicAlias<"lret", "lretl">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 6deee4f..7cadac1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -311,13 +311,16 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // JIT implementation, it does not expand the instructions below like // X86MCInstLower does. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in + isCodeGenOnly = 1, ExeDomain = SSEPackedInt in { def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in + let Predicates = [HasAVX] in def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; + let Predicates = [HasAVX2] in + def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V; +} //===----------------------------------------------------------------------===// @@ -522,6 +525,8 @@ let Predicates = [HasSSE2] in { // fold opportunity reappears. 
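Back in the getExecutionDomain change in X86InstrInfo.cpp above, the returned mask carries one bit per SSE execution domain (1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt): 0xe means all three encodings are interchangeable, while 0x6 keeps only the FP forms because the PackedInt column of ReplaceableInstrsAVX2 needs AVX2 for YMM registers. A standalone decode of the two masks (illustrative only, mirroring X86II's domain numbering as I read it):

#include <cstdio>
#include <initializer_list>

int main() {
  const char *Name[] = {"", "PackedSingle", "PackedDouble", "PackedInt"};
  for (unsigned mask : {0xeu, 0x6u}) {   // 0b1110 and 0b0110
    std::printf("0x%x:", mask);
    for (unsigned d = 1; d <= 3; ++d)
      if (mask & (1u << d))
        std::printf(" %s", Name[d]);
    std::printf("\n");
  }
  // 0xe: PackedSingle PackedDouble PackedInt
  // 0x6: PackedSingle PackedDouble
}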
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), @@ -2467,21 +2472,21 @@ let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))), + def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))), + def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), @@ -2493,21 +2498,21 @@ let Predicates = [HasAVX] in { def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4i64 
(X86Unpckhpd VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the @@ -3421,47 +3426,6 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>; } - -/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64. -/// -/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew -/// to collapse (bitconvert VT to VT) into its operand. -/// -multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit IsCommutable = 0, bit Is2Addr = 1> { - let isCommutable = IsCommutable in - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !if(Is2Addr, - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>; - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !if(Is2Addr, - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>; -} - -/// PDI_binop_rm_v4i64 - Simple AVX2 binary operator whose type is v4i64. -/// -/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew -/// to collapse (bitconvert VT to VT) into its operand. -/// -multiclass PDI_binop_rm_v4i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit IsCommutable = 0> { - let isCommutable = IsCommutable in - def rr : PDI<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))]>; - def rm : PDI<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, i256mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (OpNode VR256:$src1, (memopv4i64 addr:$src2)))]>; -} - } // ExeDomain = SSEPackedInt // 128-bit Integer Arithmetic @@ -3473,7 +3437,8 @@ defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; -defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V; +defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64, @@ -3482,7 +3447,8 @@ defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; -defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V; +defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; // Intrinsic forms defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, @@ -3527,21 +3493,23 @@ defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, let Predicates = [HasAVX2] in { defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64, - i256mem, 1, 0>, VEX_4V; + i256mem, 1, 0>, VEX_4V; defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64, 
- i256mem, 1, 0>, VEX_4V; + i256mem, 1, 0>, VEX_4V; defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64, - i256mem, 1, 0>, VEX_4V; -defm VPADDQY : PDI_binop_rm_v4i64<0xD4, "vpaddq", add, 1>, VEX_4V; + i256mem, 1, 0>, VEX_4V; +defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64, - i256mem, 1, 0>, VEX_4V; + i256mem, 1, 0>, VEX_4V; defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64, - i256mem, 0, 0>, VEX_4V; + i256mem, 0, 0>, VEX_4V; defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64, - i256mem, 0, 0>, VEX_4V; + i256mem, 0, 0>, VEX_4V; defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64, - i256mem, 0, 0>, VEX_4V; -defm VPSUBQY : PDI_binop_rm_v4i64<0xFB, "vpsubq", sub, 0>, VEX_4V; + i256mem, 0, 0>, VEX_4V; +defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; // Intrinsic forms defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, @@ -3591,7 +3559,8 @@ defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64, i128mem, 1>; defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64, i128mem, 1>; -defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; +defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64, + i128mem, 1>; defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64, i128mem, 1>; defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64, @@ -3600,7 +3569,8 @@ defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64, i128mem>; defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64, i128mem>; -defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>; +defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64, + i128mem>; // Intrinsic forms defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, @@ -3676,9 +3646,12 @@ defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", int_x86_sse2_psra_d, int_x86_sse2_psrai_d, VR128, 0>, VEX_4V; -defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V; -defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V; -defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V; +defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; +defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; +defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3735,9 +3708,12 @@ defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", int_x86_avx2_psra_d, int_x86_avx2_psrai_d, VR256, 0>, VEX_4V; -defm VPANDY : PDI_binop_rm_v4i64<0xDB, "vpand", and, 1>, VEX_4V; -defm VPORY : PDI_binop_rm_v4i64<0xEB, "vpor" , or, 1>, VEX_4V; -defm VPXORY : PDI_binop_rm_v4i64<0xEF, "vpxor", xor, 1>, VEX_4V; +defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3794,9 +3770,12 @@ defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d, 
int_x86_sse2_psrai_d, VR128>; -defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; -defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>; -defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>; +defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64, + i128mem, 1>; +defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64, + i128mem, 1>; +defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64, + i128mem, 1>; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3822,51 +3801,51 @@ let ExeDomain = SSEPackedInt in { let Predicates = [HasAVX] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), - (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), - (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), - (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>; + (VPSLLDQri VR128:$src1, imm:$src2)>; def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), - (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>; + (VPSRLDQri VR128:$src1, imm:$src2)>; def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), - (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; // Shift up / down and insert zero's. def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), - (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), - (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; } let Predicates = [HasAVX2] in { def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), - (v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; + (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), - (v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; + (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2), - (v4i64 (VPSLLDQYri VR256:$src1, imm:$src2))>; + (VPSLLDQYri VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2), - (v4i64 (VPSRLDQYri VR256:$src1, imm:$src2))>; + (VPSRLDQYri VR256:$src1, imm:$src2)>; } let Predicates = [HasSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), - (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), - (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), - (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>; + (PSLLDQri VR128:$src1, imm:$src2)>; def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), - (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>; + (PSRLDQri VR128:$src1, imm:$src2)>; def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), - (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; // Shift up / down and insert zero's. 
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), - (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), - (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; } //===---------------------------------------------------------------------===// @@ -3889,28 +3868,34 @@ let Predicates = [HasAVX] in { def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), (VPCMPEQBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), (VPCMPEQBrm VR128:$src1, addr:$src2)>; def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), (VPCMPEQWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), (VPCMPEQWrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), (VPCMPEQDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), (VPCMPEQDrm VR128:$src1, addr:$src2)>; def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), (VPCMPGTBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), (VPCMPGTBrm VR128:$src1, addr:$src2)>; def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), (VPCMPGTWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), (VPCMPGTWrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), (VPCMPGTDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), (VPCMPGTDrm VR128:$src1, addr:$src2)>; } @@ -3930,28 +3915,34 @@ let Predicates = [HasAVX2] in { def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, VR256:$src2)), (VPCMPEQBYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, (memop addr:$src2))), + def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, + (bc_v32i8 (memopv4i64 addr:$src2)))), (VPCMPEQBYrm VR256:$src1, addr:$src2)>; def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, VR256:$src2)), (VPCMPEQWYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, (memop addr:$src2))), + def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)))), (VPCMPEQWYrm VR256:$src1, addr:$src2)>; def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, VR256:$src2)), (VPCMPEQDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, (memop addr:$src2))), + def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)))), (VPCMPEQDYrm VR256:$src1, addr:$src2)>; def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, VR256:$src2)), (VPCMPGTBYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, (memop addr:$src2))), + def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, + (bc_v32i8 (memopv4i64 addr:$src2)))), (VPCMPGTBYrm VR256:$src1, addr:$src2)>; def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, VR256:$src2)), (VPCMPGTWYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, (memop addr:$src2))), + def : 
Pat<(v16i16 (X86pcmpgtw VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)))), (VPCMPGTWYrm VR256:$src1, addr:$src2)>; def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, VR256:$src2)), (VPCMPGTDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, (memop addr:$src2))), + def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)))), (VPCMPGTDYrm VR256:$src1, addr:$src2)>; } @@ -3973,28 +3964,34 @@ let Constraints = "$src1 = $dst" in { let Predicates = [HasSSE2] in { def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), (PCMPEQBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), (PCMPEQBrm VR128:$src1, addr:$src2)>; def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), (PCMPEQWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), (PCMPEQWrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), (PCMPEQDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), (PCMPEQDrm VR128:$src1, addr:$src2)>; def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), (PCMPGTBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), (PCMPGTBrm VR128:$src1, addr:$src2)>; def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), (PCMPGTWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), (PCMPGTWrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), (PCMPGTDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), (PCMPGTDrm VR128:$src1, addr:$src2)>; } @@ -4207,19 +4204,8 @@ let Predicates = [HasAVX] in { bc_v8i16, 0>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, + bc_v2i64, 0>, VEX_4V; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, bc_v16i8, 0>, VEX_4V; @@ -4227,19 +4213,8 @@ let Predicates = [HasAVX] in { bc_v8i16, 0>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. 
- def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, + bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { @@ -4249,19 +4224,8 @@ let Predicates = [HasAVX2] in { bc_v16i16>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, bc_v8i32>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq, + bc_v4i64>, VEX_4V; defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, bc_v32i8>, VEX_4V; @@ -4269,57 +4233,28 @@ let Predicates = [HasAVX2] in { bc_v16i16>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, bc_v8i32>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq, + bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. 
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; - - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, + bc_v16i8>; + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, + bc_v8i16>; + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, + bc_v4i32>; + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, + bc_v2i64>; + + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, + bc_v16i8>; + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, + bc_v8i16>; + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, + bc_v4i32>; + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, + bc_v2i64>; } } // ExeDomain = SSEPackedInt @@ -5052,21 +4987,25 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>; } -let Predicates = [HasAVX], - ExeDomain = SSEPackedDouble in { - defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, - f128mem, 0>, TB, XD, VEX_4V; - defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, - f128mem, 0>, TB, OpSize, VEX_4V; - defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, - f256mem, 0>, TB, XD, VEX_4V; - defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, - f256mem, 0>, TB, OpSize, VEX_4V; -} -let Constraints = "$src1 = $dst", Predicates = [HasSSE3], - ExeDomain = SSEPackedDouble in { +let Predicates = [HasAVX] in { + let ExeDomain = SSEPackedSingle in { + defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, + f128mem, 0>, TB, XD, VEX_4V; + defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, + f256mem, 0>, TB, XD, VEX_4V; + } + let ExeDomain = SSEPackedDouble in { + defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, + f128mem, 0>, TB, OpSize, VEX_4V; + defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, + f256mem, 0>, TB, OpSize, VEX_4V; + } +} +let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in { + let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, f128mem>, TB, XD; + let ExeDomain = SSEPackedDouble in defm ADDSUBPD : 
sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, f128mem>, TB, OpSize; } @@ -5106,29 +5045,37 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, } let Predicates = [HasAVX] in { - defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, - X86fhadd, 0>, VEX_4V; - defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, - X86fhadd, 0>, VEX_4V; - defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - X86fhsub, 0>, VEX_4V; - defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, - X86fhsub, 0>, VEX_4V; - defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, 0>, VEX_4V; - defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, 0>, VEX_4V; - defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, 0>, VEX_4V; - defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, 0>, VEX_4V; + let ExeDomain = SSEPackedSingle in { + defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, + X86fhadd, 0>, VEX_4V; + defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, + X86fhsub, 0>, VEX_4V; + defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, + X86fhadd, 0>, VEX_4V; + defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, + X86fhsub, 0>, VEX_4V; + } + let ExeDomain = SSEPackedDouble in { + defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, + X86fhadd, 0>, VEX_4V; + defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, + X86fhsub, 0>, VEX_4V; + defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, + X86fhadd, 0>, VEX_4V; + defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, + X86fhsub, 0>, VEX_4V; + } } let Constraints = "$src1 = $dst" in { - defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; - defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; - defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; - defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; + let ExeDomain = SSEPackedSingle in { + defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; + defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; + } + let ExeDomain = SSEPackedDouble in { + defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; + defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; + } } //===---------------------------------------------------------------------===// @@ -5284,11 +5231,11 @@ let isCommutable = 0 in { int_x86_avx2_pmadd_ub_sw>, VEX_4V; defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8, int_x86_avx2_pshuf_b>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv16i8, + defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8, int_x86_avx2_psign_b>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv8i16, + defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16, int_x86_avx2_psign_w>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv4i32, + defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32, int_x86_avx2_psign_d>, VEX_4V; } defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16, @@ -5331,12 +5278,21 @@ let Predicates = [HasSSSE3] in { def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), (PSHUFBrm128 VR128:$src, addr:$mask)>; - def : Pat<(X86psignb VR128:$src1, VR128:$src2), + def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), 
(PSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(X86psignw VR128:$src1, VR128:$src2), + def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), (PSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(X86psignd VR128:$src1, VR128:$src2), + def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), (PSIGNDrr128 VR128:$src1, VR128:$src2)>; + + def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), + (PHADDWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), + (PHADDDrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), + (PHSUBWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), + (PHSUBDrr128 VR128:$src1, VR128:$src2)>; } let Predicates = [HasAVX] in { @@ -5345,12 +5301,39 @@ let Predicates = [HasAVX] in { def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), (VPSHUFBrm128 VR128:$src, addr:$mask)>; - def : Pat<(X86psignb VR128:$src1, VR128:$src2), + def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), (VPSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(X86psignw VR128:$src1, VR128:$src2), + def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(X86psignd VR128:$src1, VR128:$src2), + def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; + + def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), + (VPHADDWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), + (VPHADDDrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), + (VPHSUBWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), + (VPHSUBDrr128 VR128:$src1, VR128:$src2)>; +} + +let Predicates = [HasAVX2] in { + def : Pat<(v32i8 (X86psign VR256:$src1, VR256:$src2)), + (VPSIGNBrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v16i16 (X86psign VR256:$src1, VR256:$src2)), + (VPSIGNWrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)), + (VPSIGNDrr256 VR256:$src1, VR256:$src2)>; + + def : Pat<(v16i16 (X86hadd VR256:$src1, VR256:$src2)), + (VPHADDWrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86hadd VR256:$src1, VR256:$src2)), + (VPHADDDrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v16i16 (X86hsub VR256:$src1, VR256:$src2)), + (VPHSUBWrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86hsub VR256:$src1, VR256:$src2)), + (VPHSUBDrr256 VR256:$src1, VR256:$src2)>; } //===---------------------------------------------------------------------===// @@ -5837,14 +5820,16 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let Predicates = [HasAVX] in { - defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; - def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, OpSize, VEX; +let ExeDomain = SSEPackedSingle in { + let Predicates = [HasAVX] in { + defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; + def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, OpSize, VEX; + } + defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; } -defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; // Also match an EXTRACTPS store when the store is done as f32 instead of i32. 
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), @@ -5965,10 +5950,12 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { imm:$src3))]>, OpSize; } -let Constraints = "$src1 = $dst" in - defm INSERTPS : SS41I_insertf32<0x21, "insertps">; -let Predicates = [HasAVX] in - defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; +let ExeDomain = SSEPackedSingle in { + let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; + let Predicates = [HasAVX] in + defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; +} def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, @@ -5985,6 +5972,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, PatFrag mem_frag32, PatFrag mem_frag64, Intrinsic V4F32Int, Intrinsic V2F64Int> { +let ExeDomain = SSEPackedSingle in { // Intrinsic operation, reg. // Vector intrinsic operation, reg def PSr : SS4AIi8<opcps, MRMSrcReg, @@ -5995,15 +5983,16 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, OpSize; // Vector intrinsic operation, mem - def PSm : Ii8<opcps, MRMSrcMem, + def PSm : SS4AIi8<opcps, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>, - TA, OpSize, - Requires<[HasSSE41]>; + OpSize; +} // ExeDomain = SSEPackedSingle +let ExeDomain = SSEPackedDouble in { // Vector intrinsic operation, reg def PDr : SS4AIi8<opcpd, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), @@ -6020,44 +6009,14 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, [(set RC:$dst, (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>, OpSize; -} - -multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd, - RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr_AVX : SS4AIi8<opcps, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, OpSize; - - // Vector intrinsic operation, mem - def PSm_AVX : Ii8<opcps, MRMSrcMem, - (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, TA, OpSize, Requires<[HasSSE41]>; - - // Vector intrinsic operation, reg - def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, OpSize; - - // Vector intrinsic operation, mem - def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem, - (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, OpSize; +} // ExeDomain = SSEPackedDouble } multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, string OpcodeStr, Intrinsic F32Int, Intrinsic F64Int, bit Is2Addr = 1> { +let ExeDomain = GenericDomain in { // Intrinsic operation, reg. 
def SSr : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), @@ -6103,37 +6062,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, [(set VR128:$dst, (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, OpSize; -} - -multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd, - string OpcodeStr> { - // Intrinsic operation, reg. - def SSr_AVX : SS4AIi8<opcss, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; - - // Intrinsic operation, mem. - def SSm_AVX : SS4AIi8<opcss, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; - - // Intrinsic operation, reg. - def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; - - // Intrinsic operation, mem. - def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; +} // ExeDomain = GenericDomain } // FP round - roundss, roundps, roundsd, roundpd @@ -6150,13 +6079,6 @@ let Predicates = [HasAVX] in { defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; - - // Instructions for the assembler - defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">, - VEX; - defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">, - VEX; - defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -6194,11 +6116,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", + "ptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", + "ptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, OpSize; } @@ -6216,11 +6138,15 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, } let Defs = [EFLAGS], Predicates = [HasAVX] in { +let ExeDomain = SSEPackedSingle in { defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>; +} +let ExeDomain = SSEPackedDouble in { defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; } +} //===----------------------------------------------------------------------===// // SSE4.1 - Misc Instructions @@ -6391,10 +6317,12 @@ let Constraints = "$src1 = $dst" in { defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; } -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), - (PCMPEQQrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), - (PCMPEQQrm VR128:$src1, 
addr:$src2)>; +let Predicates = [HasSSE41] in { + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (PCMPEQQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (PCMPEQQrm VR128:$src1, addr:$src2)>; +} /// SS48I_binop_rm - Simple SSE41 binary operator. multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -6470,23 +6398,30 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - VR128, memopv16i8, i128mem, 0>, VEX_4V; - defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - VR128, memopv16i8, i128mem, 0>, VEX_4V; - defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", - int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; - defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", - int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + let ExeDomain = SSEPackedSingle in { + defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, + VR128, memopv16i8, i128mem, 0>, VEX_4V; + defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", + int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + } + let ExeDomain = SSEPackedDouble in { + defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, + VR128, memopv16i8, i128mem, 0>, VEX_4V; + defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", + int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + } defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, VR128, memopv16i8, i128mem, 0>, VEX_4V; defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, VR128, memopv16i8, i128mem, 0>, VEX_4V; } + let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, memopv16i8, i128mem, 0>, VEX_4V; + let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, VR128, memopv16i8, i128mem, 0>, VEX_4V; + let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; } @@ -6502,8 +6437,10 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { + let ExeDomain = SSEPackedSingle in defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, VR128, memopv16i8, i128mem>; + let ExeDomain = SSEPackedDouble in defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, VR128, memopv16i8, i128mem>; defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, @@ -6511,8 +6448,10 @@ let Constraints = "$src1 = $dst" in { defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, VR128, memopv16i8, i128mem>; } + let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memopv16i8, i128mem>; + let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, VR128, memopv16i8, i128mem>; } @@ -6539,16 +6478,20 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, } let Predicates = [HasAVX] in { +let ExeDomain = SSEPackedDouble in { defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem, memopv16i8, int_x86_sse41_blendvpd>; -defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, - memopv16i8, 
int_x86_sse41_blendvps>; -defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, - memopv16i8, int_x86_sse41_pblendvb>; defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, memopv32i8, int_x86_avx_blendv_pd_256>; +} // ExeDomain = SSEPackedDouble +let ExeDomain = SSEPackedSingle in { +defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, + memopv16i8, int_x86_sse41_blendvps>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, memopv32i8, int_x86_avx_blendv_ps_256>; +} // ExeDomain = SSEPackedSingle +defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, + memopv16i8, int_x86_sse41_pblendvb>; } let Predicates = [HasAVX2] in { @@ -6612,7 +6555,9 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { } } +let ExeDomain = SSEPackedDouble in defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +let ExeDomain = SSEPackedSingle in defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; @@ -6712,10 +6657,12 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; -def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), - (PCMPGTQrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), - (PCMPGTQrm VR128:$src1, addr:$src2)>; +let Predicates = [HasSSE42] in { + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), + (PCMPGTQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), + (PCMPGTQrm VR128:$src1, addr:$src2)>; +} //===----------------------------------------------------------------------===// // SSE4.2 - String/text Processing Instructions @@ -7164,21 +7111,27 @@ class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (Int VR128:$src))]>, VEX; -def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, - int_x86_avx_vbroadcast_ss>; -def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, - int_x86_avx_vbroadcast_ss_256>; +let ExeDomain = SSEPackedSingle in { + def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, + int_x86_avx_vbroadcast_ss>; + def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, + int_x86_avx_vbroadcast_ss_256>; +} +let ExeDomain = SSEPackedDouble in def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, int_x86_avx_vbroadcast_sd_256>; def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256>; -def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, - int_x86_avx2_vbroadcast_ss_ps>; -def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, - int_x86_avx2_vbroadcast_ss_ps_256>; +let ExeDomain = SSEPackedSingle in { + def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, + int_x86_avx2_vbroadcast_ss_ps>; + def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, + int_x86_avx2_vbroadcast_ss_ps_256>; +} +let ExeDomain = SSEPackedDouble in def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, - int_x86_avx2_vbroadcast_sd_pd_256>; + int_x86_avx2_vbroadcast_sd_pd_256>; let Predicates = [HasAVX2] in def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, @@ -7187,19 
+7140,6 @@ def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; -def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSYrm addr:$src)>; -def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), - (VBROADCASTSDrm addr:$src)>; -def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSYrm addr:$src)>; -def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSDrm addr:$src)>; - -def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSrm addr:$src)>; -def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSrm addr:$src)>; //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values @@ -7300,8 +7240,7 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), // multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, Intrinsic IntLd, Intrinsic IntLd256, - Intrinsic IntSt, Intrinsic IntSt256, - PatFrag pf128, PatFrag pf256> { + Intrinsic IntSt, Intrinsic IntSt256> { def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -7322,18 +7261,18 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V; } +let ExeDomain = SSEPackedSingle in defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", int_x86_avx_maskload_ps, int_x86_avx_maskload_ps_256, int_x86_avx_maskstore_ps, - int_x86_avx_maskstore_ps_256, - memopv4f32, memopv8f32>; + int_x86_avx_maskstore_ps_256>; +let ExeDomain = SSEPackedDouble in defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", int_x86_avx_maskload_pd, int_x86_avx_maskload_pd_256, int_x86_avx_maskstore_pd, - int_x86_avx_maskstore_pd_256, - memopv2f64, memopv4f64>; + int_x86_avx_maskstore_pd_256>; //===----------------------------------------------------------------------===// // VPERMIL - Permute Single and Double Floating-Point Values @@ -7361,22 +7300,26 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX; } -defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - memopv4f32, memopv4i32, - int_x86_avx_vpermilvar_ps, - int_x86_avx_vpermil_ps>; -defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - memopv8f32, memopv8i32, - int_x86_avx_vpermilvar_ps_256, - int_x86_avx_vpermil_ps_256>; -defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, - memopv2f64, memopv2i64, - int_x86_avx_vpermilvar_pd, - int_x86_avx_vpermil_pd>; -defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, - memopv4f64, memopv4i64, - int_x86_avx_vpermilvar_pd_256, - int_x86_avx_vpermil_pd_256>; +let ExeDomain = SSEPackedSingle in { + defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, + memopv4f32, memopv4i32, + int_x86_avx_vpermilvar_ps, + int_x86_avx_vpermil_ps>; + defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, + memopv8f32, memopv8i32, + int_x86_avx_vpermilvar_ps_256, + int_x86_avx_vpermil_ps_256>; +} +let ExeDomain = SSEPackedDouble in { + defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, + memopv2f64, memopv2i64, + int_x86_avx_vpermilvar_pd, + int_x86_avx_vpermil_pd>; + defm VPERMILPDY 
: avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, + memopv4f64, memopv4i64, + int_x86_avx_vpermilvar_pd_256, + int_x86_avx_vpermil_pd_256>; +} def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; @@ -7549,6 +7492,40 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, int_x86_avx2_pbroadcastq_128, int_x86_avx2_pbroadcastq_256>; +let Predicates = [HasAVX2] in { + def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))), + (VPBROADCASTBrm addr:$src)>; + def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))), + (VPBROADCASTBYrm addr:$src)>; + def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWYrm addr:$src)>; + def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDrm addr:$src)>; + def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDYrm addr:$src)>; + def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), + (VPBROADCASTQrm addr:$src)>; + def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), + (VPBROADCASTQYrm addr:$src)>; +} + +// AVX1 broadcast patterns +def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSYrm addr:$src)>; +def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), + (VBROADCASTSDrm addr:$src)>; +def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSSYrm addr:$src)>; +def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), + (VBROADCASTSDrm addr:$src)>; + +def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSSrm addr:$src)>; +def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSrm addr:$src)>; + //===----------------------------------------------------------------------===// // VPERM - Permute instructions // @@ -7569,6 +7546,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, } defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>; +let ExeDomain = SSEPackedSingle in defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, @@ -7588,6 +7566,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>, VEX_W; +let ExeDomain = SSEPackedDouble in defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, VEX_W; @@ -7643,8 +7622,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), // multiclass avx2_pmovmask<string OpcodeStr, Intrinsic IntLd128, Intrinsic IntLd256, - Intrinsic IntSt128, Intrinsic IntSt256, - PatFrag pf128, PatFrag pf256> { + Intrinsic IntSt128, Intrinsic IntSt256> { def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -7667,124 +7645,49 @@ defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", int_x86_avx2_maskload_d, int_x86_avx2_maskload_d_256, int_x86_avx2_maskstore_d, - int_x86_avx2_maskstore_d_256, - memopv4i32, memopv8i32>; + int_x86_avx2_maskstore_d_256>; defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskload_q, int_x86_avx2_maskload_q_256, int_x86_avx2_maskstore_q, - int_x86_avx2_maskstore_q_256, - memopv2i64, memopv4i64>, VEX_W; + int_x86_avx2_maskstore_q_256>, VEX_W; //===----------------------------------------------------------------------===// // Variable Bit Shifts // -multiclass avx2_var_shift<bits<8> opc, string 
OpcodeStr, - Intrinsic Int128, Intrinsic Int256> { +multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType vt128, ValueType vt256> { def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V; - def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (Int128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>, - VEX_4V; - def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V; - def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, i256mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (Int256 VR256:$src1, (bitconvert (memopv4i64 addr:$src2))))]>, + (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>, VEX_4V; -} - -multiclass avx2_var_shift_i64<bits<8> opc, string OpcodeStr, - Intrinsic Int128, Intrinsic Int256> { - def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V; def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (Int128 VR128:$src1, (memopv2i64 addr:$src2)))]>, + (vt128 (OpNode VR128:$src1, + (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>, VEX_4V; def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V; + [(set VR256:$dst, + (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>, + VEX_4V; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (Int256 VR256:$src1, (memopv4i64 addr:$src2)))]>, + (vt256 (OpNode VR256:$src1, + (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>, VEX_4V; } -defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", int_x86_avx2_psllv_d, - int_x86_avx2_psllv_d_256>; -defm VPSLLVQ : avx2_var_shift_i64<0x47, "vpsllvq", int_x86_avx2_psllv_q, - int_x86_avx2_psllv_q_256>, VEX_W; -defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", int_x86_avx2_psrlv_d, - int_x86_avx2_psrlv_d_256>; -defm VPSRLVQ : avx2_var_shift_i64<0x45, "vpsrlvq", int_x86_avx2_psrlv_q, - int_x86_avx2_psrlv_q_256>, VEX_W; -defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", int_x86_avx2_psrav_d, - int_x86_avx2_psrav_d_256>; - -let Predicates = [HasAVX2] in { - def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSLLVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), - (VPSLLVQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (srl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSRLVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), - (VPSRLVQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSRAVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i32 (shl 
(v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSLLVDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), - (VPSLLVQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (srl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSRLVDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), - (VPSRLVQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSRAVDYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(v4i32 (shl (v4i32 VR128:$src1), - (v4i32 (bitconvert (memopv2i64 addr:$src2))))), - (VPSLLVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))), - (VPSLLVQrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (srl (v4i32 VR128:$src1), - (v4i32 (bitconvert (memopv2i64 addr:$src2))))), - (VPSRLVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))), - (VPSRLVQrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (sra (v4i32 VR128:$src1), - (v4i32 (bitconvert (memopv2i64 addr:$src2))))), - (VPSRAVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i32 (shl (v8i32 VR256:$src1), - (v8i32 (bitconvert (memopv4i64 addr:$src2))))), - (VPSLLVDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))), - (VPSLLVQYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (srl (v8i32 VR256:$src1), - (v8i32 (bitconvert (memopv4i64 addr:$src2))))), - (VPSRLVDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))), - (VPSRLVQYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (sra (v8i32 VR256:$src1), - (v8i32 (bitconvert (memopv4i64 addr:$src2))))), - (VPSRAVDYrm VR256:$src1, addr:$src2)>; -} +defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; +defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; +defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; +defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; +defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 328cf67..81ee665 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -376,6 +376,7 @@ ReSimplify: case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break; + case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 763fb43..e93f8e9 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -190,6 +190,10 @@ public: bool hasAVX2() const { return HasAVX2; } bool hasXMM() const { return hasSSE1() || hasAVX(); } bool hasXMMInt() const { return hasSSE2() || hasAVX(); } + bool hasSSE3orAVX() const { return hasSSE3() || hasAVX(); } + bool hasSSSE3orAVX() const { return hasSSSE3() || hasAVX(); } + bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); } + bool hasSSE42orAVX() const { return hasSSE42() || hasAVX(); } bool hasAES() const { return HasAES; } bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } diff --git a/lib/Target/X86/X86TargetMachine.cpp 
b/lib/Target/X86/X86TargetMachine.cpp index 4d4d7c0..1c9f3bd 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -31,8 +31,9 @@ extern "C" void LLVMInitializeX86Target() { X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : X86TargetMachine(T, TT, CPU, FS, RM, CM, false), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-" "n8:16:32-S128" : @@ -51,8 +52,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : X86TargetMachine(T, TT, CPU, FS, RM, CM, true), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, true), DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" "n8:16:32:64-S128"), InstrInfo(*this), @@ -66,8 +68,9 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { @@ -102,16 +105,15 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, static cl::opt<bool> UseVZeroUpper("x86-use-vzeroupper", cl::desc("Minimize AVX to SSE transition penalty"), - cl::init(false)); + cl::init(true)); //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool X86TargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool X86TargetMachine::addInstSelector(PassManagerBase &PM) { // Install an instruction selector. - PM.add(createX86ISelDag(*this, OptLevel)); + PM.add(createX86ISelDag(*this, getOptLevel())); // For 32-bit, prepend instructions to set the "global base reg" for PIC. if (!Subtarget.is64Bit()) @@ -120,33 +122,21 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, return false; } -bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM) { PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } -bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM) { PM.add(createX86FloatingPointStackifierPass()); return true; // -print-machineinstr should print after this. 
} -bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM) { bool ShouldPrint = false; - if (OptLevel != CodeGenOpt::None) { - if (Subtarget.hasXMMInt()) { - PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); - ShouldPrint = true; - } - if (Subtarget.hasAVX2()) { - // FIXME this should be turned on for just AVX, but the pass doesn't check - // that instructions are valid before replacing them and there are AVX2 - // integer instructions in the table. - PM.add(createExecutionDependencyFixPass(&X86::VR256RegClass)); - ShouldPrint = true; - } + if (getOptLevel() != CodeGenOpt::None && Subtarget.hasXMMInt()) { + PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); + ShouldPrint = true; } if (Subtarget.hasAVX() && UseVZeroUpper) { @@ -158,7 +148,6 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, } bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { PM.add(createX86JITCodeEmitterPass(*this, JCE)); diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index d1569aa..64be458 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -40,6 +40,7 @@ public: X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { @@ -66,11 +67,11 @@ public: } // Set up the pass pipeline. - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreRegAlloc(PassManagerBase &PM); + virtual bool addPostRegAlloc(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); }; @@ -85,7 +86,8 @@ class X86_32TargetMachine : public X86TargetMachine { public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; @@ -112,7 +114,8 @@ class X86_64TargetMachine : public X86TargetMachine { public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 276e841..7d5fcce 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -61,9 +61,10 @@ static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model 
CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index fdc5d35..eec3674 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -21,8 +21,9 @@ using namespace llvm; /// XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), @@ -32,8 +33,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, TSInfo(*this) { } -bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) { PM.add(createXCoreISelDag(*this)); return false; } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 83d09d6..3f2644d 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -34,7 +34,8 @@ class XCoreTargetMachine : public LLVMTargetMachine { public: XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameLowering *getFrameLowering() const { @@ -55,7 +56,7 @@ public: virtual const TargetData *getTargetData() const { return &DataLayout; } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addInstSelector(PassManagerBase &PM); }; } // end namespace llvm
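
A note on the bc_v16i8/bc_v8i16/bc_v4i32 (memopv2i64 ...) rewrites in the PCMPEQ/PCMPGT and PUNPCK hunks above: the x86 backend canonicalizes a 128-bit (resp. 256-bit) integer-vector load as a v2i64 (resp. v4i64) load, so byte/word/dword patterns must match the load through an explicit bitconvert fragment rather than a bare, type-ambiguous (memop addr:$src2). The reinterpretation is free at run time because every lane width is a view of the same 16 bytes. A minimal, self-contained C++ model of that equivalence (types and names here are illustrative, not LLVM code):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // A 128-bit vector viewed as two i64 lanes or as sixteen i8 lanes.
    struct V2I64 { uint64_t lane[2]; };
    struct V16I8 { uint8_t lane[16]; };

    // Model of the bc_v16i8 fragment: a pure reinterpretation of the same
    // 16 bytes, so "load as v2i64, then bitconvert" equals "load as v16i8".
    static V16I8 bitconvert(const V2I64 &v) {
      V16I8 r;
      std::memcpy(r.lane, v.lane, sizeof r.lane);
      return r;
    }

    int main() {
      const uint8_t mem[16] = {1, 2,  3,  4,  5,  6,  7,  8,
                               9, 10, 11, 12, 13, 14, 15, 16};
      V2I64 wide;                        // the canonical v2i64 load
      std::memcpy(wide.lane, mem, 16);
      V16I8 narrow = bitconvert(wide);   // the bc_v16i8 view of the same load
      for (int i = 0; i < 16; ++i)
        assert(narrow.lane[i] == mem[i]);
      return 0;
    }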
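The many ExeDomain = SSEPackedSingle / SSEPackedDouble annotations threaded through X86InstrSSE.td exist for the execution-dependency fix that X86TargetMachine::addPreEmitPass now schedules on VR128 whenever SSE2/AVX integer support is present: an instruction tagged with a packed domain can be re-encoded as an equivalent opcode in another domain, avoiding the bypass-delay penalty when a value crosses between the integer and floating-point execution domains. A toy, self-contained sketch of the remapping idea (the enum and table below are illustrative stand-ins, not LLVM's actual data structures, which live in X86InstrInfo):

    #include <cstdio>
    #include <cstring>

    enum Domain { PackedSingle = 0, PackedDouble = 1, PackedInt = 2 };

    // One logical operation per row, one equivalent encoding per domain.
    static const char *const ReplaceableOps[][3] = {
        // PackedSingle  PackedDouble  PackedInt
        {"orps",         "orpd",       "por"},
        {"andps",        "andpd",      "pand"},
        {"xorps",        "xorpd",      "pxor"},
    };

    // Rewrite an instruction into the domain its neighbors execute in, if an
    // equivalent encoding exists; otherwise keep it unchanged.
    static const char *remapToDomain(const char *mnemonic, Domain want) {
      for (const auto &row : ReplaceableOps)
        for (const char *candidate : row)
          if (std::strcmp(candidate, mnemonic) == 0)
            return row[want];
      return mnemonic;
    }

    int main() {
      // A "por" sitting between single-precision ops gets re-encoded.
      std::printf("%s\n", remapToDomain("por", PackedSingle)); // prints: orps
      return 0;
    }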
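The avx2_var_shift rewrite replaces the per-intrinsic multiclasses (and the separate avx2_var_shift_i64, plus the long trailing pattern list) with patterns on the generic shl/srl/sra SDNodes, so ordinary IR vector shifts with per-element shift amounts select directly to VPSLLV/VPSRLV/VPSRAV. The hardware semantics being relied on: each lane is shifted by its own count, and counts at or beyond the bit width produce zero for logical shifts, which is compatible with the generic nodes because oversized shift amounts are undefined there. A self-contained scalar model in C++ (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Scalar model of VPSLLVD: each 32-bit lane of a is shifted left by the
    // corresponding lane of cnt; counts >= 32 yield 0 rather than being
    // masked, unlike the scalar SHL instruction.
    static void vpsllvd(uint32_t out[4], const uint32_t a[4],
                        const uint32_t cnt[4]) {
      for (int i = 0; i < 4; ++i)
        out[i] = cnt[i] >= 32 ? 0 : a[i] << cnt[i];
    }

    int main() {
      const uint32_t a[4]   = {1, 1, 0x80000000u, 7};
      const uint32_t cnt[4] = {0, 31, 1, 40};
      uint32_t r[4];
      vpsllvd(r, a, cnt);
      assert(r[0] == 1 && r[1] == 0x80000000u && r[2] == 0 && r[3] == 0);
      return 0;
    }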
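The new hasSSE3orAVX()/hasSSSE3orAVX()/hasSSE41orAVX()/hasSSE42orAVX() helpers on X86Subtarget condense a test that recurs throughout lowering code: the SSE level and AVX are tracked as independent feature flags, yet AVX hardware can execute the VEX form of every SSE instruction, so most call sites really want "this SSE level, or AVX". A toy model of the pattern the helpers capture (not the real X86Subtarget class):

    #include <cassert>

    // SSE level and AVX are independent flags; the combined predicate is
    // what feature checks almost always want. Illustrative only.
    struct Subtarget {
      bool SSE41, AVX;
      bool hasSSE41() const { return SSE41; }
      bool hasAVX() const { return AVX; }
      bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); }
    };

    int main() {
      Subtarget avxOnly{/*SSE41=*/false, /*AVX=*/true};
      assert(avxOnly.hasSSE41orAVX()); // AVX alone satisfies SSE4.1-or-AVX uses
      return 0;
    }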
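Finally, the signature churn across X86TargetMachine, XCoreTargetMachine, and the MC layer all stems from one API change: the optimization level is now supplied once, at TargetMachine construction (and recorded in MCCodeGenInfo), instead of being threaded into every addInstSelector/addPreRegAlloc/addPostRegAlloc/addPreEmitPass hook, which now read it back via getOptLevel(). A hedged sketch of client code after the change; the createTargetMachine overload and the createTM wrapper are assumed from the constructor signatures updated in this patch, not taken from it:

    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"

    // Hypothetical helper: the CodeGenOpt::Level now rides along with the
    // Reloc and CodeModel choices at construction time.
    llvm::TargetMachine *createTM(const llvm::Target &T,
                                  llvm::StringRef Triple) {
      return T.createTargetMachine(Triple, /*CPU=*/"", /*Features=*/"",
                                   llvm::Reloc::Default,
                                   llvm::CodeModel::Default,
                                   llvm::CodeGenOpt::Default);
    }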