From 41418d17cced656f91038b2482bc9d173b4974b0 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 7 Aug 2013 22:49:12 +0000 Subject: Add ISD::FROUND for libm round() All libm floating-point rounding functions, except for round(), had their own ISD nodes. Recent PowerPC cores have an instruction for round(), and so here I'm adding ISD::FROUND so that round() can be custom lowered as well. For the most part, this is straightforward. I've added an intrinsic and a matching ISD node just like those for nearbyint() and friends. The SelectionDAG pattern I've named frnd (because ISD::FP_ROUND has already claimed fround). This will be used by the PowerPC backend in a follow-up commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187926 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCTRLoops.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 4e30c537..05dad8a 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -259,6 +259,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } } @@ -309,6 +310,10 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case LibFunc::rintf: case LibFunc::rintl: Opcode = ISD::FRINT; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + Opcode = ISD::FROUND; break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: -- cgit v1.1 From 05a4d2642b415e3332651733015b656bc3c7b9bc Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 8 Aug 2013 04:31:34 +0000 Subject: PPC: Map frin to round() not nearbyint() and rint() Making use of the recently-added ISD::FROUND, which allows for custom lowering of round(), the PPC backend will now map frin to round(). Previously, we had been using frin to lower nearbyint() (and rint() via some custom lowering to handle the extra fenv flags requirements), but only in fast-math mode because frin does not tie-to-even. Several users had complained about this behavior, and this new mapping of frin to round is certainly more appropriate (and does not require fast-math mode). In effect, this reverts r178362 (and part of r178337, replacing the nearbyint mapping with the round mapping). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187960 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 58 ++-------------------------------- lib/Target/PowerPC/PPCInstrInfo.td | 14 ++------ 2 files changed, 4 insertions(+), 68 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 664dd12..886def1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -156,21 +156,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - - // frin does not implement "ties to even." 
Thus, this is safe only in - // fast-math mode. - if (TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - - // These need to set FE_INEXACT, and use a custom inserter. - setOperationAction(ISD::FRINT, MVT::f64, Legal); - setOperationAction(ISD::FRINT, MVT::f32, Legal); - } + setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, CTPOP or CTTZ @@ -6676,51 +6667,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); - } else if (MI->getOpcode() == PPC::FRINDrint || - MI->getOpcode() == PPC::FRINSrint) { - bool isf32 = MI->getOpcode() == PPC::FRINSrint; - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); - - // Perform the rounding. - BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest) - .addReg(Src); - - // Compare the results. - BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg) - .addReg(Dest).addReg(Src); - - // If the results were not equal, then set the FPSCR XX bit. - MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, midMBB); - F->insert(It, exitMBB); - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - BuildMI(*BB, MI, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB); - - BB->addSuccessor(midMBB); - BB->addSuccessor(exitMBB); - - BB = midMBB; - - // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set - // the FI bit here because that will not automatically set XX also, - // and XX is what libm interprets as the FE_INEXACT flag. - BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6); - BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); - - BB->addSuccessor(exitMBB); - - BB = exitMBB; } else { llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 398a11b..84ddb3f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1686,23 +1686,13 @@ let Uses = [RM] in { "frsp", "$frD, $frB", FPGeneral, [(set f32:$frD, (fround f64:$frB))]>; - // The frin -> nearbyint mapping is valid only in fast-math mode. let Interpretation64Bit = 1 in defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), "frin", "$frD, $frB", FPGeneral, - [(set f64:$frD, (fnearbyint f64:$frB))]>; + [(set f64:$frD, (frnd f64:$frB))]>; defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), "frin", "$frD, $frB", FPGeneral, - [(set f32:$frD, (fnearbyint f32:$frB))]>; - } - - // These pseudos expand to rint but also set FE_INEXACT when the result does - // not equal the argument. - let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR! 
- def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB), - "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>; - def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB), - "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>; + [(set f32:$frD, (frnd f32:$frB))]>; } let neverHasSideEffects = 1 in { -- cgit v1.1 From 868bed99674cf19e5cb475945f3067f0f4cc421e Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Thu, 8 Aug 2013 10:20:41 +0000 Subject: The name "tCDP" isn't used anywhere else in the source code, so renaming it for consistency doesn't cause any problems. This is the only Thumb2 instruction defined with "t" prefix; all other Thumb2 instructions have "t2" prefix (e.g. "t2CDP2" which is defined immediately afterwards). Patch by Artyom Skrobov. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187973 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 84086a5..7de0901 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3943,7 +3943,7 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>; // Other Coprocessor Instructions. // -def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, +def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, -- cgit v1.1 From 72feef14f16d138114040e486fb2c1ef7a9358e0 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Thu, 8 Aug 2013 15:19:25 +0000 Subject: Fix the comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187984 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2703274..e3db491 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -53,7 +53,7 @@ namespace llvm { /// to X86::XORPS or X86::XORPD. FXOR, - /// FAND - Bitwise logical ANDNOT of floating point values. This + /// FANDN - Bitwise logical ANDNOT of floating point values. This /// corresponds to X86::ANDNPS or X86::ANDNPD. FANDN, -- cgit v1.1 From cee35382331d792ed9354bb7fe0f3e10b09f46a3 Mon Sep 17 00:00:00 2001 From: Silviu Baranga Date: Thu, 8 Aug 2013 15:47:33 +0000 Subject: Remove the now redundant FeatureFP16 from the Cortex-A15 feature list. It was made redundant when FeatureVFP4 was added which implies FP16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187985 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index e5da3a5..067ad13 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -179,7 +179,7 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", // FIXME: It has not been determined if A15 has these features. 
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", - [FeatureT2XtPk, FeatureFP16, FeatureVFP4, + [FeatureT2XtPk, FeatureVFP4, FeatureAvoidPartialCPSR, FeatureTrustZone]>; def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", -- cgit v1.1 From 91955e78e718b4a7d2272ce476e29774f6ac84ea Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Thu, 8 Aug 2013 15:48:46 +0000 Subject: Use pop_back() instead of pop_back_val() when the returned value is not used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187986 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ad83d97..ced12fc 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -80,7 +80,7 @@ private: PostfixStack.push_back(std::make_pair(Op, Val)); } - void popOperator() { InfixOperatorStack.pop_back_val(); } + void popOperator() { InfixOperatorStack.pop_back(); } void pushOperator(InfixCalculatorTok Op) { // Push the new operator if the stack is empty. if (InfixOperatorStack.empty()) { @@ -118,12 +118,12 @@ private: if (StackOp == IC_RPAREN) { ++ParenCount; - InfixOperatorStack.pop_back_val(); + InfixOperatorStack.pop_back(); } else if (StackOp == IC_LPAREN) { --ParenCount; - InfixOperatorStack.pop_back_val(); + InfixOperatorStack.pop_back(); } else { - InfixOperatorStack.pop_back_val(); + InfixOperatorStack.pop_back(); PostfixStack.push_back(std::make_pair(StackOp, 0)); } } -- cgit v1.1 From 7114e2e7cff35364230795123d9049b96098725e Mon Sep 17 00:00:00 2001 From: Niels Ole Salscheider Date: Thu, 8 Aug 2013 16:06:08 +0000 Subject: R600/SI: Implement sint<->fp64 conversions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187987 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 6 ++++++ lib/Target/R600/SIInstructions.td | 8 ++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 52af79c..302fa24 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -184,6 +184,12 @@ multiclass VOP1_32 op, string opName, list pattern> multiclass VOP1_64 op, string opName, list pattern> : VOP1_Helper ; +multiclass VOP1_32_64 op, string opName, list pattern> + : VOP1_Helper ; + +multiclass VOP1_64_32 op, string opName, list pattern> + : VOP1_Helper ; + multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc, string opName, list pattern, string revOp> { def _e32 : VOP2 < diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 500d15e..efe7a3e 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -603,8 +603,12 @@ defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; } // End neverHasSideEffects = 1, isMoveImm = 1 defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; -//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; -//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; +defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64", + [(set i32:$dst, (fp_to_sint f64:$src0))] +>; +defm V_CVT_F64_I32 : VOP1_64_32 <0x00000004, "V_CVT_F64_I32", + [(set f64:$dst, (sint_to_fp i32:$src0))] +>; defm V_CVT_F32_I32 : VOP1_32 <0x00000005, 
"V_CVT_F32_I32", [(set f32:$dst, (sint_to_fp i32:$src0))] >; -- cgit v1.1 From 014773626d2678868adf696ac58c44d2b2980fa8 Mon Sep 17 00:00:00 2001 From: Niels Ole Salscheider Date: Thu, 8 Aug 2013 16:06:15 +0000 Subject: R600/SI: Implement fp32<->fp64 conversions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187988 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 +++ lib/Target/R600/SIInstructions.td | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index c64027f..b714fc1 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -85,6 +85,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setTargetDAGCombine(ISD::SELECT_CC); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index efe7a3e..dc41885 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -625,8 +625,12 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; //defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; //defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>; //defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>; -//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>; -//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>; +defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64", + [(set f32:$dst, (fround f64:$src0))] +>; +defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32", + [(set f64:$dst, (fextend f32:$src0))] +>; //defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>; //defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; //defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; -- cgit v1.1 From d4f9d05fde4b2cfd202a5852ec1ec3e960ef53ed Mon Sep 17 00:00:00 2001 From: David Fang Date: Thu, 8 Aug 2013 20:14:40 +0000 Subject: initial draft of PPCMachObjectWriter.cpp this records relocation entries in the mach-o object file for PIC code generation. 
tested on powerpc-darwin8, validated against darwin otool -rvV git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188004 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt | 1 + lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 24 +- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 4 + .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 385 +++++++++++++++++++++ 4 files changed, 395 insertions(+), 19 deletions(-) create mode 100644 lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index 45be471..3efa5ec 100644 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMPowerPCDesc PPCMCCodeEmitter.cpp PPCMCExpr.cpp PPCPredicates.cpp + PPCMachObjectWriter.cpp PPCELFObjectWriter.cpp ) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index b2a8701..dd61954 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -69,19 +69,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { } namespace { -class PPCMachObjectWriter : public MCMachObjectTargetWriter { -public: - PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} - - void RecordRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) { - llvm_unreachable("Relocation emission for MachO/PPC unimplemented!"); - } -}; class PPCAsmBackend : public MCAsmBackend { const Target &TheTarget; @@ -174,12 +161,11 @@ namespace { MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; - return createMachObjectWriter(new PPCMachObjectWriter( - /*Is64Bit=*/is64, - (is64 ? object::mach::CTM_PowerPC64 : - object::mach::CTM_PowerPC), - object::mach::CSPPC_ALL), - OS, /*IsLittleEndian=*/false); + return createPPCMachObjectWriter( + OS, + /*Is64Bit=*/is64, + (is64 ? object::mach::CTM_PowerPC64 : object::mach::CTM_PowerPC), + object::mach::CSPPC_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 9f29132..16c3cb4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -46,6 +46,10 @@ MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU); MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI); +/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer. +MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); } // End llvm namespace // Generated files will use "namespace PPC". 
To avoid symbol clash, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp new file mode 100644 index 0000000..c907b2b --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -0,0 +1,385 @@ +//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" + +using namespace llvm; +using namespace llvm::object; + +namespace { +class PPCMachObjectWriter : public MCMachObjectTargetWriter { + bool RecordScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + unsigned Log2Size, uint64_t &FixedValue); + + void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue); + +public: + PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, + /*UseAggressiveSymbolFolding=*/Is64Bit) {} + + void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, + const MCAsmLayout &Layout, const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + if (Writer->is64Bit()) { + report_fatal_error("Relocation emission for MachO/PPC64 unimplemented."); + } else + RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + } +}; +} + +/// computes the log2 of the size of the relocation, +/// used for relocation_info::r_length. +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: + report_fatal_error("log2size(FixupKind): Unhandled fixup kind!"); + case FK_PCRel_1: + case FK_Data_1: + return 0; + case FK_PCRel_2: + case FK_Data_2: + return 1; + case FK_PCRel_4: + case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_half16: + case PPC::fixup_ppc_br24: + case FK_Data_4: + return 2; + case FK_PCRel_8: + case FK_Data_8: + return 3; + } + return 0; +} + +/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum. +/// Outline based on PPCELFObjectWriter::getRelocTypeInner(). +static unsigned getRelocType(const MCValue &Target, + const MCFixupKind FixupKind, // from + // Fixup.getKind() + const bool IsPCRel) { + const MCSymbolRefExpr::VariantKind Modifier = + Target.isAbsolute() ? 
MCSymbolRefExpr::VK_None + : Target.getSymA()->getKind(); + // determine the type of the relocation + unsigned Type = macho::RIT_Vanilla; + if (IsPCRel) { // relative to PC + switch ((unsigned)FixupKind) { + default: + report_fatal_error("Unimplemented fixup kind (relative)"); + case PPC::fixup_ppc_br24: + Type = macho::RIT_PPC_BR24; // R_PPC_REL24 + break; + case PPC::fixup_ppc_brcond14: + Type = macho::RIT_PPC_BR14; + break; + case PPC::fixup_ppc_half16: + switch (Modifier) { + default: + llvm_unreachable("Unsupported modifier for half16 fixup"); + case MCSymbolRefExpr::VK_PPC_HA: + Type = macho::RIT_PPC_HA16; + break; + case MCSymbolRefExpr::VK_PPC_LO: + Type = macho::RIT_PPC_LO16; + break; + case MCSymbolRefExpr::VK_PPC_HI: + Type = macho::RIT_PPC_HI16; + break; + } + break; + } + } else { + switch ((unsigned)FixupKind) { + default: + report_fatal_error("Unimplemented fixup kind (absolute)!"); + case PPC::fixup_ppc_half16: + switch (Modifier) { + default: + llvm_unreachable("Unsupported modifier for half16 fixup"); + case MCSymbolRefExpr::VK_PPC_HA: + Type = macho::RIT_PPC_HA16_SECTDIFF; + break; + case MCSymbolRefExpr::VK_PPC_LO: + Type = macho::RIT_PPC_LO16_SECTDIFF; + break; + case MCSymbolRefExpr::VK_PPC_HI: + Type = macho::RIT_PPC_HI16_SECTDIFF; + break; + } + break; + case FK_Data_4: + break; + case FK_Data_2: + break; + } + } + return Type; +} + +static void makeRelocationInfo(macho::RelocationEntry &MRE, + const uint32_t FixupOffset, const uint32_t Index, + const unsigned IsPCRel, const unsigned Log2Size, + const unsigned IsExtern, const unsigned Type) { + MRE.Word0 = FixupOffset; + // The bitfield offsets that work (as determined by trial-and-error) + // are different than what is documented in the mach-o manuals. + // Is this an endianness issue w/ PPC? + MRE.Word1 = ((Index << 8) | // was << 0 + (IsPCRel << 7) | // was << 24 + (Log2Size << 5) | // was << 25 + (IsExtern << 4) | // was << 27 + (Type << 0)); // was << 28 +} + +static void +makeScatteredRelocationInfo(macho::RelocationEntry &MRE, const uint32_t Addr, + const unsigned Type, const unsigned Log2Size, + const unsigned IsPCRel, const uint32_t Value2) { + // For notes on bitfield positions and endianness, see: + // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry + MRE.Word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) | (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; +} + +/// Compute fixup offset (address). +static uint32_t getFixupOffset(const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + // On Mach-O, ppc_fixup_half16 relocations must refer to the + // start of the instruction, not the second halfword, as ELF does + if (Fixup.getKind() == PPC::fixup_ppc_half16) + FixupOffset &= ~uint32_t(3); + return FixupOffset; +} + +/// \return false if falling back to using non-scattered relocation, +/// otherwise true for normal scattered relocation. +/// based on X86MachObjectWriter::RecordScatteredRelocation +/// and ARMMachObjectWriter::RecordScatteredRelocation +bool PPCMachObjectWriter::RecordScatteredRelocation( + MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + unsigned Log2Size, uint64_t &FixedValue) { + // caller already computes these, can we just pass and reuse? 
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup); + const MCFixupKind FK = Fixup.getKind(); + const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK); + const unsigned Type = getRelocType(Target, FK, IsPCRel); + + // Is this a local or SECTDIFF relocation entry? + // SECTDIFF relocation entries have symbol subtractions, + // and require two entries, the first for the add-symbol value, + // the second for the subtract-symbol value. + + // See . + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = + Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // FIXME: is Type correct? see include/llvm/Object/MachOFormat.h + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + // FIXME: does FixedValue get used?? + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == macho::RIT_PPC_SECTDIFF || Type == macho::RIT_PPC_HI16_SECTDIFF || + Type == macho::RIT_PPC_LO16_SECTDIFF || + Type == macho::RIT_PPC_HA16_SECTDIFF || + Type == macho::RIT_PPC_LO14_SECTDIFF || + Type == macho::RIT_PPC_LOCAL_SECTDIFF) { + // X86 had this piece, but ARM does not + // If the offset is too large to fit in a scattered relocation, + // we're hosed. It's an unfortunate limitation of the MachO format. + if (FixupOffset > 0xffffff) { + char Buffer[32]; + format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); + Asm.getContext().FatalError(Fixup.getLoc(), + Twine("Section too large, can't encode " + "r_address (") + + Buffer + ") into 24 bits of scattered " + "relocation entry."); + llvm_unreachable("fatal error returned?!"); + } + + // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()? + // see PPCMCExpr::EvaluateAsRelocatableImpl() + uint32_t other_half = 0; + switch (Type) { + case macho::RIT_PPC_LO16_SECTDIFF: + other_half = (FixedValue >> 16) & 0xffff; + // applyFixupOffset longer extracts the high part because it now assumes + // this was already done. + // It looks like this is not true for the FixedValue needed with Mach-O + // relocs. + // So we need to adjust FixedValue again here. + FixedValue &= 0xffff; + break; + case macho::RIT_PPC_HA16_SECTDIFF: + other_half = FixedValue & 0xffff; + FixedValue = + ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 1 : 0)) & 0xffff; + break; + case macho::RIT_PPC_HI16_SECTDIFF: + other_half = FixedValue & 0xffff; + FixedValue = (FixedValue >> 16) & 0xffff; + break; + default: + llvm_unreachable("Invalid PPC scattered relocation type."); + break; + } + + macho::RelocationEntry MRE; + makeScatteredRelocationInfo(MRE, other_half, macho::RIT_Pair, Log2Size, + IsPCRel, Value2); + Writer->addRelocation(Fragment->getParent(), MRE); + } else { + // If the offset is more than 24-bits, it won't fit in a scattered + // relocation offset field, so we fall back to using a non-scattered + // relocation. 
This is a bit risky, as if the offset reaches out of + // the block and the linker is doing scattered loading on this + // symbol, things can go badly. + // + // Required for 'as' compatibility. + if (FixupOffset > 0xffffff) + return false; + } + macho::RelocationEntry MRE; + makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value); + Writer->addRelocation(Fragment->getParent(), MRE); + return true; +} + +// see PPCELFObjectWriter for a general outline of cases +void PPCMachObjectWriter::RecordPPCRelocation( + MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + const MCFixupKind FK = Fixup.getKind(); // unsigned + const unsigned Log2Size = getFixupKindLog2Size(FK); + const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK); + const unsigned RelocType = getRelocType(Target, FK, IsPCRel); + + // If this is a difference or a defined symbol plus an offset, then we need a + // scattered relocation entry. Differences always require scattered + // relocations. + if (Target.getSymB() && + // Q: are branch targets ever scattered? + RelocType != macho::RIT_PPC_BR24 && RelocType != macho::RIT_PPC_BR14) { + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + Log2Size, FixedValue); + return; + } + + // this doesn't seem right for RIT_PPC_BR24 + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // See . + const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = RelocType; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + report_fatal_error("FIXME: relocations to absolute targets " + "not yet implemented"); + // the above line stolen from ARM, not sure + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + } + + // Check whether we need an external or internal relocation. + if (Writer->doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). 
+ const MCSectionData &SymSD = + Asm.getSectionData(SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += Writer->getSectionAddress(&SymSD); + } + if (IsPCRel) + FixedValue -= Writer->getSectionAddress(Fragment->getParent()); + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern, + Type); + Writer->addRelocation(Fragment->getParent(), MRE); +} + +MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype) { + return createMachObjectWriter( + new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS, + /*IsLittleEndian=*/false); +} -- cgit v1.1 From d76adee1b214fe1f5a26b8ffaec3e461beda6c88 Mon Sep 17 00:00:00 2001 From: David Fang Date: Thu, 8 Aug 2013 21:29:30 +0000 Subject: cast fix to appease buildbot git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188014 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index c907b2b..fc9b892 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -178,7 +178,7 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout, uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); // On Mach-O, ppc_fixup_half16 relocations must refer to the // start of the instruction, not the second halfword, as ELF does - if (Fixup.getKind() == PPC::fixup_ppc_half16) + if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16) FixupOffset &= ~uint32_t(3); return FixupOffset; } -- cgit v1.1 From 94a88c49b0e87ee8c911669ff6c6bbd31b912542 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 8 Aug 2013 21:37:32 +0000 Subject: [mips] Delete register class HWRegs64. No functionality change. 
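HWR29_64 was only a second name for hardware register $29 (both were defined as MipsReg<29, "29">), so RDHWR64 can read it through the 32-bit HWRegs class instead of requiring a 64-bit twin. For illustration, the thread-pointer selection in MipsSEISelDAGToDAG.cpp now builds

    SDNode *Rdhwr =
      CurDAG->getMachineNode(RdhwrOpc, SDLoc(Node), Node->getValueType(0),
                             CurDAG->getRegister(Mips::HWR29, MVT::i32));

for both RDHWR and RDHWR64, rather than choosing between HWR29 and HWR29_64 based on pointer size.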
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188016 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 39 ----------------------------- lib/Target/Mips/Mips64InstrInfo.td | 2 +- lib/Target/Mips/MipsRegisterInfo.cpp | 1 - lib/Target/Mips/MipsRegisterInfo.td | 11 -------- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 6 ++--- 5 files changed, 3 insertions(+), 56 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 3dd6562..c14f907 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -97,9 +97,6 @@ class MipsAsmParser : public MCTargetAsmParser { parseHWRegs(SmallVectorImpl &Operands); MipsAsmParser::OperandMatchResultTy - parseHW64Regs(SmallVectorImpl &Operands); - - MipsAsmParser::OperandMatchResultTy parseCCRRegs(SmallVectorImpl &Operands); MipsAsmParser::OperandMatchResultTy @@ -221,7 +218,6 @@ public: Kind_GPR32, Kind_GPR64, Kind_HWRegs, - Kind_HW64Regs, Kind_FGR32Regs, Kind_FGR64Regs, Kind_AFGR64Regs, @@ -388,11 +384,6 @@ public: return Reg.Kind == Kind_HWRegs; } - bool isHW64RegsAsm() const { - assert((Kind == k_Register) && "Invalid access!"); - return Reg.Kind == Kind_HW64Regs; - } - bool isCCRAsm() const { assert((Kind == k_Register) && "Invalid access!"); return Reg.Kind == Kind_CCRRegs; @@ -1498,36 +1489,6 @@ MipsAsmParser::parseHWRegs(SmallVectorImpl &Operands) { } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::parseHW64Regs( - SmallVectorImpl &Operands) { - - if (!isMips64()) - return MatchOperand_NoMatch; - // If the first token is not '$' we have an error. - if (Parser.getTok().isNot(AsmToken::Dollar)) - return MatchOperand_NoMatch; - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat $ - - const AsmToken &Tok = Parser.getTok(); // Get the next token. - if (Tok.isNot(AsmToken::Integer)) - return MatchOperand_NoMatch; - - unsigned RegNum = Tok.getIntVal(); - // At the moment only hwreg29 is supported. - if (RegNum != 29) - return MatchOperand_ParseFail; - - MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S, - Parser.getTok().getLoc()); - op->setRegKind(MipsOperand::Kind_HW64Regs); - Operands.push_back(op); - - Parser.Lex(); // Eat the register number. - return MatchOperand_Success; -} - -MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCCRRegs(SmallVectorImpl &Operands) { // If the first token is not '$' we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index a752ab8..ddc550b 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -224,7 +224,7 @@ def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>; def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd, mem_ea_64>, LW_FM<0x19>; let isCodeGenOnly = 1 in -def RDHWR64 : ReadHardware, RDHWR_FM; +def RDHWR64 : ReadHardware, RDHWR_FM; def DEXT : ExtBase<"dext", GPR64Opnd>, EXT_FM<3>; let Pattern = [] in { diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 0b5fc33..eb1e056 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -146,7 +146,6 @@ getReservedRegs(const MachineFunction &MF) const { // Reserve hardware registers. Reserved.set(Mips::HWR29); - Reserved.set(Mips::HWR29_64); // Reserve DSP control register. 
Reserved.set(Mips::DSPPos); diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index c72c30d..377f1aa 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -190,7 +190,6 @@ let Namespace = "Mips" in { // Hardware register $29 def HWR29 : MipsReg<29, "29">; - def HWR29_64 : MipsReg<29, "29">; // Accum registers def AC0 : ACC<0, "ac0", [LO, HI]>; @@ -313,7 +312,6 @@ def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>; // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; -def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable; // Accumulator Registers def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> { @@ -392,19 +390,10 @@ def HWRegsAsmOperand : MipsAsmRegOperand { let ParserMethod = "parseHWRegs"; } -def HW64RegsAsmOperand : MipsAsmRegOperand { - let Name = "HW64RegsAsm"; - let ParserMethod = "parseHW64Regs"; -} - def HWRegsOpnd : RegisterOperand { let ParserMatchClass = HWRegsAsmOperand; } -def HW64RegsOpnd : RegisterOperand { - let ParserMatchClass = HW64RegsAsmOperand; -} - def AFGR64RegsOpnd : RegisterOperand { let ParserMatchClass = AFGR64AsmOperand; } diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 3b6480a..49da784 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -403,22 +403,20 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { case MipsISD::ThreadPointer: { EVT PtrVT = getTargetLowering()->getPointerTy(); - unsigned RdhwrOpc, SrcReg, DestReg; + unsigned RdhwrOpc, DestReg; if (PtrVT == MVT::i32) { RdhwrOpc = Mips::RDHWR; - SrcReg = Mips::HWR29; DestReg = Mips::V1; } else { RdhwrOpc = Mips::RDHWR64; - SrcReg = Mips::HWR29_64; DestReg = Mips::V1_64; } SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, SDLoc(Node), Node->getValueType(0), - CurDAG->getRegister(SrcReg, PtrVT)); + CurDAG->getRegister(Mips::HWR29, MVT::i32)); SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, SDValue(Rdhwr, 0)); SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); -- cgit v1.1 From 7af40bfa6614896913e0953bfe850d8c1ef0e593 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 8 Aug 2013 21:44:39 +0000 Subject: [mips] Mark pseudo instructions as code-gen only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDSPInstrInfo.td | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 526821a..2120a79 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -1240,14 +1240,14 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC; } // Pseudos. -let isPseudo = 1 in { +let isPseudo = 1, isCodeGenOnly = 1 in { // Pseudo instructions for loading and storing accumulator registers. - defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSPOpnd>; - defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSPOpnd>; + defm LOAD_AC_DSP : LoadM<"", ACRegsDSPOpnd>; + defm STORE_AC_DSP : StoreM<"", ACRegsDSPOpnd>; // Pseudos for loading and storing ccond field of DSP control register. 
- defm LOAD_CCOND_DSP : LoadM<"load_ccond_dsp", DSPCC>; - defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>; + defm LOAD_CCOND_DSP : LoadM<"", DSPCC>; + defm STORE_CCOND_DSP : StoreM<"", DSPCC>; } // Pseudo CMP and PICK instructions. -- cgit v1.1 From 491d04969d9f29ed891c73238648853954ba4f81 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 8 Aug 2013 21:54:26 +0000 Subject: [mips] Rename accumulator register classes and FP register operands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188020 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 14 +- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 18 +- lib/Target/Mips/Mips64InstrInfo.td | 16 +- lib/Target/Mips/MipsCondMov.td | 32 +-- lib/Target/Mips/MipsDSPInstrInfo.td | 68 +++---- lib/Target/Mips/MipsInstrFPU.td | 226 +++++++++++----------- lib/Target/Mips/MipsInstrInfo.td | 24 +-- lib/Target/Mips/MipsRegisterInfo.td | 34 ++-- lib/Target/Mips/MipsSEFrameLowering.cpp | 28 +-- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 4 +- lib/Target/Mips/MipsSEISelLowering.h | 4 +- lib/Target/Mips/MipsSEInstrInfo.cpp | 24 +-- 12 files changed, 246 insertions(+), 246 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index c14f907..0754052 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -112,7 +112,7 @@ class MipsAsmParser : public MCTargetAsmParser { parseFCCRegs(SmallVectorImpl &Operands); MipsAsmParser::OperandMatchResultTy - parseACRegsDSP(SmallVectorImpl &Operands); + parseACC64DSP(SmallVectorImpl &Operands); bool searchSymbolAlias(SmallVectorImpl &Operands, unsigned RegKind); @@ -223,7 +223,7 @@ public: Kind_AFGR64Regs, Kind_CCRRegs, Kind_FCCRegs, - Kind_ACRegsDSP + Kind_ACC64DSP }; private: @@ -405,8 +405,8 @@ public: return (Kind == k_Register) && Reg.Kind == Kind_FCCRegs; } - bool isACRegsDSPAsm() const { - return Kind == k_Register && Reg.Kind == Kind_ACRegsDSP; + bool isACC64DSPAsm() const { + return Kind == k_Register && Reg.Kind == Kind_ACC64DSP; } /// getStartLoc - Get the location of the first token of this operand. @@ -1368,7 +1368,7 @@ MipsAsmParser::parseFCCRegs(SmallVectorImpl &Operands) { } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::parseACRegsDSP(SmallVectorImpl &Operands) { +MipsAsmParser::parseACC64DSP(SmallVectorImpl &Operands) { // If the first token is not '$' we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1390,10 +1390,10 @@ MipsAsmParser::parseACRegsDSP(SmallVectorImpl &Operands) { if (NumString.getAsInteger(10, IntVal)) return MatchOperand_NoMatch; - unsigned Reg = matchRegisterByNumber(IntVal, Mips::ACRegsDSPRegClassID); + unsigned Reg = matchRegisterByNumber(IntVal, Mips::ACC64DSPRegClassID); MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc()); - Op->setRegKind(MipsOperand::Kind_ACRegsDSP); + Op->setRegKind(MipsOperand::Kind_ACC64DSP); Operands.push_back(Op); Parser.Lex(); // Eat the register number. 
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index d99df4d..d4f3109 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -138,10 +138,10 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst, unsigned RegNo, @@ -477,14 +477,14 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { if (RegNo >= 4) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo); + unsigned Reg = getReg(Decoder, Mips::ACC64DSPRegClassID, RegNo); Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index ddc550b..226d241 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -62,8 +62,8 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc], /// Pseudo instructions for loading and storing accumulator registers. let isPseudo = 1, isCodeGenOnly = 1 in { - defm LOAD_AC128 : LoadM<"", ACRegs128>; - defm STORE_AC128 : StoreM<"", ACRegs128>; + defm LOAD_ACC128 : LoadM<"", ACC128>; + defm STORE_ACC128 : StoreM<"", ACC128>; } //===----------------------------------------------------------------------===// @@ -191,15 +191,15 @@ def DMULT : Mult<"dmult", IIImult, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1c>; def DMULTu : Mult<"dmultu", IIImult, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1d>; -def PseudoDMULT : MultDivPseudo; -def PseudoDMULTu : MultDivPseudo; def DSDIV : Div<"ddiv", IIIdiv, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1e>; def DUDIV : Div<"ddivu", IIIdiv, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1f>; -def PseudoDSDIV : MultDivPseudo; -def PseudoDUDIV : MultDivPseudo; let isCodeGenOnly = 1 in { @@ -331,8 +331,8 @@ def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)), def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>; // mflo/hi patterns. 
-def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)), - (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>; +def : MipsPat<(i64 (ExtractLOHI ACC128:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACC128:$ac, imm:$lohi_idx)>; //===----------------------------------------------------------------------===// // Instruction aliases diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 39862b3..b313c52 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -127,35 +127,35 @@ let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { NoItinerary>, ADD_FM<0, 0xb>; } -def MOVZ_I_S : CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32RegsOpnd, IIFmove>, +def MOVZ_I_S : CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, IIFmove>, CMov_I_F_FM<18, 16>; let isCodeGenOnly = 1 in -def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32RegsOpnd, IIFmove>, +def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, IIFmove>, CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]>; -def MOVN_I_S : CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32RegsOpnd, IIFmove>, +def MOVN_I_S : CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, IIFmove>, CMov_I_F_FM<19, 16>; let isCodeGenOnly = 1 in -def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32RegsOpnd, IIFmove>, +def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, IIFmove>, CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]>; let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64RegsOpnd, IIFmove>, + def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, IIFmove>, CMov_I_F_FM<18, 17>; - def MOVN_I_D32 : CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64RegsOpnd, IIFmove>, + def MOVN_I_D32 : CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, IIFmove>, CMov_I_F_FM<19, 17>; } let Predicates = [IsFP64bit, HasStdEnc], isCodeGenOnly = 1 in { - def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64RegsOpnd, IIFmove>, + def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, IIFmove>, CMov_I_F_FM<18, 17>; - def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64RegsOpnd, + def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, IIFmove>, CMov_I_F_FM<18, 17>; - def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64RegsOpnd, IIFmove>, + def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, IIFmove>, CMov_I_F_FM<19, 17>; - def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64RegsOpnd, + def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, IIFmove>, CMov_I_F_FM<19, 17>; } @@ -173,21 +173,21 @@ let isCodeGenOnly = 1 in def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, IIArith, MipsCMovFP_F>, CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]>; -def MOVT_S : CMov_F_F_FT<"movt.s", FGR32RegsOpnd, IIFmove, MipsCMovFP_T>, +def MOVT_S : CMov_F_F_FT<"movt.s", FGR32Opnd, IIFmove, MipsCMovFP_T>, CMov_F_F_FM<16, 1>; -def MOVF_S : CMov_F_F_FT<"movf.s", FGR32RegsOpnd, IIFmove, MipsCMovFP_F>, +def MOVF_S : CMov_F_F_FT<"movf.s", FGR32Opnd, IIFmove, MipsCMovFP_F>, CMov_F_F_FM<16, 0>; let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64RegsOpnd, IIFmove, MipsCMovFP_T>, + def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64Opnd, IIFmove, MipsCMovFP_T>, CMov_F_F_FM<17, 1>; - def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64RegsOpnd, IIFmove, MipsCMovFP_F>, + def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64Opnd, IIFmove, MipsCMovFP_F>, CMov_F_F_FM<17, 0>; } let Predicates = [IsFP64bit, HasStdEnc], isCodeGenOnly = 1 in { - def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64RegsOpnd, 
IIFmove, MipsCMovFP_T>, + def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, IIFmove, MipsCMovFP_T>, CMov_F_F_FM<17, 1>; - def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64RegsOpnd, IIFmove, MipsCMovFP_F>, + def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, IIFmove, MipsCMovFP_F>, CMov_F_F_FM<17, 0>; } diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 2120a79..ccd8f3e 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -380,7 +380,7 @@ class APPEND_DESC_BASE { dag OutOperandList = (outs GPR32:$rt); - dag InOperandList = (ins ACRegsDSP:$ac, GPR32:$shift_rs); + dag InOperandList = (ins ACC64DSP:$ac, GPR32:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; } @@ -388,35 +388,35 @@ class EXTR_W_TY1_R2_DESC_BASE { dag OutOperandList = (outs GPR32:$rt); - dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs); + dag InOperandList = (ins ACC64DSP:$ac, uimm16:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; } class SHILO_R1_DESC_BASE { - dag OutOperandList = (outs ACRegsDSP:$ac); - dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin); + dag OutOperandList = (outs ACC64DSP:$ac); + dag InOperandList = (ins simm16:$shift, ACC64DSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); - list Pattern = [(set ACRegsDSP:$ac, - (OpNode immSExt6:$shift, ACRegsDSP:$acin))]; + list Pattern = [(set ACC64DSP:$ac, + (OpNode immSExt6:$shift, ACC64DSP:$acin))]; string Constraints = "$acin = $ac"; } class SHILO_R2_DESC_BASE { - dag OutOperandList = (outs ACRegsDSP:$ac); - dag InOperandList = (ins GPR32:$rs, ACRegsDSP:$acin); + dag OutOperandList = (outs ACC64DSP:$ac); + dag InOperandList = (ins GPR32:$rs, ACC64DSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); - list Pattern = [(set ACRegsDSP:$ac, - (OpNode GPR32:$rs, ACRegsDSP:$acin))]; + list Pattern = [(set ACC64DSP:$ac, + (OpNode GPR32:$rs, ACC64DSP:$acin))]; string Constraints = "$acin = $ac"; } class MTHLIP_DESC_BASE { - dag OutOperandList = (outs ACRegsDSP:$ac); - dag InOperandList = (ins GPR32:$rs, ACRegsDSP:$acin); + dag OutOperandList = (outs ACC64DSP:$ac); + dag InOperandList = (ins GPR32:$rs, ACC64DSP:$acin); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); - list Pattern = [(set ACRegsDSP:$ac, - (OpNode GPR32:$rs, ACRegsDSP:$acin))]; + list Pattern = [(set ACC64DSP:$ac, + (OpNode GPR32:$rs, ACC64DSP:$acin))]; string Constraints = "$acin = $ac"; } @@ -439,20 +439,20 @@ class WRDSP_DESC_BASE { - dag OutOperandList = (outs ACRegsDSP:$ac); - dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin); + dag OutOperandList = (outs ACC64DSP:$ac); + dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACC64DSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); - list Pattern = [(set ACRegsDSP:$ac, - (OpNode GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin))]; + list Pattern = [(set ACC64DSP:$ac, + (OpNode GPR32:$rs, GPR32:$rt, ACC64DSP:$acin))]; string Constraints = "$acin = $ac"; } class MULT_DESC_BASE { - dag OutOperandList = (outs ACRegsDSP:$ac); + dag OutOperandList = (outs ACC64DSP:$ac); dag InOperandList = (ins GPR32:$rs, GPR32:$rt); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); - list Pattern = [(set ACRegsDSP:$ac, (OpNode GPR32:$rs, GPR32:$rt))]; + list Pattern = [(set ACC64DSP:$ac, (OpNode GPR32:$rs, GPR32:$rt))]; InstrItinClass Itinerary = itin; int AddedComplexity = 20; bit isCommutable = 1; 
@@ -460,11 +460,11 @@ class MULT_DESC_BASE { - dag OutOperandList = (outs ACRegsDSP:$ac); - dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin); + dag OutOperandList = (outs ACC64DSP:$ac); + dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACC64DSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); - list Pattern = [(set ACRegsDSP:$ac, - (OpNode GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin))]; + list Pattern = [(set ACC64DSP:$ac, + (OpNode GPR32:$rs, GPR32:$rt, ACC64DSP:$acin))]; InstrItinClass Itinerary = itin; int AddedComplexity = 20; string Constraints = "$acin = $ac"; @@ -1242,12 +1242,12 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC; // Pseudos. let isPseudo = 1, isCodeGenOnly = 1 in { // Pseudo instructions for loading and storing accumulator registers. - defm LOAD_AC_DSP : LoadM<"", ACRegsDSPOpnd>; - defm STORE_AC_DSP : StoreM<"", ACRegsDSPOpnd>; + defm LOAD_ACC64DSP : LoadM<"", ACC64DSPOpnd>; + defm STORE_ACC64DSP : StoreM<"", ACC64DSPOpnd>; // Pseudos for loading and storing ccond field of DSP control register. - defm LOAD_CCOND_DSP : LoadM<"", DSPCC>; - defm STORE_CCOND_DSP : StoreM<"", DSPCC>; + defm LOAD_CCOND_DSP : LoadM<"load_ccond_dsp", DSPCC>; + defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>; } // Pseudo CMP and PICK instructions. @@ -1384,12 +1384,12 @@ def : DSPSelectCCPatInv; // Extr patterns. class EXTR_W_TY1_R2_Pat : - DSPPat<(i32 (OpNode GPR32:$rs, ACRegsDSP:$ac)), - (Instr ACRegsDSP:$ac, GPR32:$rs)>; + DSPPat<(i32 (OpNode GPR32:$rs, ACC64DSP:$ac)), + (Instr ACC64DSP:$ac, GPR32:$rs)>; class EXTR_W_TY1_R1_Pat : - DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)), - (Instr ACRegsDSP:$ac, immZExt5:$shift)>; + DSPPat<(i32 (OpNode immZExt5:$shift, ACC64DSP:$ac)), + (Instr ACC64DSP:$ac, immZExt5:$shift)>; def : EXTR_W_TY1_R1_Pat; def : EXTR_W_TY1_R2_Pat; @@ -1406,8 +1406,8 @@ def : EXTR_W_TY1_R2_Pat; // mflo/hi patterns. let AddedComplexity = 20 in -def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)), - (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>; +def : DSPPat<(i32 (ExtractLOHI ACC64DSP:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACC64DSP:$ac, imm:$lohi_idx)>; // Indexed load patterns. 
class IndexedLoadPat : diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index b992e77..f64e6f7 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -99,9 +99,9 @@ class ADDS_FT { - def _D32 : ADDS_FT, + def _D32 : ADDS_FT, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : ADDS_FT, + def _D64 : ADDS_FT, Requires<[IsFP64bit, HasStdEnc]> { string DecoderNamespace = "Mips64"; } @@ -115,18 +115,18 @@ class ABSS_FT { - def _D32 : ABSS_FT, + def _D32 : ABSS_FT, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : ABSS_FT, + def _D64 : ABSS_FT, Requires<[IsFP64bit, HasStdEnc]> { string DecoderNamespace = "Mips64"; } } multiclass ROUND_M { - def _D32 : ABSS_FT, + def _D32 : ABSS_FT, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : ABSS_FT, + def _D64 : ABSS_FT, Requires<[IsFP64bit, HasStdEnc]> { let DecoderNamespace = "Mips64"; } @@ -231,24 +231,24 @@ multiclass C_COND_M fmt> { def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC>, C_COND_FM; } -defm S : C_COND_M<"s", FGR32RegsOpnd, 16>; -defm D32 : C_COND_M<"d", AFGR64RegsOpnd, 17>, +defm S : C_COND_M<"s", FGR32Opnd, 16>; +defm D32 : C_COND_M<"d", AFGR64Opnd, 17>, Requires<[NotFP64bit, HasStdEnc]>; let DecoderNamespace = "Mips64" in -defm D64 : C_COND_M<"d", FGR64RegsOpnd, 17>, Requires<[IsFP64bit, HasStdEnc]>; +defm D64 : C_COND_M<"d", FGR64Opnd, 17>, Requires<[IsFP64bit, HasStdEnc]>; //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// -def ROUND_W_S : ABSS_FT<"round.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, +def ROUND_W_S : ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xc, 16>; -def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, +def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xd, 16>; -def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, +def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xe, 16>; -def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, +def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xf, 16>; -def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, +def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x24, 16>; defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>; @@ -258,71 +258,71 @@ defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>; defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>; let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { - def ROUND_L_S : ABSS_FT<"round.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x8, 16>; - def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, + def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0x8, 17>; - def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x9, 16>; - def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, + def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0x9, 17>; - def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xa, 16>; - def CEIL_L_D64 : 
ABSS_FT<"ceil.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, + def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0xa, 17>; - def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xb, 16>; - def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, + def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0xb, 17>; } -def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, +def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x20, 20>; -def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, +def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x25, 16>; -def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, +def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0x25, 17>; let Predicates = [NotFP64bit, HasStdEnc] in { - def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32RegsOpnd, AFGR64RegsOpnd, IIFcvt>, + def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, ABSS_FM<0x20, 17>; - def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x21, 20>; - def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x21, 16>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { - def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32RegsOpnd, FGR64RegsOpnd, IIFcvt>, + def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0x20, 17>; - def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32RegsOpnd, FGR64RegsOpnd, IIFcvt>, + def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0x20, 21>; - def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x21, 20>; - def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x21, 16>; - def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, + def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0x21, 21>; } let isPseudo = 1, isCodeGenOnly = 1 in { - def PseudoCVT_S_W : ABSS_FT<"", FGR32RegsOpnd, GPR32Opnd, IIFcvt>; - def PseudoCVT_D32_W : ABSS_FT<"", AFGR64RegsOpnd, GPR32Opnd, IIFcvt>; - def PseudoCVT_S_L : ABSS_FT<"", FGR64RegsOpnd, GPR64Opnd, IIFcvt>; - def PseudoCVT_D64_W : ABSS_FT<"", FGR64RegsOpnd, GPR32Opnd, IIFcvt>; - def PseudoCVT_D64_L : ABSS_FT<"", FGR64RegsOpnd, GPR64Opnd, IIFcvt>; + def PseudoCVT_S_W : ABSS_FT<"", FGR32Opnd, GPR32Opnd, IIFcvt>; + def PseudoCVT_D32_W : ABSS_FT<"", AFGR64Opnd, GPR32Opnd, IIFcvt>; + def PseudoCVT_S_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, IIFcvt>; + def PseudoCVT_D64_W : ABSS_FT<"", FGR64Opnd, GPR32Opnd, IIFcvt>; + def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, IIFcvt>; } let Predicates = [NoNaNsFPMath, HasStdEnc] in { - def FABS_S : ABSS_FT<"abs.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt, fabs>, + def FABS_S : ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, IIFcvt, fabs>, ABSS_FM<0x5, 16>; - def FNEG_S : ABSS_FT<"neg.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt, fneg>, + def FNEG_S : ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, IIFcvt, fneg>, ABSS_FM<0x7, 16>; defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>; 
defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>; } -def FSQRT_S : ABSS_FT<"sqrt.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFsqrtSingle, +def FSQRT_S : ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, IIFsqrtSingle, fsqrt>, ABSS_FM<0x4, 16>; defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; @@ -334,164 +334,164 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; /// Move Control Registers From/To CPU Registers def CFC1 : MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, IIFmove>, MFC1_FM<2>; def CTC1 : MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, IIFmove>, MFC1_FM<6>; -def MFC1 : MFC1_FT<"mfc1", GPR32Opnd, FGR32RegsOpnd, IIFmoveC1, bitconvert>, +def MFC1 : MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, IIFmoveC1, bitconvert>, MFC1_FM<0>; -def MTC1 : MTC1_FT<"mtc1", FGR32RegsOpnd, GPR32Opnd, IIFmoveC1, bitconvert>, +def MTC1 : MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, IIFmoveC1, bitconvert>, MFC1_FM<4>; -def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64RegsOpnd, IIFmoveC1, +def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, IIFmoveC1, bitconvert>, MFC1_FM<1>; -def DMTC1 : MTC1_FT<"dmtc1", FGR64RegsOpnd, GPR64Opnd, IIFmoveC1, +def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, IIFmoveC1, bitconvert>, MFC1_FM<5>; -def FMOV_S : ABSS_FT<"mov.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFmove>, +def FMOV_S : ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, IIFmove>, ABSS_FM<0x6, 16>; -def FMOV_D32 : ABSS_FT<"mov.d", AFGR64RegsOpnd, AFGR64RegsOpnd, IIFmove>, +def FMOV_D32 : ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, IIFmove>, ABSS_FM<0x6, 17>, Requires<[NotFP64bit, HasStdEnc]>; -def FMOV_D64 : ABSS_FT<"mov.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFmove>, +def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, IIFmove>, ABSS_FM<0x6, 17>, Requires<[IsFP64bit, HasStdEnc]> { let DecoderNamespace = "Mips64"; } /// Floating Point Memory Instructions let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LWC1_P8 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem64, load>, + def LWC1_P8 : LW_FT<"lwc1", FGR32Opnd, IIFLoad, mem64, load>, LW_FM<0x31>; - def SWC1_P8 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem64, store>, + def SWC1_P8 : SW_FT<"swc1", FGR32Opnd, IIFStore, mem64, store>, LW_FM<0x39>; - def LDC164_P8 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem64, load>, + def LDC164_P8 : LW_FT<"ldc1", FGR64Opnd, IIFLoad, mem64, load>, LW_FM<0x35> { let isCodeGenOnly =1; } - def SDC164_P8 : SW_FT<"sdc1", FGR64RegsOpnd, IIFStore, mem64, store>, + def SDC164_P8 : SW_FT<"sdc1", FGR64Opnd, IIFStore, mem64, store>, LW_FM<0x3d> { let isCodeGenOnly =1; } } let Predicates = [NotN64, HasStdEnc] in { - def LWC1 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem, load>, LW_FM<0x31>; - def SWC1 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem, store>, LW_FM<0x39>; + def LWC1 : LW_FT<"lwc1", FGR32Opnd, IIFLoad, mem, load>, LW_FM<0x31>; + def SWC1 : SW_FT<"swc1", FGR32Opnd, IIFStore, mem, store>, LW_FM<0x39>; } let Predicates = [NotN64, HasMips64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LDC164 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem, load>, LW_FM<0x35>; - def SDC164 : SW_FT<"sdc1", FGR64RegsOpnd, IIFStore, mem, store>, LW_FM<0x3d>; + def LDC164 : LW_FT<"ldc1", FGR64Opnd, IIFLoad, mem, load>, LW_FM<0x35>; + def SDC164 : SW_FT<"sdc1", FGR64Opnd, IIFStore, mem, store>, LW_FM<0x3d>; } let Predicates = [NotN64, NotMips64, HasStdEnc] in { let isPseudo = 1, isCodeGenOnly = 1 in { - def PseudoLDC1 : LW_FT<"", AFGR64RegsOpnd, IIFLoad, mem, load>; - def PseudoSDC1 : SW_FT<"", AFGR64RegsOpnd, IIFStore, mem, store>; + def PseudoLDC1 : LW_FT<"", 
AFGR64Opnd, IIFLoad, mem, load>; + def PseudoSDC1 : SW_FT<"", AFGR64Opnd, IIFStore, mem, store>; } - def LDC1 : LW_FT<"ldc1", AFGR64RegsOpnd, IIFLoad, mem>, LW_FM<0x35>; - def SDC1 : SW_FT<"sdc1", AFGR64RegsOpnd, IIFStore, mem>, LW_FM<0x3d>; + def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, mem>, LW_FM<0x35>; + def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, mem>, LW_FM<0x3d>; } // Indexed loads and stores. let Predicates = [HasFPIdx, HasStdEnc] in { - def LWXC1 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, GPR32Opnd, IIFLoad, load>, + def LWXC1 : LWXC1_FT<"lwxc1", FGR32Opnd, GPR32Opnd, IIFLoad, load>, LWXC1_FM<0>; - def SWXC1 : SWXC1_FT<"swxc1", FGR32RegsOpnd, GPR32Opnd, IIFStore, store>, + def SWXC1 : SWXC1_FT<"swxc1", FGR32Opnd, GPR32Opnd, IIFStore, store>, SWXC1_FM<8>; } let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in { - def LDXC1 : LWXC1_FT<"ldxc1", AFGR64RegsOpnd, GPR32Opnd, IIFLoad, load>, + def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, GPR32Opnd, IIFLoad, load>, LWXC1_FM<1>; - def SDXC1 : SWXC1_FT<"sdxc1", AFGR64RegsOpnd, GPR32Opnd, IIFStore, store>, + def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, GPR32Opnd, IIFStore, store>, SWXC1_FM<9>; } let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in { - def LDXC164 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, GPR32Opnd, IIFLoad, load>, + def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, GPR32Opnd, IIFLoad, load>, LWXC1_FM<1>; - def SDXC164 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, GPR32Opnd, IIFStore, store>, + def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, GPR32Opnd, IIFStore, store>, SWXC1_FM<9>; } // n64 let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in { - def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, GPR64Opnd, IIFLoad, load>, + def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32Opnd, GPR64Opnd, IIFLoad, load>, LWXC1_FM<0>; - def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, GPR64Opnd, IIFLoad, + def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64Opnd, GPR64Opnd, IIFLoad, load>, LWXC1_FM<1>; - def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32RegsOpnd, GPR64Opnd, IIFStore, + def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32Opnd, GPR64Opnd, IIFStore, store>, SWXC1_FM<8>; - def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, GPR64Opnd, IIFStore, + def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64Opnd, GPR64Opnd, IIFStore, store>, SWXC1_FM<9>; } // Load/store doubleword indexed unaligned. 
let Predicates = [NotMips64, HasStdEnc] in { - def LUXC1 : LWXC1_FT<"luxc1", AFGR64RegsOpnd, GPR32Opnd, IIFLoad>, + def LUXC1 : LWXC1_FT<"luxc1", AFGR64Opnd, GPR32Opnd, IIFLoad>, LWXC1_FM<0x5>; - def SUXC1 : SWXC1_FT<"suxc1", AFGR64RegsOpnd, GPR32Opnd, IIFStore>, + def SUXC1 : SWXC1_FT<"suxc1", AFGR64Opnd, GPR32Opnd, IIFStore>, SWXC1_FM<0xd>; } let Predicates = [HasMips64, HasStdEnc], DecoderNamespace="Mips64" in { - def LUXC164 : LWXC1_FT<"luxc1", FGR64RegsOpnd, GPR32Opnd, IIFLoad>, + def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, GPR32Opnd, IIFLoad>, LWXC1_FM<0x5>; - def SUXC164 : SWXC1_FT<"suxc1", FGR64RegsOpnd, GPR32Opnd, IIFStore>, + def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, GPR32Opnd, IIFStore>, SWXC1_FM<0xd>; } /// Floating-point Aritmetic -def FADD_S : ADDS_FT<"add.s", FGR32RegsOpnd, IIFadd, 1, fadd>, +def FADD_S : ADDS_FT<"add.s", FGR32Opnd, IIFadd, 1, fadd>, ADDS_FM<0x00, 16>; defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>; -def FDIV_S : ADDS_FT<"div.s", FGR32RegsOpnd, IIFdivSingle, 0, fdiv>, +def FDIV_S : ADDS_FT<"div.s", FGR32Opnd, IIFdivSingle, 0, fdiv>, ADDS_FM<0x03, 16>; defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>; -def FMUL_S : ADDS_FT<"mul.s", FGR32RegsOpnd, IIFmulSingle, 1, fmul>, +def FMUL_S : ADDS_FT<"mul.s", FGR32Opnd, IIFmulSingle, 1, fmul>, ADDS_FM<0x02, 16>; defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>; -def FSUB_S : ADDS_FT<"sub.s", FGR32RegsOpnd, IIFadd, 0, fsub>, +def FSUB_S : ADDS_FT<"sub.s", FGR32Opnd, IIFadd, 0, fsub>, ADDS_FM<0x01, 16>; defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>; let Predicates = [HasMips32r2, HasStdEnc] in { - def MADD_S : MADDS_FT<"madd.s", FGR32RegsOpnd, IIFmulSingle, fadd>, + def MADD_S : MADDS_FT<"madd.s", FGR32Opnd, IIFmulSingle, fadd>, MADDS_FM<4, 0>; - def MSUB_S : MADDS_FT<"msub.s", FGR32RegsOpnd, IIFmulSingle, fsub>, + def MSUB_S : MADDS_FT<"msub.s", FGR32Opnd, IIFmulSingle, fsub>, MADDS_FM<5, 0>; } let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in { - def NMADD_S : NMADDS_FT<"nmadd.s", FGR32RegsOpnd, IIFmulSingle, fadd>, + def NMADD_S : NMADDS_FT<"nmadd.s", FGR32Opnd, IIFmulSingle, fadd>, MADDS_FM<6, 0>; - def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32RegsOpnd, IIFmulSingle, fsub>, + def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32Opnd, IIFmulSingle, fsub>, MADDS_FM<7, 0>; } let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in { - def MADD_D32 : MADDS_FT<"madd.d", AFGR64RegsOpnd, IIFmulDouble, fadd>, + def MADD_D32 : MADDS_FT<"madd.d", AFGR64Opnd, IIFmulDouble, fadd>, MADDS_FM<4, 1>; - def MSUB_D32 : MADDS_FT<"msub.d", AFGR64RegsOpnd, IIFmulDouble, fsub>, + def MSUB_D32 : MADDS_FT<"msub.d", AFGR64Opnd, IIFmulDouble, fsub>, MADDS_FM<5, 1>; } let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in { - def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64RegsOpnd, IIFmulDouble, fadd>, + def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64Opnd, IIFmulDouble, fadd>, MADDS_FM<6, 1>; - def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64RegsOpnd, IIFmulDouble, fsub>, + def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64Opnd, IIFmulDouble, fsub>, MADDS_FM<7, 1>; } let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in { - def MADD_D64 : MADDS_FT<"madd.d", FGR64RegsOpnd, IIFmulDouble, fadd>, + def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, IIFmulDouble, fadd>, MADDS_FM<4, 1>; - def MSUB_D64 : MADDS_FT<"msub.d", FGR64RegsOpnd, IIFmulDouble, fsub>, + def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, IIFmulDouble, fsub>, MADDS_FM<5, 1>; } let Predicates = [HasMips32r2, 
IsFP64bit, NoNaNsFPMath, HasStdEnc], isCodeGenOnly=1 in { - def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64RegsOpnd, IIFmulDouble, fadd>, + def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, IIFmulDouble, fadd>, MADDS_FM<6, 1>; - def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64RegsOpnd, IIFmulDouble, fsub>, + def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, IIFmulDouble, fsub>, MADDS_FM<7, 1>; } @@ -543,9 +543,9 @@ def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. def BuildPairF64 : - PseudoSE<(outs AFGR64RegsOpnd:$dst), + PseudoSE<(outs AFGR64Opnd:$dst), (ins GPR32Opnd:$lo, GPR32Opnd:$hi), - [(set AFGR64RegsOpnd:$dst, + [(set AFGR64Opnd:$dst, (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register @@ -553,9 +553,9 @@ def BuildPairF64 : // if n is 0, lower part of src is extracted. // if n is 1, higher part of src is extracted. def ExtractElementF64 : - PseudoSE<(outs GPR32Opnd:$dst), (ins AFGR64RegsOpnd:$src, i32imm:$n), + PseudoSE<(outs GPR32Opnd:$dst), (ins AFGR64Opnd:$src, i32imm:$n), [(set GPR32Opnd:$dst, - (MipsExtractElementF64 AFGR64RegsOpnd:$src, imm:$n))]>; + (MipsExtractElementF64 AFGR64Opnd:$src, imm:$n))]>; //===----------------------------------------------------------------------===// // InstAliases. @@ -571,18 +571,18 @@ def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; def : MipsPat<(f32 (sint_to_fp GPR32Opnd:$src)), (PseudoCVT_S_W GPR32Opnd:$src)>; -def : MipsPat<(MipsTruncIntFP FGR32RegsOpnd:$src), - (TRUNC_W_S FGR32RegsOpnd:$src)>; +def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), + (TRUNC_W_S FGR32Opnd:$src)>; let Predicates = [NotFP64bit, HasStdEnc] in { def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)), (PseudoCVT_D32_W GPR32Opnd:$src)>; - def : MipsPat<(MipsTruncIntFP AFGR64RegsOpnd:$src), - (TRUNC_W_D32 AFGR64RegsOpnd:$src)>; - def : MipsPat<(f32 (fround AFGR64RegsOpnd:$src)), - (CVT_S_D32 AFGR64RegsOpnd:$src)>; - def : MipsPat<(f64 (fextend FGR32RegsOpnd:$src)), - (CVT_D32_S FGR32RegsOpnd:$src)>; + def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src), + (TRUNC_W_D32 AFGR64Opnd:$src)>; + def : MipsPat<(f32 (fround AFGR64Opnd:$src)), + (CVT_S_D32 AFGR64Opnd:$src)>; + def : MipsPat<(f64 (fextend FGR32Opnd:$src)), + (CVT_D32_S FGR32Opnd:$src)>; } let Predicates = [IsFP64bit, HasStdEnc] in { @@ -596,17 +596,17 @@ let Predicates = [IsFP64bit, HasStdEnc] in { def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)), (PseudoCVT_D64_L GPR64Opnd:$src)>; - def : MipsPat<(MipsTruncIntFP FGR64RegsOpnd:$src), - (TRUNC_W_D64 FGR64RegsOpnd:$src)>; - def : MipsPat<(MipsTruncIntFP FGR32RegsOpnd:$src), - (TRUNC_L_S FGR32RegsOpnd:$src)>; - def : MipsPat<(MipsTruncIntFP FGR64RegsOpnd:$src), - (TRUNC_L_D64 FGR64RegsOpnd:$src)>; - - def : MipsPat<(f32 (fround FGR64RegsOpnd:$src)), - (CVT_S_D64 FGR64RegsOpnd:$src)>; - def : MipsPat<(f64 (fextend FGR32RegsOpnd:$src)), - (CVT_D64_S FGR32RegsOpnd:$src)>; + def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), + (TRUNC_W_D64 FGR64Opnd:$src)>; + def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), + (TRUNC_L_S FGR32Opnd:$src)>; + def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), + (TRUNC_L_D64 FGR64Opnd:$src)>; + + def : MipsPat<(f32 (fround FGR64Opnd:$src)), + (CVT_S_D64 FGR64Opnd:$src)>; + def : MipsPat<(f64 (fextend FGR32Opnd:$src)), + (CVT_D64_S FGR32Opnd:$src)>; } // Patterns for loads/stores with a reg+imm operand. 
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index b9e8895..c321330 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -698,10 +698,10 @@ class MultDivPseudo - : PseudoSE<(outs ACRegs:$ac), - (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACRegs:$acin), - [(set ACRegs:$ac, - (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACRegs:$acin))], + : PseudoSE<(outs ACC64:$ac), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64:$acin), + [(set ACC64:$ac, + (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64:$acin))], IIImult>, PseudoInstExpansion<(RealInst GPR32Opnd:$rs, GPR32Opnd:$rt)> { string Constraints = "$acin = $ac"; @@ -875,8 +875,8 @@ let usesCustomInserter = 1 in { /// Pseudo instructions for loading and storing accumulator registers. let isPseudo = 1, isCodeGenOnly = 1 in { - defm LOAD_AC64 : LoadM<"", ACRegs>; - defm STORE_AC64 : StoreM<"", ACRegs>; + defm LOAD_ACC64 : LoadM<"", ACC64>; + defm STORE_ACC64 : StoreM<"", ACC64>; } //===----------------------------------------------------------------------===// @@ -1033,13 +1033,13 @@ def MULT : MMRel, Mult<"mult", IIImult, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x18>; def MULTu : MMRel, Mult<"multu", IIImult, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x19>; -def PseudoMULT : MultDivPseudo; -def PseudoMULTu : MultDivPseudo; +def PseudoMULT : MultDivPseudo; +def PseudoMULTu : MultDivPseudo; def SDIV : Div<"div", IIIdiv, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x1a>; def UDIV : Div<"divu", IIIdiv, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x1b>; -def PseudoSDIV : MultDivPseudo; -def PseudoUDIV : MultDivPseudo; def MTHI : MoveToLOHI<"mthi", GPR32Opnd, [HI]>, MTLO_FM<0x11>; @@ -1370,8 +1370,8 @@ defm : SetgeImmPats; def : MipsPat<(bswap GPR32:$rt), (ROTR (WSBH GPR32:$rt), 16)>; // mflo/hi patterns. -def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)), - (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>; +def : MipsPat<(i32 (ExtractLOHI ACC64:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACC64:$ac, imm:$lohi_idx)>; // Load halfword/word patterns. let AddedComplexity = 40 in { diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 377f1aa..aa5d39c 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -64,7 +64,7 @@ class AFPR64 Enc, string n, list subregs> } // Accumulator Registers -class ACC Enc, string n, list subregs> +class ACCReg Enc, string n, list subregs> : MipsRegWithSubRegs { let SubRegIndices = [sub_lo, sub_hi]; let CoveredBySubRegs = 1; @@ -192,12 +192,12 @@ let Namespace = "Mips" in { def HWR29 : MipsReg<29, "29">; // Accum registers - def AC0 : ACC<0, "ac0", [LO, HI]>; - def AC1 : ACC<1, "ac1", [LO1, HI1]>; - def AC2 : ACC<2, "ac2", [LO2, HI2]>; - def AC3 : ACC<3, "ac3", [LO3, HI3]>; + def AC0 : ACCReg<0, "ac0", [LO, HI]>; + def AC1 : ACCReg<1, "ac1", [LO1, HI1]>; + def AC2 : ACCReg<2, "ac2", [LO2, HI2]>; + def AC3 : ACCReg<3, "ac3", [LO3, HI3]>; - def AC0_64 : ACC<0, "ac0", [LO64, HI64]>; + def AC0_64 : ACCReg<0, "ac0", [LO64, HI64]>; // DSP-ASE control register fields. 
def DSPPos : Register<"">; @@ -314,15 +314,15 @@ def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>; def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; // Accumulator Registers -def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> { +def ACC64 : RegisterClass<"Mips", [untyped], 64, (add AC0)> { let Size = 64; } -def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> { +def ACC128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> { let Size = 128; } -def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { +def ACC64DSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { let Size = 64; } @@ -343,9 +343,9 @@ def GPR64AsmOperand : MipsAsmRegOperand { let ParserMethod = "parseGPR64"; } -def ACRegsDSPAsmOperand : MipsAsmRegOperand { - let Name = "ACRegsDSPAsm"; - let ParserMethod = "parseACRegsDSP"; +def ACC64DSPAsmOperand : MipsAsmRegOperand { + let Name = "ACC64DSPAsm"; + let ParserMethod = "parseACC64DSP"; } def CCRAsmOperand : MipsAsmRegOperand { @@ -394,15 +394,15 @@ def HWRegsOpnd : RegisterOperand { let ParserMatchClass = HWRegsAsmOperand; } -def AFGR64RegsOpnd : RegisterOperand { +def AFGR64Opnd : RegisterOperand { let ParserMatchClass = AFGR64AsmOperand; } -def FGR64RegsOpnd : RegisterOperand { +def FGR64Opnd : RegisterOperand { let ParserMatchClass = FGR64AsmOperand; } -def FGR32RegsOpnd : RegisterOperand { +def FGR32Opnd : RegisterOperand { let ParserMatchClass = FGR32AsmOperand; } @@ -410,6 +410,6 @@ def FCCRegsOpnd : RegisterOperand { let ParserMatchClass = FCCRegsAsmOperand; } -def ACRegsDSPOpnd : RegisterOperand { - let ParserMatchClass = ACRegsDSPAsmOperand; +def ACC64DSPOpnd : RegisterOperand { + let ParserMatchClass = ACC64DSPAsmOperand; } diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index d9e0fa4..2b0672c 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -77,24 +77,24 @@ bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { case Mips::STORE_CCOND_DSP_P8: expandStoreCCond(MBB, I); break; - case Mips::LOAD_AC64: - case Mips::LOAD_AC64_P8: - case Mips::LOAD_AC_DSP: - case Mips::LOAD_AC_DSP_P8: + case Mips::LOAD_ACC64: + case Mips::LOAD_ACC64_P8: + case Mips::LOAD_ACC64DSP: + case Mips::LOAD_ACC64DSP_P8: expandLoadACC(MBB, I, 4); break; - case Mips::LOAD_AC128: - case Mips::LOAD_AC128_P8: + case Mips::LOAD_ACC128: + case Mips::LOAD_ACC128_P8: expandLoadACC(MBB, I, 8); break; - case Mips::STORE_AC64: - case Mips::STORE_AC64_P8: - case Mips::STORE_AC_DSP: - case Mips::STORE_AC_DSP_P8: + case Mips::STORE_ACC64: + case Mips::STORE_ACC64_P8: + case Mips::STORE_ACC64DSP: + case Mips::STORE_ACC64DSP_P8: expandStoreACC(MBB, I, 4); break; - case Mips::STORE_AC128: - case Mips::STORE_AC128_P8: + case Mips::STORE_ACC128: + case Mips::STORE_ACC128_P8: expandStoreACC(MBB, I, 8); break; case TargetOpcode::COPY: @@ -210,10 +210,10 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) { unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); - if (Mips::ACRegsDSPRegClass.contains(Dst, Src)) + if (Mips::ACC64DSPRegClass.contains(Dst, Src)) return expandCopyACC(MBB, I, Dst, Src, 4); - if (Mips::ACRegs128RegClass.contains(Dst, Src)) + if (Mips::ACC128RegClass.contains(Dst, Src)) return expandCopyACC(MBB, I, Dst, Src, 8); return false; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp 
b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 49da784..37fbc3d 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -425,8 +425,8 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } case MipsISD::InsertLOHI: { - unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID : - Mips::ACRegsRegClassID; + unsigned RCID = Subtarget.hasDSP() ? Mips::ACC64DSPRegClassID : + Mips::ACC64RegClassID; SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32); SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32); SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32); diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index ec8a5c7..da1321b 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -38,8 +38,8 @@ namespace llvm { virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { if (VT == MVT::Untyped) - return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass : - &Mips::ACRegsRegClass; + return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass : + &Mips::ACC64RegClass; return TargetLowering::getRepRegClassFor(VT); } diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 9521043..b2c6caa 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -189,12 +189,12 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::SW_P8 : Mips::SW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::SD_P8 : Mips::SD; - else if (Mips::ACRegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64; - else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP; - else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128; + else if (Mips::ACC64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_ACC64_P8 : Mips::STORE_ACC64; + else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_ACC64DSP_P8 : Mips::STORE_ACC64DSP; + else if (Mips::ACC128RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_ACC128_P8 : Mips::STORE_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::STORE_CCOND_DSP_P8 : Mips::STORE_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) @@ -222,12 +222,12 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::LW_P8 : Mips::LW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::LD_P8 : Mips::LD; - else if (Mips::ACRegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64; - else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP; - else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128; + else if (Mips::ACC64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_ACC64_P8 : Mips::LOAD_ACC64; + else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_ACC64DSP_P8 : Mips::LOAD_ACC64DSP; + else if (Mips::ACC128RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_ACC128_P8 : Mips::LOAD_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) Opc = IsN64 ? 
Mips::LOAD_CCOND_DSP_P8 : Mips::LOAD_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) -- cgit v1.1 From 081a1941b595f6294e4ce678fd61ef56a2ceb51e Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Thu, 8 Aug 2013 22:27:13 +0000 Subject: [Object] Split the ELF interface into 3 parts. * ELFTypes.h contains template magic for defining types based on endianness, size, and alignment. * ELFFile.h defines the ELFFile class which provides low-level, ELF-specific access. * ELFObjectFile.h contains ELFObjectFile which uses ELFFile to implement the ObjectFile interface. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188022 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp index 8f4ab46..a3eb4fb 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp @@ -13,7 +13,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCRelocationInfo.h" -#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/ELF.h" using namespace llvm; -- cgit v1.1 From d12fce1a27c30292dcd5f5bc10d4ba6e742888be Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Thu, 8 Aug 2013 23:30:40 +0000 Subject: Mips ELF: MicroMips direct-object little-endian support. Test included. Patch by Zoran Jovanovich git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188024 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 4dc6917..d019f05 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -39,11 +39,14 @@ class MipsMCCodeEmitter : public MCCodeEmitter { MCContext &Ctx; const MCSubtargetInfo &STI; bool IsLittleEndian; + bool IsMicroMips; public: MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, const MCSubtargetInfo &sti, bool IsLittle) : - MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {} + MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) { + IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips; + } ~MipsMCCodeEmitter() {} @@ -53,9 +56,17 @@ public: void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const { // Output the instruction encoding in little endian byte order. - for (unsigned i = 0; i < Size; ++i) { - unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8; - EmitByte((Val >> Shift) & 0xff, OS); + // Little-endian byte ordering: + // mips32r2: 4 | 3 | 2 | 1 + // microMIPS: 2 | 1 | 4 | 3 + if (IsLittleEndian && Size == 4 && IsMicroMips) { + EmitInstruction(Val>>16, 2, OS); + EmitInstruction(Val, 2, OS); + } else { + for (unsigned i = 0; i < Size; ++i) { + unsigned Shift = IsLittleEndian ?
i * 8 : (Size - 1 - i) * 8; + EmitByte((Val >> Shift) & 0xff, OS); + } } } -- cgit v1.1 From 6514751d12ba6e1c01ba8f14862c99ecc1fff343 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 9 Aug 2013 09:25:57 +0000 Subject: [SystemZ] Update README git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188062 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/README.txt | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 2782b63..563513b 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -47,7 +47,7 @@ There is no scheduling support. -- -We don't use the BRANCH ON COUNT or BRANCH ON INDEX families of instruction. +We don't use the BRANCH ON INDEX instructions. -- @@ -56,18 +56,7 @@ and conditional returns. -- -We don't use the condition code results of anything except comparisons. - -Implementing this may need something more finely grained than the z_cmp -and z_ucmp that we have now. It might (or might not) also be useful to -have a mask of "don't care" values in conditional branches. For example, -integer comparisons never set CC to 3, so the bottom bit of the CC mask -isn't particularly relevant. JNLH and JE are equally good for testing -equality after an integer comparison, etc. - --- - -We don't use the LOAD AND TEST or TEST DATA CLASS instructions. +We don't use the TEST DATA CLASS instructions. -- @@ -77,9 +66,10 @@ condition codes. For example, we could use LCDFR instead of LCDBR. -- -We don't optimize block memory operations. +We don't optimize block memory operations, except using single MVCs +for memcpy. -It's definitely worth using things like MVC, CLC, NC, XC and OC with +It's definitely worth using things like CLC, NC, XC and OC with constant lengths. MVCIN may be worthwhile too. We should probably implement things like memcpy using MVC with EXECUTE. -- cgit v1.1 From e921f323533ee751b3fa34bd00d10fa72096ffd3 Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Fri, 9 Aug 2013 10:38:32 +0000 Subject: Fix assembling of Thumb2 branch instructions. The long encoding for Thumb2 unconditional branches is broken. Additionally, there is no range checking for target operands; as such, for instructions originating in assembly code, only short Thumb encodings are generated, regardless of the bit size needed for the offset. Adding range checking is non-trivial due to the representation of Thumb branch instructions. There is no true difference between conditional and unconditional branches in terms of operands and syntax - even unconditional branches have a predicate which is expected to match that of the IT block they are in. Yet, the encodings and the permitted size of the offset differ. Due to this, for any mnemonic there are really 4 encodings to choose from. The problem cannot be handled in the parser alone or by manipulating td files. Because the parser first builds a set of match candidates and then checks them one by one, whatever tablegen-only solution might be found will ultimately be dependent on the parser's evaluation order. Worse, because all branches have the same syntax and the same kinds of operands, that order is governed by the lexicographical ordering of the names of operand classes... To circumvent all this, any necessary disambiguation is added to the instruction validation pass.
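To make the offset ranges concrete, here is a minimal standalone sketch of the check involved (a hedged reimplementation with hypothetical naming; the actual code is the isSignedOffset<width, scale> predicate added to ARMOperand):

  // Sketch: does a byte offset fit in a signed immediate field of 'width'
  // bits, scaled (left-shifted) by 'scale' bits?
  #include <cstdint>

  template <unsigned width, unsigned scale>
  bool fitsSignedOffset(int64_t Val) {
    int64_t Align = 1LL << scale;                      // required alignment
    int64_t Max = Align * ((1LL << (width - 1)) - 1);  // largest encodable
    int64_t Min = -Align * (1LL << (width - 1));       // smallest encodable
    return (Val % Align) == 0 && Val >= Min && Val <= Max;
  }

Under this rule the short tB form takes fitsSignedOffset<11, 1>, roughly +/-2 KB, while the wide t2B form takes fitsSignedOffset<24, 1>, roughly +/-16 MB; the gap between the two is what forces the encoding choice described above.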
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188067 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb.td | 4 +- lib/Target/ARM/ARMInstrThumb2.td | 10 ++- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 107 ++++++++++++++++++++++- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 10 ++- 4 files changed, 121 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e7218c6..d5b6563 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -491,7 +491,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { T1Encoding<{1,1,1,0,0,?}>, Sched<[WriteBr]> { bits<11> target; let Inst{10-0} = target; - } + let AsmMatchConverter = "cvtThumbBranches"; + } // Far jump // Just a pseudo for a tBL instruction. Needed to let regalloc know about @@ -521,6 +522,7 @@ let isBranch = 1, isTerminator = 1 in bits<8> target; let Inst{11-8} = p; let Inst{7-0} = target; + let AsmMatchConverter = "cvtThumbBranches"; } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 7de0901..ccca41a 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3336,13 +3336,14 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{12} = 1; bits<24> target; - let Inst{26} = target{19}; - let Inst{11} = target{18}; - let Inst{13} = target{17}; + let Inst{26} = target{23}; + let Inst{13} = target{22}; + let Inst{11} = target{21}; let Inst{25-16} = target{20-11}; let Inst{10-0} = target{10-0}; let DecoderMethod = "DecodeT2BInstruction"; -} + let AsmMatchConverter = "cvtThumbBranches"; +} let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), @@ -3410,6 +3411,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let Inst{10-0} = target{11-1}; let DecoderMethod = "DecodeThumb2BCCInstruction"; + let AsmMatchConverter = "cvtThumbBranches"; } // Tail calls. 
The IOS version of thumb tail calls uses a t2 branch, so diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 80e5c6e..03d3a48 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -221,6 +221,9 @@ class ARMAsmParser : public MCTargetAsmParser { // Asm Match Converter Methods void cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl &); + void cvtThumbBranches(MCInst &Inst, + const SmallVectorImpl &); + bool validateInstruction(MCInst &Inst, const SmallVectorImpl &Ops); bool processInstruction(MCInst &Inst, @@ -601,7 +604,7 @@ public: template bool isUnsignedOffset() const { if (!isImm()) return false; - if (dyn_cast(Imm.Val)) return true; + if (isa(Imm.Val)) return true; if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) { int64_t Val = CE->getValue(); int64_t Align = 1LL << scale; @@ -610,6 +613,22 @@ public: } return false; } + // checks whether this operand is an signed offset which fits is a field + // of specified width and scaled by a specific number of bits + template + bool isSignedOffset() const { + if (!isImm()) return false; + if (isa(Imm.Val)) return true; + if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) { + int64_t Val = CE->getValue(); + int64_t Align = 1LL << scale; + int64_t Max = Align * ((1LL << (width-1)) - 1); + int64_t Min = -Align * (1LL << (width-1)); + return ((Val % Align) == 0) && (Val >= Min) && (Val <= Max); + } + return false; + } + // checks whether this operand is a memory operand computed as an offset // applied to PC. the offset may have 8 bits of magnitude and is represented // with two bits of shift. textually it may be either [pc, #imm], #imm or @@ -4102,6 +4121,65 @@ cvtThumbMultiply(MCInst &Inst, ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); } +void ARMAsmParser:: +cvtThumbBranches(MCInst &Inst, + const SmallVectorImpl &Operands) { + int CondOp = -1, ImmOp = -1; + switch(Inst.getOpcode()) { + case ARM::tB: + case ARM::tBcc: CondOp = 1; ImmOp = 2; break; + + case ARM::t2B: + case ARM::t2Bcc: CondOp = 1; ImmOp = 3; break; + + default: llvm_unreachable("Unexpected instruction in cvtThumbBranches"); + } + // first decide whether or not the branch should be conditional + // by looking at it's location relative to an IT block + if(inITBlock()) { + // inside an IT block we cannot have any conditional branches. any + // such instructions needs to be converted to unconditional form + switch(Inst.getOpcode()) { + case ARM::tBcc: Inst.setOpcode(ARM::tB); break; + case ARM::t2Bcc: Inst.setOpcode(ARM::t2B); break; + } + } else { + // outside IT blocks we can only have unconditional branches with AL + // condition code or conditional branches with non-AL condition code + unsigned Cond = static_cast(Operands[CondOp])->getCondCode(); + switch(Inst.getOpcode()) { + case ARM::tB: + case ARM::tBcc: + Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc); + break; + case ARM::t2B: + case ARM::t2Bcc: + Inst.setOpcode(Cond == ARMCC::AL ? 
ARM::t2B : ARM::t2Bcc); + break; + } + } + + // now decide on encoding size based on branch target range + switch(Inst.getOpcode()) { + // classify tB as either t2B or t1B based on range of immediate operand + case ARM::tB: { + ARMOperand* op = static_cast(Operands[ImmOp]); + if(!op->isSignedOffset<11, 1>() && isThumbTwo()) + Inst.setOpcode(ARM::t2B); + break; + } + // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand + case ARM::tBcc: { + ARMOperand* op = static_cast(Operands[ImmOp]); + if(!op->isSignedOffset<8, 1>() && isThumbTwo()) + Inst.setOpcode(ARM::t2Bcc); + break; + } + } + ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1); + ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2); +} + /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. bool ARMAsmParser:: @@ -5216,6 +5294,7 @@ validateInstruction(MCInst &Inst, const SmallVectorImpl &Operands) { const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); + // Check the IT block state first. // NOTE: BKPT instruction has the interesting property of being // allowed in IT blocks, but not being predicable. It just always @@ -5247,8 +5326,8 @@ validateInstruction(MCInst &Inst, // Check for non-'al' condition codes outside of the IT block. } else if (isThumbTwo() && MCID.isPredicable() && Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() != - ARMCC::AL && Inst.getOpcode() != ARM::tB && - Inst.getOpcode() != ARM::t2B) + ARMCC::AL && Inst.getOpcode() != ARM::tBcc && + Inst.getOpcode() != ARM::t2Bcc) return Error(Loc, "predicated instructions must be in IT block"); switch (Inst.getOpcode()) { @@ -5383,6 +5462,28 @@ validateInstruction(MCInst &Inst, } break; } + // final range checking for Thumb unconditional branch instructions + case ARM::tB: + if(!(static_cast(Operands[2]))->isSignedOffset<11, 1>()) + return Error(Operands[2]->getStartLoc(), "Branch target out of range"); + break; + case ARM::t2B: { + int op = (Operands[2]->isImm()) ? 2 : 3; + if(!(static_cast(Operands[op]))->isSignedOffset<24, 1>()) + return Error(Operands[op]->getStartLoc(), "Branch target out of range"); + break; + } + // final range checking for Thumb conditional branch instructions + case ARM::tBcc: + if(!(static_cast(Operands[2]))->isSignedOffset<8, 1>()) + return Error(Operands[2]->getStartLoc(), "Branch target out of range"); + break; + case ARM::t2Bcc: { + int op = (Operands[2]->isImm()) ? 
2 : 3; + if(!(static_cast(Operands[op]))->isSignedOffset<20, 1>()) + return Error(Operands[op]->getStartLoc(), "Branch target out of range"); + break; + } } StringRef DepInfo; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index a18d465..0873220 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -638,8 +638,14 @@ getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups) const { - unsigned Val = - ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups); + unsigned Val = 0; + const MCOperand MO = MI.getOperand(OpIdx); + + if(MO.isExpr()) + Val = ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups); + else + Val = MO.getImm() >> 1; + bool I = (Val & 0x800000); bool J1 = (Val & 0x400000); bool J2 = (Val & 0x200000); -- cgit v1.1 From 04b03fac11f10c92cf7ce63ba2f548a42ee2c448 Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Fri, 9 Aug 2013 13:52:32 +0000 Subject: This fixes the Thumb2 CPS assembly syntax. In Thumb1, only one variant is supported: CPS{effect} {flags} Thumb2 supports three: CPS{effect}.W {flags} CPS{effect} {flags} {mode} CPS {mode} Canonically, .W should be used only when ambiguity is present between encodings of different width. The wide suffix is still accepted for the latter two forms via aliases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188071 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ccca41a..5d04d21 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3504,13 +3504,17 @@ class t2CPS : T2XI<(outs), iops, NoItinerary, let M = 1 in def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode), - "$imod.w\t$iflags, $mode">; + "$imod\t$iflags, $mode">; let mode = 0, M = 0 in def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod.w\t$iflags">; let imod = 0, iflags = 0, M = 1 in def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">; +def : t2InstAlias<"cps$imod.w $iflags, $mode", + (t2CPS3p imod_op:$imod, iflags_op:$iflags, i32imm:$mode), 0>; +def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>; + // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> { -- cgit v1.1 From fc6434a73d053c3e1d9c79034a267ae1434483ad Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 9 Aug 2013 19:33:32 +0000 Subject: Add an overload to CostTable which allows it to infer the size of the table. Use it to avoid repeating ourselves too often. Also store MVT::SimpleValueType in the TTI tables so they can be statically initialized; otherwise MVT's constructors create bloated initialization code.
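The shape of the new overload, as a rough self-contained sketch (Entry stands in for the real CostTblEntry, and the exact upstream signature is assumed, not quoted):

  // A reference-to-array parameter lets the compiler deduce the element
  // count N, so callers no longer pass array_lengthof(Table) by hand.
  struct Entry { int ISD; int Type; unsigned Cost; };

  template <unsigned N>
  int CostTableLookup(const Entry (&Table)[N], int ISD, int Ty) {
    for (unsigned I = 0; I != N; ++I)
      if (Table[I].ISD == ISD && Table[I].Type == Ty)
        return I;   // index of the matching cost entry
    return -1;      // no entry; caller falls back to a default cost
  }

Call sites accordingly shrink from CostTableLookup(Tbl, array_lengthof(Tbl), ISD, Ty) to CostTableLookup(Tbl, ISD, Ty), as the diff below shows.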
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188095 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 61 ++++++++++++++----------------- lib/Target/X86/X86TargetTransformInfo.cpp | 58 +++++++++++++---------------- 2 files changed, 52 insertions(+), 67 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 34576ba..9dc1cd1 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -182,7 +182,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, assert(ISD && "Invalid opcode"); // Single to/from double precision conversions. - static const CostTblEntry<MVT> NEONFltDblTbl[] = { + static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = { // Vector fptrunc/fpext conversions. { ISD::FP_ROUND, MVT::v2f64, 2 }, { ISD::FP_EXTEND, MVT::v2f32, 2 }, @@ -192,8 +192,7 @@ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND)) { std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); - int Idx = CostTableLookup(NEONFltDblTbl, array_lengthof(NEONFltDblTbl), - ISD, LT.second); + int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); if (Idx != -1) return LT.first * NEONFltDblTbl[Idx].Cost; } @@ -207,7 +206,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. // TODO: Get these tables to know at least what the related operations are. - static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = { + static const TypeConversionCostTblEntry<MVT::SimpleValueType> + NEONVectorConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, @@ -283,15 +283,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, }; if (SrcTy.isVector() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, - array_lengthof(NEONVectorConversionTbl), - ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, + DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return NEONVectorConversionTbl[Idx].Cost; } // Scalar float to integer conversions. - static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = { + static const TypeConversionCostTblEntry<MVT::SimpleValueType> + NEONFloatConversionTbl[] = { { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, @@ -314,16 +314,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } }; if (SrcTy.isFloatingPoint() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, - array_lengthof(NEONFloatConversionTbl), - ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); + int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, + DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return NEONFloatConversionTbl[Idx].Cost; } // Scalar integer to float conversions.
- static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = { + static const TypeConversionCostTblEntry<MVT::SimpleValueType> + NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, @@ -347,16 +346,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, }; if (SrcTy.isInteger() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, - array_lengthof(NEONIntegerConversionTbl), - ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); + int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD, + DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return NEONIntegerConversionTbl[Idx].Cost; } // Scalar integer conversion costs. - static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = { + static const TypeConversionCostTblEntry<MVT::SimpleValueType> + ARMIntegerConversionTbl[] = { // i16 -> i64 requires two dependent operations. { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, @@ -368,11 +366,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, }; if (SrcTy.isInteger()) { - int Idx = - ConvertCostTableLookup(ARMIntegerConversionTbl, - array_lengthof(ARMIntegerConversionTbl), - ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); + int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, + DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) return ARMIntegerConversionTbl[Idx].Cost; } @@ -400,7 +395,8 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { // Lowering of some vector selects is currently far from perfect. - static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = { + static const TypeConversionCostTblEntry<MVT::SimpleValueType> + NEONVectorSelectTbl[] = { { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, @@ -412,10 +408,9 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, EVT SelCondTy = TLI->getValueType(CondTy); EVT SelValTy = TLI->getValueType(ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { - int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, - array_lengthof(NEONVectorSelectTbl), - ISD, SelCondTy.getSimpleVT(), - SelValTy.getSimpleVT()); + int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, + SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); if (Idx != -1) return NEONVectorSelectTbl[Idx].Cost; } @@ -448,7 +443,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, if (Kind != SK_Reverse) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - static const CostTblEntry<MVT> NEONShuffleTbl[] = { + static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = { // Reverse shuffle cost one instruction if we are shuffling within a double // word (vrev) or two if we shuffle a quad word (vrev, vext).
{ ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, @@ -464,8 +459,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); - int Idx = CostTableLookup(NEONShuffleTbl, array_lengthof(NEONShuffleTbl), - ISD::VECTOR_SHUFFLE, LT.second); + int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); @@ -480,7 +474,7 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK const unsigned FunctionCallDivCost = 20; const unsigned ReciprocalDivCost = 10; - static const CostTblEntry<MVT> CostTbl[] = { + static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = { // Division. // These costs are somewhat random. Choose a cost of 20 to indicate that // vectorizing devision (added function call) is going to be very expensive. @@ -524,8 +518,7 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK int Idx = -1; if (ST->hasNEON()) - Idx = CostTableLookup(CostTbl, array_lengthof(CostTbl), ISDOpcode, - LT.second); + Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second); if (Idx != -1) return LT.first * CostTbl[Idx].Cost; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 3bbddad..5f81d33 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -174,7 +174,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const CostTblEntry<MVT> AVX2CostTable[] = { + static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = { // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to // customize them to detect the cases where shift amount is a scalar one. { ISD::SHL, MVT::v4i32, 1 }, @@ -211,13 +211,13 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, // Look for AVX2 lowering tricks. if (ST->hasAVX2()) { - int Idx = CostTableLookup(AVX2CostTable, array_lengthof(AVX2CostTable), - ISD, LT.second); + int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second); if (Idx != -1) return LT.first * AVX2CostTable[Idx].Cost; } - static const CostTblEntry<MVT> SSE2UniformConstCostTable[] = { + static const CostTblEntry<MVT::SimpleValueType> + SSE2UniformConstCostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. // Constant splats are cheaper for the following instructions. { ISD::SHL, MVT::v16i8, 1 }, @@ -238,15 +238,13 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && ST->hasSSE2()) { - int Idx = CostTableLookup(SSE2UniformConstCostTable, - array_lengthof(SSE2UniformConstCostTable), - ISD, LT.second); + int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second); if (Idx != -1) return LT.first * SSE2UniformConstCostTable[Idx].Cost; } - static const CostTblEntry<MVT> SSE2CostTable[] = { + static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom.
// For some cases, where the shift amount is a scalar we would be able @@ -287,13 +285,12 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, }; if (ST->hasSSE2()) { - int Idx = CostTableLookup(SSE2CostTable, array_lengthof(SSE2CostTable), - ISD, LT.second); + int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second); if (Idx != -1) return LT.first * SSE2CostTable[Idx].Cost; } - static const CostTblEntry<MVT> AVX1CostTable[] = { + static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. // Two ops + 1 extract + 1 insert = 4. { ISD::MUL, MVT::v16i16, 4 }, @@ -312,21 +309,19 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, // Look for AVX1 lowering tricks. if (ST->hasAVX() && !ST->hasAVX2()) { - int Idx = CostTableLookup(AVX1CostTable, array_lengthof(AVX1CostTable), - ISD, LT.second); + int Idx = CostTableLookup(AVX1CostTable, ISD, LT.second); if (Idx != -1) return LT.first * AVX1CostTable[Idx].Cost; } // Custom lowering of vectors. - static const CostTblEntry<MVT> CustomLowered[] = { + static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = { // A v2i64/v4i64 and multiply is custom lowered as a series of long // multiplies(3), shifts(4) and adds(2). { ISD::MUL, MVT::v2i64, 9 }, { ISD::MUL, MVT::v4i64, 9 }, }; - int Idx = CostTableLookup(CustomLowered, array_lengthof(CustomLowered), - ISD, LT.second); + int Idx = CostTableLookup(CustomLowered, ISD, LT.second); if (Idx != -1) return LT.first * CustomLowered[Idx].Cost; @@ -363,7 +358,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src); std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst); - static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = { + static const TypeConversionCostTblEntry<MVT::SimpleValueType> + SSE2ConvTbl[] = { // These are somewhat magic numbers justified by looking at the output of // Intel's IACA, running some kernels and making sure when we take // legalization into account the throughput will be overestimated.
@@ -387,9 +383,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { }; if (ST->hasSSE2() && !ST->hasAVX()) { - int Idx = ConvertCostTableLookup(SSE2ConvTbl, - array_lengthof(SSE2ConvTbl), - ISD, LTDest.second, LTSrc.second); + int Idx = + ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second); if (Idx != -1) return LTSrc.first * SSE2ConvTbl[Idx].Cost; } @@ -401,7 +396,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = { + static const TypeConversionCostTblEntry<MVT::SimpleValueType> + AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, @@ -446,9 +442,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { }; if (ST->hasAVX()) { - int Idx = ConvertCostTableLookup(AVXConversionTbl, - array_lengthof(AVXConversionTbl), - ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); if (Idx != -1) return AVXConversionTbl[Idx].Cost; } @@ -466,7 +461,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const CostTblEntry<MVT> SSE42CostTbl[] = { + static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = { { ISD::SETCC, MVT::v2f64, 1 }, { ISD::SETCC, MVT::v4f32, 1 }, { ISD::SETCC, MVT::v2i64, 1 }, @@ -475,7 +470,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16i8, 1 }, }; - static const CostTblEntry<MVT> AVX1CostTbl[] = { + static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = { { ISD::SETCC, MVT::v4f64, 1 }, { ISD::SETCC, MVT::v8f32, 1 }, // AVX1 does not support 8-wide integer compare. { ISD::SETCC, MVT::v4i64, 4 }, { ISD::SETCC, MVT::v8i32, 4 }, { ISD::SETCC, MVT::v16i16, 4 }, { ISD::SETCC, MVT::v32i8, 4 }, }; - static const CostTblEntry<MVT> AVX2CostTbl[] = { + static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = { { ISD::SETCC, MVT::v4i64, 1 }, { ISD::SETCC, MVT::v8i32, 1 }, { ISD::SETCC, MVT::v16i16, 1 }, @@ -493,22 +488,19 @@ }; if (ST->hasAVX2()) { - int Idx = CostTableLookup(AVX2CostTbl, array_lengthof(AVX2CostTbl), - ISD, MTy); + int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy); if (Idx != -1) return LT.first * AVX2CostTbl[Idx].Cost; } if (ST->hasAVX()) { - int Idx = CostTableLookup(AVX1CostTbl, array_lengthof(AVX1CostTbl), - ISD, MTy); + int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy); if (Idx != -1) return LT.first * AVX1CostTbl[Idx].Cost; } if (ST->hasSSE42()) { - int Idx = CostTableLookup(SSE42CostTbl, array_lengthof(SSE42CostTbl), - ISD, MTy); + int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy); if (Idx != -1) return LT.first * SSE42CostTbl[Idx].Cost; } -- cgit v1.1 From 1e07de352947aaf2f9137113cc594d8204da2b77 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Fri, 9 Aug 2013 21:33:41 +0000 Subject: Add another intrinsic that LLVM gives an incorrect prototype to. I need to go through the whole runtime routine list and see if there are any more I need to add for mips16 floating point. Prototypes must be correct, or else I won't know to add a helper function call.
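For context, __fixunsdfsi is the runtime routine behind double to unsigned int conversion, so like ceil() or ceilf() it takes a floating-point argument and its calls must be routed through a mips16 hard-float stub. A simplified, hypothetical sketch of the kind of lookup the table feeds (the real code lives in Mips16ISelLowering.cpp and may search differently):

  #include <cstring>

  struct Mips16IntrinsicHelperType { const char *Name; const char *Helper; };

  // Entries mirror the table in this patch.
  static const Mips16IntrinsicHelperType Helpers[] = {
    {"__fixunsdfsi", "__mips16_call_stub_2"},
    {"ceil",         "__mips16_call_stub_df_2"},
    {"ceilf",        "__mips16_call_stub_sf_1"},
  };

  const char *lookupHelper(const char *Name) {
    for (unsigned I = 0; I != sizeof(Helpers) / sizeof(Helpers[0]); ++I)
      if (std::strcmp(Helpers[I].Name, Name) == 0)
        return Helpers[I].Helper;  // call must go through this stub
    return 0;                      // no helper stub required
  }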
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188106 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16ISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 6ed1d9e..c8d0a52 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -90,6 +90,7 @@ static const Mips16Libcall HardFloatLibCalls[] = { }; static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = { + {"__fixunsdfsi", "__mips16_call_stub_2" }, {"ceil", "__mips16_call_stub_df_2"}, {"ceilf", "__mips16_call_stub_sf_1"}, {"copysign", "__mips16_call_stub_df_10"}, -- cgit v1.1 From 4ab92e4d4bdf103f487b7383ca666ccc021c147a Mon Sep 17 00:00:00 2001 From: Niels Ole Salscheider Date: Sat, 10 Aug 2013 10:38:47 +0000 Subject: R600/SI: Add FMA pattern git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188135 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index dc41885..dc14609 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1007,8 +1007,12 @@ def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; defm : BFIPatterns ; -def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>; -def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>; +def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", + [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))] +>; +def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", + [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))] +>; //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; def : ROTRPattern ; -- cgit v1.1 From 6e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5 Mon Sep 17 00:00:00 2001 From: Niels Ole Salscheider Date: Sat, 10 Aug 2013 10:38:54 +0000 Subject: R600/SI: FMA is faster than fmul and fadd for f64 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188136 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 18 ++++++++++++++++++ lib/Target/R600/SIISelLowering.h | 1 + 2 files changed, 19 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index b714fc1..a76e6ee 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -338,6 +338,24 @@ MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { return MVT::i32; } +bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + return false; /* There is V_MAD_F32 for f32 */ + case MVT::f64: + return true; + default: + break; + } + + return false; +} + //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index b4202c4..effbf1f 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -55,6 +55,7 @@ public: MachineBasicBlock * BB) const; virtual 
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; virtual MVT getScalarShiftAmountTy(EVT VT) const; + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; -- cgit v1.1 From d8de58e24cf5874c0d6f903d15333406787ea944 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sat, 10 Aug 2013 20:13:20 +0000 Subject: [Sparc] Enable xword directive in sparcv9. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188141 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 5a52abe..45cfe03 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -21,18 +21,21 @@ void SparcELFMCAsmInfo::anchor() { } SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { IsLittleEndian = false; Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::sparcv9) { + bool isV9 = (TheTriple.getArch() == Triple::sparcv9); + + if (isV9) { PointerSize = CalleeSaveStackSlotSize = 8; } Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = 0; // .xword is only supported by V9. + // .xword is only supported by V9. + Data64bitsDirective = (isV9) ? "\t.xword\t" : 0; ZeroDirective = "\t.skip\t"; CommentString = "!"; HasLEB128 = true; SupportsDebugInformation = true; - + SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; -- cgit v1.1 From 76ba4f5275a8bd908275076b50f51f3aa9a5d4e2 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 10 Aug 2013 22:18:22 +0000 Subject: Incorrect JAL instruction attributes caused the optimizer to move an instruction incorrectly. This just affects static relocation; -static now works fine with mips16 for the most part. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188143 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16InstrInfo.td | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index aef4e92..da4b221 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -684,10 +684,7 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { // def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> { - let isBranch = 1; let hasDelaySlot = 0; // not true, but we add the nop for now - let isTerminator=1; - let isBarrier=1; let isCall=1; } -- cgit v1.1 From fac4a4eb7dfbfc90ae1d5c7d6c39a2d89a33c30e Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 11 Aug 2013 07:55:09 +0000 Subject: AVX-512: Added VPERM* instructions and MOV* zmm-to-zmm instructions. Added a test for shuffles using VPERM.
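As a hedged aside (a standalone sketch, not from the patch): the two-bits-per-element imm8 encoding that the new isPermImmMask helper in the diff below computes for VPERMQ/VPERMPD-style shuffles can be expressed as:

#include <cstdint>

// Pack a 4-element in-lane shuffle mask into an 8-bit immediate, two bits
// per destination element; undef elements (negative) are left as zero.
static uint8_t packPermImm8(const int Mask[4]) {
  uint8_t Imm = 0;
  for (unsigned i = 0; i != 4; ++i)
    if (Mask[i] >= 0)
      Imm |= uint8_t(Mask[i]) << (i * 2);
  return Imm; // e.g. the reverse mask <3,2,1,0> packs to 0x1B
}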
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188147 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 152 ++++++++++++++------ lib/Target/X86/X86ISelLowering.h | 1 + lib/Target/X86/X86InstrAVX512.td | 237 ++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrFragmentsSIMD.td | 40 +++++- lib/Target/X86/X86InstrInfo.cpp | 49 ++++++- 5 files changed, 432 insertions(+), 47 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 00b4976..dfd41b7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3872,11 +3872,13 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(ArrayRef Mask, EVT VT, bool HasInt256, bool V2IsSplat = false) { - unsigned NumElts = VT.getVectorNumElements(); + if (VT.is512BitVector()) + return false; assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); + unsigned NumElts = VT.getVectorNumElements(); if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3911,6 +3913,8 @@ static bool isUNPCKHMask(ArrayRef Mask, EVT VT, bool HasInt256, bool V2IsSplat = false) { unsigned NumElts = VT.getVectorNumElements(); + if (VT.is512BitVector()) + return false; assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); @@ -3948,6 +3952,8 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef Mask, EVT VT, bool HasInt256) { unsigned NumElts = VT.getVectorNumElements(); bool Is256BitVec = VT.is256BitVector(); + if (VT.is512BitVector()) + return false; assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); @@ -3988,6 +3994,9 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef Mask, EVT VT, bool HasInt256) { static bool isUNPCKH_v_undef_Mask(ArrayRef Mask, EVT VT, bool HasInt256) { unsigned NumElts = VT.getVectorNumElements(); + if (VT.is512BitVector()) + return false; + assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); @@ -4093,6 +4102,44 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { return (FstHalf | (SndHalf << 4)); } +// Symmetric in-lane mask. Each lane has 4 elements (for imm8) +static bool isPermImmMask(ArrayRef Mask, EVT VT, unsigned& Imm8) { + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize < 32) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + Imm8 = 0; + if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) { + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] < 0) + continue; + Imm8 |= Mask[i] << (i*2); + } + return true; + } + + unsigned LaneSize = 4; + SmallVector MaskVal(LaneSize, -1); + + for (unsigned l = 0; l != NumElts; l += LaneSize) { + for (unsigned i = 0; i != LaneSize; ++i) { + if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize)) + return false; + if (Mask[i+l] < 0) + continue; + if (MaskVal[i] < 0) { + MaskVal[i] = Mask[i+l] - l; + Imm8 |= MaskVal[i] << (i*2); + continue; + } + if (Mask[i+l] != (signed)(MaskVal[i]+l)) + return false; + } + } + return true; +} + /// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching is different depending on whether the underlying @@ -4163,7 +4210,8 @@ static bool isMOVSHDUPMask(ArrayRef Mask, EVT VT, unsigned NumElems = VT.getVectorNumElements(); if ((VT.is128BitVector() && NumElems != 4) || - (VT.is256BitVector() && NumElems != 8)) + (VT.is256BitVector() && NumElems != 8) || + (VT.is512BitVector() && NumElems != 16)) return false; // "i+1" is the value the indexed mask element must have @@ -4186,7 +4234,8 @@ static bool isMOVSLDUPMask(ArrayRef Mask, EVT VT, unsigned NumElems = VT.getVectorNumElements(); if ((VT.is128BitVector() && NumElems != 4) || - (VT.is256BitVector() && NumElems != 8)) + (VT.is256BitVector() && NumElems != 8) || + (VT.is512BitVector() && NumElems != 16)) return false; // "i" is the value the indexed mask element must have @@ -4449,27 +4498,6 @@ unsigned X86::getInsertVINSERT256Immediate(SDNode *N) { return getInsertVINSERTImmediate(N, 256); } -/// getShuffleCLImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. -/// Handles 256-bit. -static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { - MVT VT = N->getValueType(0).getSimpleVT(); - - unsigned NumElts = VT.getVectorNumElements(); - - assert((VT.is256BitVector() && NumElts == 4) && - "Unsupported vector type for VPERMQ/VPERMPD"); - - unsigned Mask = 0; - for (unsigned i = 0; i != NumElts; ++i) { - int Elt = N->getMaskElt(i); - if (Elt < 0) - continue; - Mask |= Elt << (i*2); - } - - return Mask; -} /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { @@ -5288,7 +5316,10 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl, LD->getPointerInfo().getWithOffset(StartOffset), false, false, false, 0); - SmallVector Mask(NumElems, EltNo); + SmallVector Mask; + for (unsigned i = 0; i != NumElems; ++i) + Mask.push_back(EltNo); + return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]); } @@ -5720,7 +5751,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(Op.getNode())) { // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. - if (VT == MVT::v4i32 || VT == MVT::v8i32) + if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; return getZeroVector(VT, Subtarget, DAG, dl); @@ -7413,21 +7444,30 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (BlendOp.getNode()) return BlendOp; - if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) { - SmallVector permclMask; - for (unsigned i = 0; i != 8; ++i) { - permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32)); + unsigned Imm8; + if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8)) + return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG); + + if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) || + VT.is512BitVector()) { + EVT MaskEltVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getVectorElementType().getSizeInBits()); + EVT MaskVectorVT = + EVT::getVectorVT(*DAG.getContext(),MaskEltVT, NumElems); + SmallVector permclMask; + for (unsigned i = 0; i != NumElems; ++i) { + permclMask.push_back(DAG.getConstant((M[i]>=0) ?
M[i] : 0, MaskEltVT)); } - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, - &permclMask[0], 8); - // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32 - return DAG.getNode(X86ISD::VPERMV, dl, VT, - DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1); - } - if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64)) - return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, - getShuffleCLImmediate(SVOp), DAG); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT, + &permclMask[0], NumElems); + if (V2IsUndef) + // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32 + return DAG.getNode(X86ISD::VPERMV, dl, VT, + DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1); + return DAG.getNode(X86ISD::VPERMV3, dl, VT, + DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1, V2); + } //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, @@ -10149,6 +10189,36 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); } +SDValue X86TargetLowering::LowerSIGN_EXTEND_AVX512(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op->getValueType(0); + SDValue In = Op->getOperand(0); + EVT InVT = In.getValueType(); + SDLoc dl(Op); + + if (InVT.getVectorElementType().getSizeInBits() >=8 && + VT.getVectorElementType().getSizeInBits() >= 32) + return DAG.getNode(X86ISD::VSEXT, dl, VT, In); + + if (InVT.getVectorElementType() == MVT::i1) { + unsigned int NumElts = InVT.getVectorNumElements(); + assert ((NumElts == 8 || NumElts == 16) && + "Unsupported SIGN_EXTEND operation"); + if (VT.getVectorElementType().getSizeInBits() >= 32) { + Constant *C = + ConstantInt::get(*DAG.getContext(), + (NumElts == 8)? 
APInt(64, ~0ULL): APInt(32, ~0U)); + SDValue CP = DAG.getConstantPool(C, getPointerTy()); + unsigned Alignment = cast(CP)->getAlignment(); + SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + return DAG.getNode(X86ISD::VBROADCASTM, dl, VT, In, Ld); + } + } + return SDValue(); +} + SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op->getValueType(0).getSimpleVT(); @@ -10156,6 +10226,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, MVT InVT = In.getValueType().getSimpleVT(); SDLoc dl(Op); + if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) + return LowerSIGN_EXTEND_AVX512(Op, DAG); + if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && (VT != MVT::v8i32 || InVT != MVT::v8i16)) return SDValue(); @@ -13239,6 +13312,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VPERMV: return "X86ISD::VPERMV"; + case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e3db491..c931b9b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -318,6 +318,7 @@ namespace llvm { UNPCKH, VPERMILP, VPERMV, + VPERMV3, VPERMI, VPERM2X128, VBROADCAST, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8abae14..7fed783 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -473,6 +473,98 @@ defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512, defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512, VK8, v8i64, v8i1>, EVEX_V512, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - VPERM +// +// -- immediate form -- +multiclass avx512_perm_imm opc, string OpcodeStr, RegisterClass RC, + SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT> { + def ri : AVX512AIi8, + EVEX; + def mi : AVX512AIi8, EVEX; +} + +defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64, + i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +let ExeDomain = SSEPackedDouble in +defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64, + f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +// -- VPERM - register form -- +multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, + PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { + + def rr : AVX5128I, EVEX_4V; + + def rm : AVX5128I, EVEX_4V; +} + +defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv8i64, i512mem, + v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, + v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +let ExeDomain = SSEPackedSingle in +defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv8f64, f512mem, + v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +let ExeDomain = SSEPackedDouble in +defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, + v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +// -- VPERM2I - 3 source operands form -- +multiclass avx512_perm_3src opc, string 
OpcodeStr, RegisterClass RC, + PatFrag mem_frag, X86MemOperand x86memop, + ValueType OpVT> { +let Constraints = "$src1 = $dst" in { + def rr : AVX5128I, + EVEX_4V; + + def rm : AVX5128I, EVEX_4V; + } +} +defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem, + v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem, + v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem, + v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem, + v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + + // Mask register copy, including // - copy between mask registers // - load/store mask registers @@ -713,3 +805,148 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Aligned and unaligned load and store +// + +multiclass avx512_mov_packed opc, RegisterClass RC, RegisterClass KRC, + X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { +let neverHasSideEffects = 1 in + def rr : AVX512PI, + EVEX; +let canFoldAsLoad = 1 in + def rm : AVX512PI, EVEX; +let Constraints = "$src1 = $dst" in { + def rrk : AVX512PI, + EVEX, EVEX_K; + def rmk : AVX512PI, EVEX, EVEX_K; +} +} + +defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32, + "vmovaps", SSEPackedSingle>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64, + "vmovapd", SSEPackedDouble>, + OpSize, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32, + "vmovups", SSEPackedSingle>, + TB, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64, + "vmovupd", SSEPackedDouble>, + OpSize, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), + "vmovaps\t{$src, $dst|$dst, $src}", + [(alignedstore512 (v16f32 VR512:$src), addr:$dst)], + SSEPackedSingle>, EVEX, EVEX_V512, TB, + EVEX_CD8<32, CD8VF>; +def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), + "vmovapd\t{$src, $dst|$dst, $src}", + [(alignedstore512 (v8f64 VR512:$src), addr:$dst)], + SSEPackedDouble>, EVEX, EVEX_V512, + OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>; +def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), + "vmovups\t{$src, $dst|$dst, $src}", + [(store (v16f32 VR512:$src), addr:$dst)], + SSEPackedSingle>, EVEX, EVEX_V512, TB, + EVEX_CD8<32, CD8VF>; +def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), + "vmovupd\t{$src, $dst|$dst, $src}", + [(store (v8f64 VR512:$src), addr:$dst)], + SSEPackedDouble>, EVEX, EVEX_V512, + OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>; + +// Use vmovaps/vmovups for AVX-512 integer load/store. 
+// 512-bit load/store +def : Pat<(alignedloadv8i64 addr:$src), + (VMOVAPSZrm addr:$src)>; +def : Pat<(loadv8i64 addr:$src), + (VMOVUPSZrm addr:$src)>; + +def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst), + (VMOVAPSZmr addr:$dst, VR512:$src)>; +def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst), + (VMOVAPSZmr addr:$dst, VR512:$src)>; + +def : Pat<(store (v8i64 VR512:$src), addr:$dst), + (VMOVUPDZmr addr:$dst, VR512:$src)>; +def : Pat<(store (v16i32 VR512:$src), addr:$dst), + (VMOVUPSZmr addr:$dst, VR512:$src)>; + +let neverHasSideEffects = 1 in { + def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), + (ins VR512:$src), + "vmovdqa32\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512; + def VMOVDQA64rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), + (ins VR512:$src), + "vmovdqa64\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, VEX_W; +let mayStore = 1 in { + def VMOVDQA32mr : AVX512BI<0x7F, MRMDestMem, (outs), + (ins i512mem:$dst, VR512:$src), + "vmovdqa32\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + def VMOVDQA64mr : AVX512BI<0x7F, MRMDestMem, (outs), + (ins i512mem:$dst, VR512:$src), + "vmovdqa64\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} +let mayLoad = 1 in { +def VMOVDQA32rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), + (ins i512mem:$src), + "vmovdqa32\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; +def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), + (ins i512mem:$src), + "vmovdqa64\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} +} + +multiclass avx512_mov_int opc, string asm, RegisterClass RC, + RegisterClass KRC, + PatFrag ld_frag, X86MemOperand x86memop> { +let neverHasSideEffects = 1 in + def rr : AVX512XSI, + EVEX; +let canFoldAsLoad = 1 in + def rm : AVX512XSI, + EVEX; +let Constraints = "$src1 = $dst" in { + def rrk : AVX512XSI, + EVEX, EVEX_K; + def rmk : AVX512XSI, EVEX, EVEX_K; +} +} + +defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + + diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 0b51521..8587d38 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -151,6 +151,8 @@ def X86pmuludq : SDNode<"X86ISD::PMULUDQ", def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; +def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>]>; @@ -194,6 +196,7 @@ def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; +def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; @@ -262,9 +265,16 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; -// 128-/256-bit extload pattern fragments +// 
512-bit load pattern fragments +def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>; +def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>; +def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>; +def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>; + +// 128-/256-/512-bit extload pattern fragments def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>; def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>; +def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>; // Like 'store', but always requires 128-bit vector alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), @@ -278,6 +288,12 @@ def alignedstore256 : PatFrag<(ops node:$val, node:$ptr), return cast(N)->getAlignment() >= 32; }]>; +// Like 'store', but always requires 512-bit vector alignment. +def alignedstore512 : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() >= 64; +}]>; + // Like 'load', but always requires 128-bit vector alignment. def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast(N)->getAlignment() >= 16; @@ -293,6 +309,11 @@ def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast(N)->getAlignment() >= 32; }]>; +// Like 'load', but always requires 512-bit vector alignment. +def alignedload512 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 64; +}]>; + def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; def alignedloadfsf64 : PatFrag<(ops node:$ptr), @@ -316,6 +337,16 @@ def alignedloadv4f64 : PatFrag<(ops node:$ptr), def alignedloadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (alignedload256 node:$ptr))>; +// 512-bit aligned load pattern fragments +def alignedloadv16f32 : PatFrag<(ops node:$ptr), + (v16f32 (alignedload512 node:$ptr))>; +def alignedloadv8f64 : PatFrag<(ops node:$ptr), + (v8f64 (alignedload512 node:$ptr))>; +def alignedloadv16i32 : PatFrag<(ops node:$ptr), + (v16i32 (alignedload512 node:$ptr))>; +def alignedloadv8i64 : PatFrag<(ops node:$ptr), + (v8i64 (alignedload512 node:$ptr))>; + // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to // be naturally aligned on some targets but not on others. If the subtarget @@ -339,9 +370,16 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; // 256-bit memop pattern fragments // NOTE: all 256-bit integer vector loads are promoted to v4i64 def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; +def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>; def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; +// 512-bit memop pattern fragments +def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>; +def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>; +def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>; +def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>; + // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. 
// FIXME: 8 byte alignment for mmx reads is not required diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 0443a93..b773768 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -81,6 +81,7 @@ enum { TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, + TB_ALIGN_64 = 64 << TB_ALIGN_SHIFT, TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT }; @@ -1177,6 +1178,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PDEP64rr, X86::PDEP64rm, 0 }, { X86::PEXT32rr, X86::PEXT32rm, 0 }, { X86::PEXT64rr, X86::PEXT64rm, 0 }, + + // AVX-512 foldable instructions + { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, + { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, + { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, + { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, + { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, + { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { @@ -1454,6 +1463,8 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: + case X86::VMOVDQA32rm: + case X86::VMOVDQA64rm: return true; } } @@ -2890,12 +2901,15 @@ static bool isHReg(unsigned Reg) { // Try and copy between VR128/VR64 and GR64 registers. static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, - bool HasAVX) { + const X86Subtarget& Subtarget) { + + // SrcReg(VR128) -> DestReg(GR64) // SrcReg(VR64) -> DestReg(GR64) // SrcReg(GR64) -> DestReg(VR128) // SrcReg(GR64) -> DestReg(VR64) + bool HasAVX = Subtarget.hasAVX(); if (X86::GR64RegClass.contains(DestReg)) { if (X86::VR128RegClass.contains(SrcReg)) // Copy from a VR128 register to a GR64 register. @@ -2926,13 +2940,31 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, return 0; } +static +unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { + if (X86::VR128XRegClass.contains(DestReg, SrcReg) || + X86::VR256XRegClass.contains(DestReg, SrcReg) || + X86::VR512RegClass.contains(DestReg, SrcReg)) { + DestReg = get512BitSuperRegister(DestReg); + SrcReg = get512BitSuperRegister(SrcReg); + return X86::VMOVAPSZrr; + } + if ((X86::VK8RegClass.contains(DestReg) || + X86::VK16RegClass.contains(DestReg)) && + (X86::VK8RegClass.contains(SrcReg) || + X86::VK16RegClass.contains(SrcReg))) + return X86::KMOVWkk; + return 0; +} + void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // First deal with the normal symmetric copies. bool HasAVX = TM.getSubtarget().hasAVX(); - unsigned Opc; + bool HasAVX512 = TM.getSubtarget().hasAVX512(); + unsigned Opc = 0; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; else if (X86::GR32RegClass.contains(DestReg, SrcReg)) @@ -2950,14 +2982,17 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, "8-bit H register can not be copied outside GR8_NOREX"); } else Opc = X86::MOV8rr; - } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) + } + else if (X86::VR64RegClass.contains(DestReg, SrcReg)) + Opc = X86::MMX_MOVQ64rr; + else if (HasAVX512) + Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg); + else if (X86::VR128RegClass.contains(DestReg, SrcReg)) Opc = HasAVX ? 
X86::VMOVAPSrr : X86::MOVAPSrr; else if (X86::VR256RegClass.contains(DestReg, SrcReg)) Opc = X86::VMOVAPSYrr; - else if (X86::VR64RegClass.contains(DestReg, SrcReg)) - Opc = X86::MMX_MOVQ64rr; - else - Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX); + if (!Opc) + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget()); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) -- cgit v1.1 From 55db69c97e936abf8290a4dc639239f92ab34fcb Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 11 Aug 2013 12:29:16 +0000 Subject: AVX-512: Added more tests for BROADCAST git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188148 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dfd41b7..55b1f01 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5481,7 +5481,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // Use the register form of the broadcast instruction available on AVX2. - if (VT.is256BitVector()) + if (VT.getSizeInBits() >= 256) Sc = Extract128BitVector(Sc, 0, DAG, dl); return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc); } -- cgit v1.1 From 5e4b95b3fe908f89aa512b6e9921fe49aadd759b Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sun, 11 Aug 2013 21:30:27 +0000 Subject: Don't generate floating point stubs for mips16 code if the function is actually an intrinsic that will not occur in libc. The list here is not exhaustive, but it fixes the one place in test-suite where this occurs. I have filed a bug against myself to research the full list and add them to the array of such cases. In the future, actual stub generation will occur in a later phase, and we won't need this code because we will know at that time during the compilation that in fact no helper function was even needed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188149 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16HardFloat.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 7e456aa..7374636 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> #include <string> static void inlineAsmOut @@ -321,6 +322,17 @@ static void assureFPCallStub(Function &F, Module *M, } // +// Functions that are inline intrinsics don't need helpers. +// +std::string IntrinsicInline[] = + {"fabs"}; + +bool isIntrinsicInline(Function *F) { return std::binary_search( IntrinsicInline, array_endof(IntrinsicInline), F->getName()); } +// // Returns of float, double and complex need to be handled with a helper // function. // @@ -372,7 +384,7 @@ static bool fixupFPReturnAndCall // helper functions if (Subtarget.getRelocationModel() != Reloc::PIC_ ) { Function *F_ = CI->getCalledFunction(); - if (F_ && needsFPHelperFromSig(*F_)) { + if (F_ && !isIntrinsicInline(F_) && needsFPHelperFromSig(*F_)) { assureFPCallStub(*F_, M, Subtarget); Modified=true; } -- cgit v1.1 From 674c91bb6e704fecd905a226daef079717e7a8c1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 12 Aug 2013 09:37:29 +0000 Subject: Remove global construction.
const char* is sufficient here. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188158 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16HardFloat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 7374636..46b04c3 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -324,10 +324,10 @@ static void assureFPCallStub(Function &F, Module *M, // // Functions that are inline intrinsics don't need helpers. // -std::string IntrinsicInline[] = +static const char *IntrinsicInline[] = {"fabs"}; -bool isIntrinsicInline(Function *F) { +static bool isIntrinsicInline(Function *F) { return std::binary_search( IntrinsicInline, array_endof(IntrinsicInline), F->getName()); -- cgit v1.1 From f37c8feb468a0e1876c08bdeb449bdb5999c0534 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 12 Aug 2013 10:05:58 +0000 Subject: [SystemZ] Add a definition of the IPM instruction git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188161 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFP.td | 6 +++--- lib/Target/SystemZ/SystemZInstrFormats.td | 2 +- lib/Target/SystemZ/SystemZInstrInfo.td | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index b903b51..9f5279e 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -27,9 +27,9 @@ defm CondStoreF64 : CondStores; - def LZDR : InherentRRE<"lzd", 0xB375, FP64, (fpimm0)>; - def LZXR : InherentRRE<"lzx", 0xB376, FP128, (fpimm0)>; + def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>; + def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>; + def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>; } // Moves between two floating-point registers. diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 954df11..2af8e83 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -552,7 +552,7 @@ class InstSS op, dag outs, dag ins, string asmstr, list pattern> class InherentRRE opcode, RegisterOperand cls, dag src> : InstRRE { let R2 = 0; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index b318d67..5371e8a 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1119,6 +1119,10 @@ let Defs = [CC] in { // Miscellaneous Instructions. //===----------------------------------------------------------------------===// +// Extract CC into bits 29 and 28 of a register. +let Uses = [CC] in + def IPM : InherentRRE<"ipm", 0xB222, GR32, (null_frag)>; + // Read a 32-bit access register into a GR32. As with all GR32 operations, // the upper 32 bits of the enclosing GR64 remain unchanged, which is useful // when a 64-bit address is stored in a pair of access registers. 
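To make the bit layout concrete, here is a minimal sketch (not part of the patch, and written in plain C++ rather than TableGen) of what "CC in bits 29 and 28" means for a consumer of the IPM result:

#include <cstdint>

// Recover the 2-bit condition code from the 32-bit value produced by IPM,
// which places CC in bits 29 and 28. The meaning of each CC value depends
// on the instruction that last set the condition code.
static unsigned ccFromIPM(uint32_t IPMResult) {
  return (IPMResult >> 28) & 3;
}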
-- cgit v1.1 From e03a56d62fc623e2f72d623b816f91b293d5904b Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 12 Aug 2013 10:17:33 +0000 Subject: [SystemZ] Add a definition of the CLC instruction git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188162 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 1 + lib/Target/SystemZ/SystemZISelLowering.h | 4 ++++ lib/Target/SystemZ/SystemZInstrFormats.td | 19 +++++++++++++++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 14 +++++--------- lib/Target/SystemZ/SystemZOperators.td | 6 ++++-- 5 files changed, 33 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 6acdcd4..a51f016 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1701,6 +1701,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(UDIVREM32); OPCODE(UDIVREM64); OPCODE(MVC); + OPCODE(CLC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index c0dbe49..4098ff3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -80,6 +80,10 @@ namespace SystemZISD { // MachineMemOperands rather than one. MVC, + // Use CLC to compare two blocks of memory, with the same comments + // as for MVC. + CLC, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_. // diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 2af8e83..2d18e03 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1386,3 +1386,22 @@ class AtomicLoadWBinaryReg : AtomicLoadWBinary; class AtomicLoadWBinaryImm : AtomicLoadWBinary; + +// Define an instruction that operates on two fixed-length blocks of memory. +// The real instruction uses a bdladdr12onlylen8 for the first operand and a +// bdaddr12only for the second, with the length of the second operand being +// implicitly the same as the first. This arrangement matches the underlying +// assembly syntax. However, for instruction selection it's easier to have +// two normal bdaddr12onlys and a separate length operand, so define a pseudo +// instruction for that too. +multiclass MemorySS opcode, + SDPatternOperator operator> { + def "" : InstSS; + let usesCustomInserter = 1 in + def Wrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm32len8:$length), + [(operator bdaddr12only:$dest, bdaddr12only:$src, + imm32len8:$length)]>; +} diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 5371e8a..a7181d6 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -334,15 +334,7 @@ def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; // Memory-to-memory moves. 
let mayLoad = 1, mayStore = 1 in - def MVC : InstSS<0xD2, (outs), (ins bdladdr12onlylen8:$BDL1, - bdaddr12only:$BD2), - "mvc\t$BDL1, $BD2", []>; - -let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in - def MVCWrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, - imm32len8:$length), - [(z_mvc bdaddr12only:$dest, bdaddr12only:$src, - imm32len8:$length)]>; + defm MVC : MemorySS<"mvc", 0xD2, z_mvc>; defm LoadStore8_32 : MVCLoadStore; @@ -1000,6 +992,10 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { } defm : ZXB; +// Memory-to-memory comparison. +let mayLoad = 1, Defs = [CC] in + defm CLC : MemorySS<"clc", 0xD5, z_clc>; + //===----------------------------------------------------------------------===// // Atomic operations //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 6a3af2b..dae04de 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -54,7 +54,7 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, SDTCisVT<4, i32>, SDTCisVT<5, i32>, SDTCisVT<6, i32>]>; -def SDT_ZCopy : SDTypeProfile<0, 3, +def SDT_ZMemMemLength : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; @@ -109,8 +109,10 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; -def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZCopy, +def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayLoad]>; //===----------------------------------------------------------------------===// // Pattern fragments -- cgit v1.1 From ac168b8bc8773a083a10902f64e4ae57a925aee4 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 12 Aug 2013 10:28:10 +0000 Subject: [SystemZ] Use CLC and IPM to implement memcmp For now this is restricted to fixed-length comparisons with a length in the range [1, 256], as for memcpy() and MVC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188163 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/README.txt | 6 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 12 ++-- lib/Target/SystemZ/SystemZISelLowering.h | 8 ++- lib/Target/SystemZ/SystemZInstrInfo.cpp | 93 ++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZInstrInfo.h | 6 ++ lib/Target/SystemZ/SystemZInstrInfo.td | 2 +- lib/Target/SystemZ/SystemZOperators.td | 5 +- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 27 ++++++++ lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 8 ++- 9 files changed, 155 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 563513b..eebc4e4 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -67,12 +67,12 @@ condition codes. For example, we could use LCDFR instead of LCDBR. -- We don't optimize block memory operations, except using single MVCs -for memcpy. +for memcpy and single CLCs for memcmp. -It's definitely worth using things like CLC, NC, XC and OC with +It's definitely worth using things like NC, XC and OC with constant lengths. MVCIN may be worthwhile too. -We should probably implement things like memcpy using MVC with EXECUTE. +We should probably implement general memcpy using MVC with EXECUTE. Likewise memcmp and CLC. 
MVCLE and CLCLE could be useful too. -- diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index a51f016..899b08c 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1702,6 +1702,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(UDIVREM64); OPCODE(MVC); OPCODE(CLC); + OPCODE(IPM); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -2240,8 +2241,9 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, } MachineBasicBlock * -SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI, - MachineBasicBlock *MBB) const { +SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { const SystemZInstrInfo *TII = TM.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -2251,7 +2253,7 @@ SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI, uint64_t SrcDisp = MI->getOperand(3).getImm(); uint64_t Length = MI->getOperand(4).getImm(); - BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC)) + BuildMI(*MBB, MI, DL, TII->get(Opcode)) .addOperand(DestBase).addImm(DestDisp).addImm(Length) .addOperand(SrcBase).addImm(SrcDisp); @@ -2483,7 +2485,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::ATOMIC_CMP_SWAPW: return emitAtomicCmpSwapW(MI, MBB); case SystemZ::MVCWrapper: - return emitMVCWrapper(MI, MBB); + return emitMemMemWrapper(MI, MBB, SystemZ::MVC); + case SystemZ::CLCWrapper: + return emitMemMemWrapper(MI, MBB, SystemZ::CLC); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 4098ff3..0036ce8 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -84,6 +84,9 @@ namespace SystemZISD { // as for MVC. CLC, + // Store the CC value in bits 29 and 28 of an integer. + IPM, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_. // @@ -234,8 +237,9 @@ private: unsigned BitSize) const; MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitMVCWrapper(MachineInstr *MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Opcode) const; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 9ee60aa..54a8669 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -293,6 +293,99 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } +bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { + assert(MI->isCompare() && "Caller should have checked for a comparison"); + + if (MI->getNumExplicitOperands() == 2 && + MI->getOperand(0).isReg() && + MI->getOperand(1).isImm()) { + SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; + Value = MI->getOperand(1).getImm(); + Mask = ~0; + return true; + } + + return false; +} + +// If Reg is a virtual register that is used by only a single non-debug +// instruction, return the defining instruction, otherwise return null. 
+static MachineInstr *getDefSingleUse(const MachineRegisterInfo *MRI, + unsigned Reg) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return 0; + + MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); + MachineRegisterInfo::use_nodbg_iterator E = MRI->use_nodbg_end(); + if (I == E || llvm::next(I) != E) + return 0; + + return MRI->getUniqueVRegDef(Reg); +} + +// Return true if MI is a shift of type Opcode by Imm bits. +static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) { + return (MI->getOpcode() == Opcode && + !MI->getOperand(2).getReg() && + MI->getOperand(3).getImm() == Imm); +} + +// Compare compares SrcReg against zero. Check whether SrcReg contains +// the result of an IPM sequence that is only used by Compare. Try to +// delete both of them if so and return true if a change was made. +static bool removeIPM(MachineInstr *Compare, unsigned SrcReg, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI) { + MachineInstr *SRA = getDefSingleUse(MRI, SrcReg); + if (!SRA || !isShift(SRA, SystemZ::SRA, 30)) + return false; + + MachineInstr *SLL = getDefSingleUse(MRI, SRA->getOperand(1).getReg()); + if (!SLL || !isShift(SLL, SystemZ::SLL, 2)) + return false; + + MachineInstr *IPM = getDefSingleUse(MRI, SLL->getOperand(1).getReg()); + if (!IPM || IPM->getOpcode() != SystemZ::IPM) + return false; + + // Check that there are no assignments to CC between the IPM and Compare, + // except for the SRA that we'd like to delete. We can ignore SLL because + // it does not assign to CC. We can also ignore uses of the SRA CC result, + // since it is effectively restoring CC to the value it had before IPM + // (for all current use cases). + if (IPM->getParent() != Compare->getParent()) + return false; + MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) { + MachineInstr *MI = MBBI; + if (MI != SRA && MI->modifiesRegister(SystemZ::CC, TRI)) + return false; + } + + IPM->eraseFromParent(); + SLL->eraseFromParent(); + SRA->eraseFromParent(); + Compare->eraseFromParent(); + return true; +} + +bool +SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const { + assert(!SrcReg2 && "Only optimizing constant comparisons so far"); + bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; + if (Value == 0 && + !IsLogical && + removeIPM(Compare, SrcReg, MRI, TM.getRegisterInfo())) + return true; + return false; +} + // If Opcode is a move that has a conditional variant, return that variant, // otherwise return 0. 
static unsigned getConditionalMove(unsigned Opcode) { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 276fd3b..3c4e8af 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -129,6 +129,12 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const LLVM_OVERRIDE; + bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, int &Value) const + LLVM_OVERRIDE; + bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int Mask, int Value, + const MachineRegisterInfo *MRI) const LLVM_OVERRIDE; virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE; virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index a7181d6..834ffed 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1117,7 +1117,7 @@ let Defs = [CC] in { // Extract CC into bits 29 and 28 of a register. let Uses = [CC] in - def IPM : InherentRRE<"ipm", 0xB222, GR32, (null_frag)>; + def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>; // Read a 32-bit access register into a GR32. As with all GR32 operations, // the upper 32 bits of the enclosing GR64 remain unchanged, which is useful diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index dae04de..8a5b909 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -58,6 +58,7 @@ def SDT_ZMemMemLength : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; +def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -112,7 +113,9 @@ def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, + [SDNPInGlue]>; //===----------------------------------------------------------------------===// // Pattern fragments diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 4ca9292..341dc94 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -125,3 +125,30 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } return SDValue(); } + +std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + if (ConstantSDNode *CSize = dyn_cast(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + if (Bytes >= 1 && Bytes <= 0x100) { + // A single CLC. + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, + Src1, Src2, Size); + SDValue Glue = Chain.getValue(1); + // IPM inserts the CC value into bits 29 and 28, with 0 meaning "equal", + // 1 meaning "greater" and 2 meaning "less". Convert them into an + // integer that is respectively equal, greater or less than 0. 
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM, + DAG.getConstant(2, MVT::i32)); + SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL, + DAG.getConstant(30, MVT::i32)); + return std::make_pair(SRA, Chain); + } + } + return std::make_pair(SDValue(), SDValue()); +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 9138a9c..c757e16 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -38,7 +38,13 @@ public: EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dst, SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, - MachinePointerInfo DstPtrInfo) const; + MachinePointerInfo DstPtrInfo) const LLVM_OVERRIDE; + + virtual std::pair + EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, SDValue Size, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; }; } -- cgit v1.1 From 809313970fc98bba6f36a332adfa3e5fef4110b3 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Mon, 12 Aug 2013 13:07:23 +0000 Subject: This patch implements ei and di instructions for mips. Test cases are added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188176 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFormats.td | 18 ++++++++++++++++++ lib/Target/Mips/MipsInstrInfo.td | 13 ++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 1322784..28abb7e 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -520,6 +520,24 @@ class ER_FM funct> let Inst{5-0} = funct; } + +//===----------------------------------------------------------------------===// +// Enable/disable interrupt instruction format +//===----------------------------------------------------------------------===// + +class EI_FM sc> +{ + bits<32> Inst; + bits<5> rt; + let Inst{31-26} = 0x10; + let Inst{25-21} = 0xb; + let Inst{20-16} = rt; + let Inst{15-11} = 0xc; + let Inst{10-6} = 0; + let Inst{5} = sc; + let Inst{4-0} = 0; +} + //===----------------------------------------------------------------------===// // // FLOATING POINT INSTRUCTION FORMATS diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index c321330..65601b0 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -660,6 +660,11 @@ class ER_FT : InstSE<(outs), (ins), opstr, [], NoItinerary, FrmOther>; +// Interrupts +class DEI_FT : + InstSE<(outs RO:$rt), (ins), + !strconcat(opstr, "\t$rt"), [], NoItinerary, FrmOther>; + // Sync let hasSideEffects = 1 in class SYNC_FT : @@ -953,7 +958,8 @@ defm LBu : LoadM<"lbu", GPR32Opnd, zextloadi8, IILoad, addrDefault>, MMRel, defm LH : LoadM<"lh", GPR32Opnd, sextloadi16, IILoad, addrDefault>, MMRel, LW_FM<0x21>; defm LHu : LoadM<"lhu", GPR32Opnd, zextloadi16, IILoad>, MMRel, LW_FM<0x25>; -defm LW : LoadM<"lw", GPR32Opnd, load, IILoad, addrDefault>, MMRel, LW_FM<0x23>; +defm LW : LoadM<"lw", GPR32Opnd, load, IILoad, addrDefault>, MMRel, + LW_FM<0x23>; defm SB : StoreM<"sb", GPR32Opnd, truncstorei8, IIStore>, MMRel, LW_FM<0x28>; defm SH : StoreM<"sh", GPR32Opnd, truncstorei16, IIStore>, MMRel, LW_FM<0x29>; defm SW : StoreM<"sw", GPR32Opnd, store, IIStore>, MMRel, LW_FM<0x2b>; @@ -973,6 +979,9 @@ def SYSCALL 
: SYS_FT<"syscall">, SYS_FM<0xc>;
 def ERET : ER_FT<"eret">, ER_FM<0x18>;
 def DERET : ER_FT<"deret">, ER_FM<0x1f>;
 
+def EI : DEI_FT<"ei", GPR32Opnd>, EI_FM<1>;
+def DI : DEI_FT<"di", GPR32Opnd>, EI_FM<0>;
+
 /// Load-linked, Store-conditional
 let Predicates = [NotN64, HasStdEnc] in {
   def LL : LLBase<"ll", GPR32Opnd, mem>, LW_FM<0x30>;
@@ -1145,6 +1154,8 @@ def : InstAlias<"syscall", (SYSCALL 0), 1>;
 def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
 def : InstAlias<"break", (BREAK 0, 0), 1>;
+def : InstAlias<"ei", (EI ZERO), 1>;
+def : InstAlias<"di", (DI ZERO), 1>;
 //===----------------------------------------------------------------------===//
 // Assembler Pseudo Instructions
 //===----------------------------------------------------------------------===//
-- 
cgit v1.1


From f15dfe4eb48e8e2ff02a30bc8ba9112108f9b83d Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Mon, 12 Aug 2013 22:33:21 +0000
Subject: R600: Set scheduling preference to Sched::Source

R600 doesn't need to do any scheduling on the SelectionDAG now that
it has a very good MachineScheduler. Also, using the VLIW SelectionDAG
scheduler was having a major impact on compile times. For example, with
the phatk kernel, here are the LLVM IR to machine code compile times:

With Sched::VLIW
Total Compile Time: 1.4890 Seconds (User + System)
SelectionDAG Instruction Scheduling: 1.1670 Seconds (User + System)

With Sched::Source
Total Compile Time: 0.3330 Seconds (User + System)
SelectionDAG Instruction Scheduling: 0.0070 Seconds (User + System)

The code output was identical with both schedulers. This may not be true
for all programs, but it gives me confidence that there won't be much
reduction, if any, in code quality by using Sched::Source.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188215 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/R600ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index ce6ac89..a89875c 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -99,7 +99,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setBooleanContents(ZeroOrNegativeOneBooleanContent);
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
-  setSchedulingPreference(Sched::VLIW);
+  setSchedulingPreference(Sched::Source);
 }
 
 MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
-- 
cgit v1.1


From ebc573ed5b7d4757d58b3bfdec53fcf9b4cb8c01 Mon Sep 17 00:00:00 2001
From: Kevin Enderby
Date: Mon, 12 Aug 2013 22:45:44 +0000
Subject: Fix a crash with X86 Mach-O and a subtraction expression where both
 symbols are undefined; produce an error message instead, as this is a
 non-relocatable expression with X86 Mach-O.
rdar://8920876 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188218 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 64f005c..6eff224 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -175,6 +175,11 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, if (A_Base == B_Base && A_Base) report_fatal_error("unsupported relocation with identical base"); + // A subtraction expression where both symbols are undefined is a + // non-relocatable expression. + if (A->isUndefined() && B->isUndefined()) + report_fatal_error("unsupported relocation with subtraction expression"); + Value += Writer->getSymbolAddress(&A_SD, Layout) - (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout)); Value -= Writer->getSymbolAddress(&B_SD, Layout) - -- cgit v1.1 From bd980e5569d085ab73e351ec9fca8b698e06d44f Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Tue, 13 Aug 2013 13:07:09 +0000 Subject: This patch introduces changes to MipsAsmParser register parsing routines. The code now follows more deterministic path and makes the code more efficient and easier to maintain. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188264 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 322 +++++++++++++--------------- 1 file changed, 146 insertions(+), 176 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 0754052..2492bf8 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -63,6 +63,7 @@ class MipsAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; MipsAssemblerOptions Options; + bool hasConsumedDollar; #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" @@ -175,15 +176,13 @@ class MipsAsmParser : public MCTargetAsmParser { int matchRegisterByNumber(unsigned RegNum, unsigned RegClass); - int matchFPURegisterName(StringRef Name, FpFormatTy Format); + int matchFPURegisterName(StringRef Name); - void setFpFormat(FpFormatTy Format) { - FpFormat = Format; - } + int matchFCCRegisterName(StringRef Name); - void setDefaultFpFormat(); + int matchACRegisterName(StringRef Name); - void setFpFormat(StringRef Format); + int regKindToRegClass(int RegKind); FpFormatTy getFpFormat() {return FpFormat;} @@ -195,7 +194,7 @@ class MipsAsmParser : public MCTargetAsmParser { SmallVectorImpl &Instructions); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser) { + : MCTargetAsmParser(), STI(sti), Parser(parser), hasConsumedDollar(false) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } @@ -816,60 +815,76 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { return CC; } -int MipsAsmParser::matchFPURegisterName(StringRef Name, FpFormatTy Format) { +int MipsAsmParser::matchFPURegisterName(StringRef Name) { if (Name[0] == 'f') { StringRef NumString = Name.substr(1); unsigned IntVal; if (NumString.getAsInteger(10, IntVal)) return -1; // This is not an integer. - if (IntVal > 31) + if (IntVal > 31) // Maximum index for fpu register. 
return -1; + return IntVal; + } + return -1; +} - if (Format == FP_FORMAT_S || Format == FP_FORMAT_W) - return getReg(Mips::FGR32RegClassID, IntVal); - if (Format == FP_FORMAT_D) { - if (isFP64()) { - return getReg(Mips::FGR64RegClassID, IntVal); - } - // Only even numbers available as register pairs. - if ((IntVal > 31) || (IntVal % 2 != 0)) - return -1; - return getReg(Mips::AFGR64RegClassID, IntVal / 2); - } +int MipsAsmParser::matchFCCRegisterName(StringRef Name) { + + if (Name.startswith("fcc")) { + StringRef NumString = Name.substr(3); + unsigned IntVal; + if (NumString.getAsInteger(10, IntVal)) + return -1; // This is not an integer. + if (IntVal > 7) // There are only 8 fcc registers. + return -1; + return IntVal; } return -1; } -int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { +int MipsAsmParser::matchACRegisterName(StringRef Name) { - if (Name.equals("fcc0")) - return Mips::FCC0; + if (Name.startswith("acc")) { + StringRef NumString = Name.substr(3); + unsigned IntVal; + if (NumString.getAsInteger(10, IntVal)) + return -1; // This is not an integer. + if (IntVal > 3) // There are only 3 acc registers. + return -1; + return IntVal; + } + return -1; +} + +int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { int CC; CC = matchCPURegisterName(Name); if (CC != -1) return matchRegisterByNumber(CC, is64BitReg ? Mips::GPR64RegClassID : Mips::GPR32RegClassID); - return matchFPURegisterName(Name, getFpFormat()); + CC= matchFPURegisterName(Name); + //TODO: decide about fpu register class + return matchRegisterByNumber(CC, isFP64() ? Mips::FGR64RegClassID + : Mips::FGR32RegClassID); } -void MipsAsmParser::setDefaultFpFormat() { +int MipsAsmParser::regKindToRegClass(int RegKind) { - if (isMips64() || isFP64()) - FpFormat = FP_FORMAT_D; - else - FpFormat = FP_FORMAT_S; -} - -void MipsAsmParser::setFpFormat(StringRef Format) { + switch (RegKind) { + case MipsOperand::Kind_GPR32: return Mips::GPR32RegClassID; + case MipsOperand::Kind_GPR64: return Mips::GPR64RegClassID; + case MipsOperand::Kind_HWRegs: return Mips::HWRegsRegClassID; + case MipsOperand::Kind_FGR32Regs: return Mips::FGR32RegClassID; + case MipsOperand::Kind_FGR64Regs: return Mips::FGR64RegClassID; + case MipsOperand::Kind_AFGR64Regs: return Mips::AFGR64RegClassID; + case MipsOperand::Kind_CCRRegs: return Mips::CCRRegClassID; + case MipsOperand::Kind_ACC64DSP: return Mips::ACC64DSPRegClassID; + case MipsOperand::Kind_FCCRegs: return Mips::FCCRegClassID; + default :return -1; + } - FpFormat = StringSwitch(Format.lower()) - .Case(".s", FP_FORMAT_S) - .Case(".d", FP_FORMAT_D) - .Case(".l", FP_FORMAT_L) - .Case(".w", FP_FORMAT_W) - .Default(FP_FORMAT_NONE); } bool MipsAssemblerOptions::setATReg(unsigned Reg) { @@ -889,8 +904,8 @@ unsigned MipsAsmParser::getReg(int RC, int RegNo) { } int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) { - - if (RegNum > 31) + if (RegNum > + getContext().getRegisterInfo()->getRegClass(RegClass).getNumRegs()) return -1; return getReg(RegClass, RegNum); @@ -1173,6 +1188,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( const MCExpr *IdVal = 0; SMLoc S; bool isParenExpr = false; + MipsAsmParser::OperandMatchResultTy Res = MatchOperand_NoMatch; // First operand is the offset. S = Parser.getTok().getLoc(); @@ -1211,21 +1227,12 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( Parser.Lex(); // Eat the '(' token. 
} - const AsmToken &Tok1 = Parser.getTok(); // Get next token - if (Tok1.is(AsmToken::Dollar)) { - Parser.Lex(); // Eat the '$' token. - if (tryParseRegisterOperand(Operands, isMips64())) { - Error(Parser.getTok().getLoc(), "unexpected token in operand"); - return MatchOperand_ParseFail; - } - - } else { - Error(Parser.getTok().getLoc(), "unexpected token in operand"); - return MatchOperand_ParseFail; - } + Res = parseRegs(Operands, isMips64()? (int) MipsOperand::Kind_GPR64: + (int) MipsOperand::Kind_GPR32); + if (Res != MatchOperand_Success) + return Res; - const AsmToken &Tok2 = Parser.getTok(); // Get next token. - if (Tok2.isNot(AsmToken::RParen)) { + if (Parser.getTok().isNot(AsmToken::RParen)) { Error(Parser.getTok().getLoc(), "')' expected"); return MatchOperand_ParseFail; } @@ -1261,22 +1268,88 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseRegs(SmallVectorImpl &Operands, int RegKind) { MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind; - if (getLexer().getKind() == AsmToken::Identifier) { + if (getLexer().getKind() == AsmToken::Identifier + && !hasConsumedDollar) { if (searchSymbolAlias(Operands, Kind)) return MatchOperand_Success; return MatchOperand_NoMatch; } + SMLoc S = Parser.getTok().getLoc(); // If the first token is not '$', we have an error. - if (Parser.getTok().isNot(AsmToken::Dollar)) + if (Parser.getTok().isNot(AsmToken::Dollar) && !hasConsumedDollar) return MatchOperand_NoMatch; - - Parser.Lex(); // Eat $ - if (!tryParseRegisterOperand(Operands, - RegKind == MipsOperand::Kind_GPR64)) { - // Set the proper register kind. - MipsOperand* op = static_cast(Operands.back()); - op->setRegKind(Kind); - if ((Kind == MipsOperand::Kind_GPR32) + if (!hasConsumedDollar) { + Parser.Lex(); // Eat the '$' + hasConsumedDollar = true; + } + if (getLexer().getKind() == AsmToken::Identifier) { + int RegNum = -1; + std::string RegName = Parser.getTok().getString().lower(); + // Match register by name + switch (RegKind) { + case MipsOperand::Kind_GPR32: + case MipsOperand::Kind_GPR64: + RegNum = matchCPURegisterName(RegName); + break; + case MipsOperand::Kind_AFGR64Regs: + case MipsOperand::Kind_FGR64Regs: + case MipsOperand::Kind_FGR32Regs: + RegNum = matchFPURegisterName(RegName); + if (RegKind == MipsOperand::Kind_AFGR64Regs) + RegNum /= 2; + break; + case MipsOperand::Kind_FCCRegs: + RegNum = matchFCCRegisterName(RegName); + break; + case MipsOperand::Kind_ACC64DSP: + RegNum = matchACRegisterName(RegName); + break; + default: break; // No match, value is set to -1. + } + // No match found, return _NoMatch to give a chance to other round. + if (RegNum < 0) + return MatchOperand_NoMatch; + + int RegVal = getReg(regKindToRegClass(Kind), RegNum); + if (RegVal == -1) + return MatchOperand_NoMatch; + + MipsOperand *Op = MipsOperand::CreateReg(RegVal, S, + Parser.getTok().getLoc()); + Op->setRegKind(Kind); + Operands.push_back(Op); + hasConsumedDollar = false; + Parser.Lex(); // Eat the register name. + if ((RegKind == MipsOperand::Kind_GPR32) + && (getLexer().is(AsmToken::LParen))) { + // Check if it is indexed addressing operand. + Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc())); + Parser.Lex(); // Eat the parenthesis. 
+ if (parseRegs(Operands,RegKind) != MatchOperand_Success) + return MatchOperand_NoMatch; + if (getLexer().isNot(AsmToken::RParen)) + return MatchOperand_NoMatch; + Operands.push_back(MipsOperand::CreateToken(")", getLexer().getLoc())); + Parser.Lex(); + } + return MatchOperand_Success; + } else if (getLexer().getKind() == AsmToken::Integer) { + unsigned RegNum = Parser.getTok().getIntVal(); + if (Kind == MipsOperand::Kind_HWRegs) { + if (RegNum != 29) + return MatchOperand_NoMatch; + // Only hwreg 29 is supported, found at index 0. + RegNum = 0; + } + int Reg = matchRegisterByNumber(RegNum, regKindToRegClass(Kind)); + if (Reg == -1) + return MatchOperand_NoMatch; + MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc()); + Op->setRegKind(Kind); + Operands.push_back(Op); + hasConsumedDollar = false; + Parser.Lex(); // Eat the register number. + if ((RegKind == MipsOperand::Kind_GPR32) && (getLexer().is(AsmToken::LParen))) { // Check if it is indexed addressing operand. Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc())); @@ -1311,9 +1384,6 @@ MipsAsmParser::parseAFGR64Regs(SmallVectorImpl &Operands) { if (isFP64()) return MatchOperand_NoMatch; - // Double operand is expected, set appropriate format - setFpFormat(FP_FORMAT_D); - return parseRegs(Operands, (int) MipsOperand::Kind_AFGR64Regs); } @@ -1321,83 +1391,22 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseFGR64Regs(SmallVectorImpl &Operands) { if (!isFP64()) return MatchOperand_NoMatch; - // Double operand is expected, set appropriate format - setFpFormat(FP_FORMAT_D); - return parseRegs(Operands, (int) MipsOperand::Kind_FGR64Regs); } MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseFGR32Regs(SmallVectorImpl &Operands) { - // Single operand is expected, set appropriate format - setFpFormat(FP_FORMAT_S); return parseRegs(Operands, (int) MipsOperand::Kind_FGR32Regs); } MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseFCCRegs(SmallVectorImpl &Operands) { - // If the first token is not '$' we have an error. - if (Parser.getTok().isNot(AsmToken::Dollar)) - return MatchOperand_NoMatch; - - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat the '$' - - const AsmToken &Tok = Parser.getTok(); // Get next token. - - if (Tok.isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - if (!Tok.getIdentifier().startswith("fcc")) - return MatchOperand_NoMatch; - - StringRef NumString = Tok.getIdentifier().substr(3); - - unsigned IntVal; - if (NumString.getAsInteger(10, IntVal)) - return MatchOperand_NoMatch; - - unsigned Reg = matchRegisterByNumber(IntVal, Mips::FCCRegClassID); - - MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc()); - Op->setRegKind(MipsOperand::Kind_FCCRegs); - Operands.push_back(Op); - - Parser.Lex(); // Eat the register number. - return MatchOperand_Success; + return parseRegs(Operands, (int) MipsOperand::Kind_FCCRegs); } MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseACC64DSP(SmallVectorImpl &Operands) { - // If the first token is not '$' we have an error. - if (Parser.getTok().isNot(AsmToken::Dollar)) - return MatchOperand_NoMatch; - - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat the '$' - - const AsmToken &Tok = Parser.getTok(); // Get next token. 
- - if (Tok.isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - if (!Tok.getIdentifier().startswith("acc")) - return MatchOperand_NoMatch; - - StringRef NumString = Tok.getIdentifier().substr(3); - - unsigned IntVal; - if (NumString.getAsInteger(10, IntVal)) - return MatchOperand_NoMatch; - - unsigned Reg = matchRegisterByNumber(IntVal, Mips::ACC64DSPRegClassID); - - MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc()); - Op->setRegKind(MipsOperand::Kind_ACC64DSP); - Operands.push_back(Op); - - Parser.Lex(); // Eat the register number. - return MatchOperand_Success; + return parseRegs(Operands, (int) MipsOperand::Kind_ACC64DSP); } bool MipsAsmParser::searchSymbolAlias( @@ -1428,17 +1437,19 @@ bool MipsAsmParser::searchSymbolAlias( switch (Kind) { case MipsOperand::Kind_AFGR64Regs: case MipsOperand::Kind_FGR64Regs: - RegNum = matchFPURegisterName(DefSymbol.substr(1), FP_FORMAT_D); + RegNum = matchFPURegisterName(DefSymbol.substr(1)); break; case MipsOperand::Kind_FGR32Regs: - RegNum = matchFPURegisterName(DefSymbol.substr(1), FP_FORMAT_S); + RegNum = matchFPURegisterName(DefSymbol.substr(1)); break; case MipsOperand::Kind_GPR64: case MipsOperand::Kind_GPR32: default: - RegNum = matchRegisterName(DefSymbol.substr(1), isMips64()); + RegNum = matchCPURegisterName(DefSymbol.substr(1)); break; } + if (RegNum > -1) + RegNum = getReg(regKindToRegClass(Kind), RegNum); } if (RegNum > -1) { Parser.Lex(); @@ -1463,53 +1474,12 @@ bool MipsAsmParser::searchSymbolAlias( MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseHWRegs(SmallVectorImpl &Operands) { - - // If the first token is not '$' we have error. - if (Parser.getTok().isNot(AsmToken::Dollar)) - return MatchOperand_NoMatch; - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat the '$'. - - const AsmToken &Tok = Parser.getTok(); // Get the next token. - if (Tok.isNot(AsmToken::Integer)) - return MatchOperand_NoMatch; - - unsigned RegNum = Tok.getIntVal(); - // At the moment only hwreg29 is supported. - if (RegNum != 29) - return MatchOperand_ParseFail; - - MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S, - Parser.getTok().getLoc()); - op->setRegKind(MipsOperand::Kind_HWRegs); - Operands.push_back(op); - - Parser.Lex(); // Eat the register number. - return MatchOperand_Success; + return parseRegs(Operands, (int) MipsOperand::Kind_HWRegs); } MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCCRRegs(SmallVectorImpl &Operands) { - // If the first token is not '$' we have an error. - if (Parser.getTok().isNot(AsmToken::Dollar)) - return MatchOperand_NoMatch; - - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat the '$' - - const AsmToken &Tok = Parser.getTok(); // Get next token. - - if (Tok.isNot(AsmToken::Integer)) - return MatchOperand_NoMatch; - - unsigned Reg = matchRegisterByNumber(Tok.getIntVal(), Mips::CCRRegClassID); - - MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc()); - Op->setRegKind(MipsOperand::Kind_CCRRegs); - Operands.push_back(Op); - - Parser.Lex(); // Eat the register number. - return MatchOperand_Success; + return parseRegs(Operands, (int) MipsOperand::Kind_CCRRegs); } MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { -- cgit v1.1 From 4d36bd80e68b8245ba4fcf26d33dbf35da3e2002 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 13 Aug 2013 13:24:07 +0000 Subject: AVX-512: Added CMP and BLEND instructions. Lowering for SETCC. 
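In brief: unlike SSE/AVX compares, which produce a vector of all-ones or
all-zeros lanes, the AVX-512 compare nodes added here (X86ISD::CMPM,
X86ISD::CMPMU, PCMPEQM, PCMPGTM) write one i1 per lane into a mask
register. A minimal sketch of the condition-code mapping the lowering
below performs (the helper name is hypothetical; immediate values per
this patch: 0=EQ, 1=LT, 2=LE, 4=NE, 5=NLT, 6=NLE):

    // Sketch only, mirroring the switch in LowerIntVSETCC_AVX512 below.
    static unsigned mapSetCCToAVX512Imm(ISD::CondCode CC, bool &IsUnsigned) {
      IsUnsigned = ISD::isUnsignedIntSetCC(CC); // true for SETU{LT,LE,GT,GE}
      switch (CC) {
      case ISD::SETEQ:                   return 0;
      case ISD::SETLT: case ISD::SETULT: return 1;
      case ISD::SETLE: case ISD::SETULE: return 2;
      case ISD::SETNE:                   return 4;
      case ISD::SETGE: case ISD::SETUGE: return 5; // NLT
      case ISD::SETGT: case ISD::SETUGT: return 6; // NLE
      default: llvm_unreachable("unexpected vector SETCC condition");
      }
    }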
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 84 +++++++++++-- lib/Target/X86/X86ISelLowering.h | 7 ++ lib/Target/X86/X86InstrAVX512.td | 202 ++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrFragmentsSIMD.td | 9 ++ 4 files changed, 293 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 55b1f01..b6e7413 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9705,6 +9705,42 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } +static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { + SDValue Cond; + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue CC = Op.getOperand(2); + MVT VT = Op.getValueType().getSimpleVT(); + + EVT OpVT = Op0.getValueType(); + assert(OpVT.getVectorElementType().getSizeInBits() >= 32 && + Op.getValueType().getScalarType() == MVT::i1 && + "Cannot set masked compare for this operation"); + + ISD::CondCode SetCCOpcode = cast(CC)->get(); + SDLoc dl(Op); + + bool Unsigned = false; + unsigned SSECC; + switch (SetCCOpcode) { + default: llvm_unreachable("Unexpected SETCC condition"); + case ISD::SETNE: SSECC = 4; break; + case ISD::SETEQ: SSECC = 0; break; + case ISD::SETUGT: Unsigned = true; + case ISD::SETGT: SSECC = 6; break; // NLE + case ISD::SETULT: Unsigned = true; + case ISD::SETLT: SSECC = 1; break; + case ISD::SETUGE: Unsigned = true; + case ISD::SETGE: SSECC = 5; break; // NLT + case ISD::SETULE: Unsigned = true; + case ISD::SETLE: SSECC = 2; break; + } + unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM; + return DAG.getNode(Opc, dl, VT, Op0, Op1, + DAG.getConstant(SSECC, MVT::i8)); + +} + static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDValue Cond; @@ -9723,7 +9759,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, #endif unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1); - + unsigned Opc = X86ISD::CMPP; + unsigned NumElems = VT.getVectorNumElements(); + if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) { + assert(NumElems <=16); + Opc = X86ISD::CMPM; + } // In the two special cases we can't handle, emit two comparisons. if (SSECC == 8) { unsigned CC0, CC1; @@ -9735,14 +9776,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, CC0 = 7; CC1 = 4; CombineOpc = ISD::AND; } - SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CC0, MVT::i8)); - SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CC1, MVT::i8)); return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } // Handle all other FP comparisons here. 
- return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); } @@ -9750,6 +9791,24 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntVSETCC(Op, DAG); + bool MaskResult = (VT.getVectorElementType() == MVT::i1); + EVT OpVT = Op1.getValueType(); + if (Subtarget->hasAVX512()) { + if (Op1.getValueType().is512BitVector() || + (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32)) + return LowerIntVSETCC_AVX512(Op, DAG); + + // In AVX-512 architecture setcc returns mask with i1 elements, + // But there is no compare instruction for i8 and i16 elements. + // We are not talking about 512-bit operands in this case, these + // types are illegal. + if (MaskResult && + (OpVT.getVectorElementType().getSizeInBits() < 32 && + OpVT.getVectorElementType().getSizeInBits() >= 8)) + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC)); + } + // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. @@ -9759,15 +9818,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; - case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; + case ISD::SETEQ: Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break; case ISD::SETLT: Swap = true; - case ISD::SETGT: Opc = X86ISD::PCMPGT; break; + case ISD::SETGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break; case ISD::SETGE: Swap = true; - case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break; + case ISD::SETLE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; + Invert = true; break; case ISD::SETULT: Swap = true; - case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break; + case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; + FlipSigns = true; break; case ISD::SETUGE: Swap = true; - case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break; + case ISD::SETULE: Opc = MaskResult? 
X86ISD::PCMPGTM: X86ISD::PCMPGT; + FlipSigns = true; Invert = true; break; } // Special case: Use min/max operations for SETULE/SETUGE @@ -13201,6 +13263,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CMP: return "X86ISD::CMP"; case X86ISD::COMI: return "X86ISD::COMI"; case X86ISD::UCOMI: return "X86ISD::UCOMI"; + case X86ISD::CMPM: return "X86ISD::CMPM"; + case X86ISD::CMPMU: return "X86ISD::CMPMU"; case X86ISD::SETCC: return "X86ISD::SETCC"; case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd"; @@ -13273,6 +13337,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CMPP: return "X86ISD::CMPP"; case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; + case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM"; + case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM"; case X86ISD::ADD: return "X86ISD::ADD"; case X86ISD::SUB: return "X86ISD::SUB"; case X86ISD::ADC: return "X86ISD::ADC"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c931b9b..7201f7a 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -274,6 +274,13 @@ namespace llvm { // PCMP* - Vector integer comparisons. PCMPEQ, PCMPGT, + // PCMP*M - Vector integer comparisons, the result is in a mask vector + PCMPEQM, PCMPGTM, + + /// CMPM, CMPMU - Vector comparison generating mask bits for fp and + /// integer signed and unsigned data types. + CMPM, + CMPMU, // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 7fed783..f4528a9 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -564,7 +564,195 @@ defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512me defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +//===----------------------------------------------------------------------===// +// AVX-512 - BLEND using mask +// +multiclass avx512_blendmask opc, string OpcodeStr, + RegisterClass KRC, RegisterClass RC, + X86MemOperand x86memop, PatFrag mem_frag, + SDNode OpNode, ValueType vt> { + def rr : AVX5128I, EVEX_4V, EVEX_K; + + def rm : AVX5128I, + EVEX_4V, EVEX_K; +} + +let ExeDomain = SSEPackedSingle in +defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem, + memopv16f32, vselect, v16f32>, + EVEX_CD8<32, CD8VF>, EVEX_V512; +let ExeDomain = SSEPackedDouble in +defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem, + memopv8f64, vselect, v8f64>, + VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; + +defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512, f512mem, + memopv8i64, vselect, v16i32>, + EVEX_CD8<32, CD8VF>, EVEX_V512; + +defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512, f512mem, + memopv8i64, vselect, v8i64>, VEX_W, + EVEX_CD8<64, CD8VF>, EVEX_V512; + +let Predicates = [HasAVX512] in { +def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1), + (v8f32 VR256X:$src2))), + (EXTRACT_SUBREG + (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + +def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), + (v8i32 VR256X:$src2))), + 
(EXTRACT_SUBREG + (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; +} + +multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, + SDNode OpNode, ValueType vt> { + def rr : AVX512BI, EVEX_4V; + def rm : AVX512BI, EVEX_4V; +} + +defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem, + memopv8i64, X86pcmpeqm, v16i32>, EVEX_V512; +defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem, + memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W; + +defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem, + memopv8i64, X86pcmpgtm, v16i32>, EVEX_V512; +defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem, + memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W; + +def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (VPCMPGTDZrr + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; + +def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (VPCMPEQDZrr + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; + + +multiclass avx512_icmp_cc opc, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, + SDNode OpNode, ValueType vt, Operand CC, string asm, + string asm_alt> { + def rri : AVX512AIi8, EVEX_4V; + def rmi : AVX512AIi8, EVEX_4V; + // Accept explicit immediate argument form instead of comparison code. + let neverHasSideEffects = 1 in { + def rri_alt : AVX512AIi8, EVEX_4V; + def rmi_alt : AVX512AIi8, EVEX_4V; + } +} + +defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv8i64, + X86cmpm, v16i32, AVXCC, + "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv8i64, + X86cmpmu, v16i32, AVXCC, + "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64, + X86cmpm, v8i64, AVXCC, + "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64, + X86cmpmu, v8i64, AVXCC, + "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +// avx512_cmp_packed - sse 1 & 2 compare packed instructions +multiclass avx512_cmp_packed { + def rri : AVX512PIi8<0xC2, MRMSrcReg, + (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, + [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>; + def rmi : AVX512PIi8<0xC2, MRMSrcMem, + (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, + [(set KRC:$dst, + (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>; + + // Accept explicit immediate argument form instead of comparison code. 
+ let neverHasSideEffects = 1 in { + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_CMPP_RR, d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_CMPP_RM, d>; + } +} + +defm VCMPPSZ : avx512_cmp_packed, TB, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VCMPPDZ : avx512_cmp_packed, TB, OpSize, EVEX_4V, VEX_W, EVEX_V512, + EVEX_CD8<64, CD8VF>; + +def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VCMPPSZrri + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; +def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VPCMPDZrri + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; +def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VPCMPUDZrri + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; + // Mask register copy, including // - copy between mask registers // - load/store mask registers @@ -949,4 +1137,18 @@ defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +let AddedComplexity = 20 in { +def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1), + (v16f32 VR512:$src2))), + (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>; +def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1), + (v8f64 VR512:$src2))), + (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>; +def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1), + (v16i32 VR512:$src2))), + (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>; +def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1), + (v8i64 VR512:$src2))), + (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>; +} diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 8587d38..3d6370f 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -118,6 +118,15 @@ def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; +def X86IntCmpMask : SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>]>; +def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>; +def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>; + +def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; + def X86vshl : SDNode<"X86ISD::VSHL", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>]>>; -- cgit v1.1 From ea8ddd86b1e364a799e57fc0ac468a9c4a8f8bcf Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Tue, 13 Aug 2013 14:02:13 +0000 Subject: Fix signed overflow in when computing encodings for ADR instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188268 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 0873220..c0c21d3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -671,7 +671,7 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12, Fixups); - int32_t offset = MO.getImm(); + int64_t offset = MO.getImm(); uint32_t Val = 0x2000; int SoImmVal; -- cgit v1.1 From 4c857225ecba199bc74b350bbb1a2ded125ebfb7 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 13 Aug 2013 14:04:20 +0000 Subject: Fix compiler warnings. ../lib/Target/X86/X86ISelLowering.cpp:9715:7: error: unused variable 'OpVT' [-Werror,-Wunused-variable] EVT OpVT = Op0.getValueType(); ^ ../lib/Target/X86/X86ISelLowering.cpp:9763:14: error: unused variable 'NumElems' [-Werror,-Wunused-variable] unsigned NumElems = VT.getVectorNumElements(); git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188269 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b6e7413..9625e4b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9712,10 +9712,9 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { SDValue CC = Op.getOperand(2); MVT VT = Op.getValueType().getSimpleVT(); - EVT OpVT = Op0.getValueType(); - assert(OpVT.getVectorElementType().getSizeInBits() >= 32 && + assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 && Op.getValueType().getScalarType() == MVT::i1 && - "Cannot set masked compare for this operation"); + "Cannot set masked compare for this operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDLoc dl(Op); @@ -9759,10 +9758,9 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, #endif unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1); - unsigned Opc = X86ISD::CMPP; - unsigned NumElems = VT.getVectorNumElements(); + unsigned Opc = X86ISD::CMPP; if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) { - assert(NumElems <=16); + assert(VT.getVectorNumElements() <= 16); Opc = X86ISD::CMPM; } // In the two special cases we can't handle, emit two comparisons. -- cgit v1.1 From 3f87f2510c0d84fe092ac311a0e25a5e2f7aa3ac Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 13 Aug 2013 16:40:47 +0000 Subject: ARMv8: SWP and SWPB are obsoleted on ARMv8. 
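(Context: swp and swpb atomically exchange a register with a word or byte
in memory. They were already deprecated from ARMv6 onwards in favour of
exclusive load/store, and ARMv8 removes them entirely, hence the new
PreV8 predicate below. A rough C++ illustration of the operation they
provided, not taken from the patch:)

    #include <atomic>
    #include <cstdint>

    // Illustrative equivalent of "swp Rt, Rt2, [Rn]": atomically store the
    // new value and return the old one; on ARMv6 and later, compilers emit
    // a ldrex/strex loop for this instead of swp.
    uint32_t swp(std::atomic<uint32_t> &mem, uint32_t val) {
      return mem.exchange(val);
    }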
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188288 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMInstrInfo.td | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c243402..280757f 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -196,6 +196,8 @@ def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
                                  AssemblerPredicate<"HasV7Ops", "armv7">;
 def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
                                  AssemblerPredicate<"HasV8Ops", "armv8">;
+def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
+                                 AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
 def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
 def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
                                  AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -4455,9 +4457,11 @@ def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
 // SWP/SWPB are deprecated in V6/V7.
 let mayLoad = 1, mayStore = 1 in {
 def SWP : AIswp<0, (outs GPRnopc:$Rt),
-                (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>;
+                (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
+          Requires<[PreV8]>;
 def SWPB: AIswp<1, (outs GPRnopc:$Rt),
-                (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>;
+                (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
+          Requires<[PreV8]>;
 }
 
 //===----------------------------------------------------------------------===//
-- 
cgit v1.1


From 3add0679d24a00c4a585809c6ce54486f6a458f5 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Tue, 13 Aug 2013 17:54:56 +0000
Subject: Update makeLibCall to return both the call and the chain associated
 with the libcall instead of just the call.

This allows us to specify libcalls that return void.

LowerCallTo returns a pair with the return value of the call as the first
element and the chain associated with the return value as the second element.
If we lower a call that has a void return value, LowerCallTo returns an
SDValue with a NULL SDNode and the chain for the call. Thus, because
makeLibCall returned just the first value, it was impossible to set up the
chain so that the call is not eliminated as dead code.

I also updated all references to makeLibCall to reflect the new return type.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188300 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 44b691b..ec838fb 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1875,7 +1875,7 @@ AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   SDValue SrcVal = Op.getOperand(0);
   return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, SDLoc(Op));
+                     /*isSigned*/ false, SDLoc(Op)).first;
 }
 
 SDValue
-- 
cgit v1.1


From da0860f78e6e43aca3333a7815b2f9bc0f8dfac0 Mon Sep 17 00:00:00 2001
From: Jack Carter
Date: Tue, 13 Aug 2013 20:19:16 +0000
Subject: [Mips] Support for unaligned load/store microMips instructions

This includes the instructions lwl, lwr, swl and swr.
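For background: lwl/lwr (and swl/swr for stores) are used as a pair to
access a 32-bit word at an address with no alignment guarantee, each
instruction handling the bytes on its side of the word boundary. A loose
C++ sketch of the net effect of the load pair (illustrative only, not
part of the patch):

    #include <cstdint>
    #include <cstring>

    // Net effect of "lwr $t, 0($a); lwl $t, 3($a)" on a little-endian
    // core: load one word from a possibly unaligned address.
    uint32_t loadWordUnaligned(const uint8_t *p) {
      uint32_t w;
      std::memcpy(&w, p, sizeof w); // each instruction fills its half
      return w;
    }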
Patch by Zoran Jovnovic git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188312 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 16 ++++++++ lib/Target/Mips/MicroMipsInstrFormats.td | 13 +++++++ lib/Target/Mips/MicroMipsInstrInfo.td | 44 +++++++++++++++++++++- lib/Target/Mips/MipsCodeEmitter.cpp | 7 ++++ lib/Target/Mips/MipsISelDAGToDAG.cpp | 6 +++ lib/Target/Mips/MipsISelDAGToDAG.h | 3 ++ lib/Target/Mips/MipsInstrInfo.td | 8 +++- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 31 +++++++++++++++ lib/Target/Mips/MipsSEISelDAGToDAG.h | 6 +++ 9 files changed, 131 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index d019f05..aa927bd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -97,6 +97,8 @@ public: unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; + unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, @@ -212,6 +214,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, LowerDextDins(TmpInst); } + unsigned long N = Fixups.size(); uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups); // Check for unimplemented opcodes. @@ -224,6 +227,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (STI.getFeatureBits() & Mips::FeatureMicroMips) { int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips); if (NewOpcode != -1) { + if (Fixups.size() > N) + Fixups.pop_back(); Opcode = NewOpcode; TmpInst.setOpcode (NewOpcode); Binary = getBinaryCodeForInstr(TmpInst, Fixups); @@ -417,6 +422,17 @@ MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, return (OffBits & 0xFFFF) | RegBits; } +unsigned MipsMCCodeEmitter:: +getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 11-0. 
+ assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups) << 16; + unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups); + + return (OffBits & 0x0FFF) | RegBits; +} + unsigned MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const { diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td index 665b4d2..7d8c513 100644 --- a/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/lib/Target/Mips/MicroMipsInstrFormats.td @@ -110,3 +110,16 @@ class LW_FM_MM op> : MMArch { let Inst{20-16} = addr{20-16}; let Inst{15-0} = addr{15-0}; } + +class LWL_FM_MM funct> { + bits<5> rt; + bits<21> addr; + + bits<32> Inst; + + let Inst{31-26} = 0x18; + let Inst{25-21} = rt; + let Inst{20-16} = addr{20-16}; + let Inst{15-12} = funct; + let Inst{11-0} = addr{11-0}; +} diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 249d712..549d0e2 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -1,4 +1,34 @@ -let isCodeGenOnly = 1 in { +def addrimm12 : ComplexPattern; + +def simm12 : Operand { + let DecoderMethod = "DecodeSimm12"; +} + +def mem_mm_12 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops GPR32, simm12); + let EncoderMethod = "getMemEncodingMMImm12"; + let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + +let canFoldAsLoad = 1 in +class LoadLeftRightMM : + InstSE<(outs RO:$rt), (ins MemOpnd:$addr, RO:$src), + !strconcat(opstr, "\t$rt, $addr"), + [(set RO:$rt, (OpNode addrimm12:$addr, RO:$src))], + NoItinerary, FrmI> { + string Constraints = "$src = $rt"; +} + +class StoreLeftRightMM: + InstSE<(outs), (ins RO:$rt, MemOpnd:$addr), + !strconcat(opstr, "\t$rt, $addr"), + [(OpNode RO:$rt, addrimm12:$addr)], NoItinerary, FrmI>; + +let DecoderNamespace = "MicroMips" in { /// Arithmetic Instructions (ALU Immediate) def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd>, ADDI_FM_MM<0xc>; @@ -64,4 +94,16 @@ let isCodeGenOnly = 1 in { defm SB_MM : StoreM<"sb", GPR32Opnd, truncstorei8>, MMRel, LW_FM_MM<0x6>; defm SH_MM : StoreM<"sh", GPR32Opnd, truncstorei16>, MMRel, LW_FM_MM<0xe>; defm SW_MM : StoreM<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>; + + /// Load and Store Instructions - unaligned + let Predicates = [InMicroMips] in { + def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12>, + LWL_FM_MM<0x0>; + def LWR_MM : LoadLeftRightMM<"lwr", MipsLWR, GPR32Opnd, mem_mm_12>, + LWL_FM_MM<0x1>; + def SWL_MM : StoreLeftRightMM<"swl", MipsSWL, GPR32Opnd, mem_mm_12>, + LWL_FM_MM<0x8>; + def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>, + LWL_FM_MM<0x9>; + } } diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 813037e..eed1155 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -108,6 +108,7 @@ private: unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; @@ -201,6 +202,12 @@ unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI, return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 
0xFFFF) | RegBits; } +unsigned MipsCodeEmitter::getMemEncodingMMImm12(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const { // size is encoded as size-1. diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 0002a5f..15e43d4 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -81,6 +81,12 @@ bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, return false; } +bool MipsDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + llvm_unreachable("Unimplemented function."); + return false; +} + bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Alias) { llvm_unreachable("Unimplemented function."); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index cf0f9c5..a137a60 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -65,6 +65,9 @@ private: virtual bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; + virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Alias); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 65601b0..3806b33 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -182,6 +182,10 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; def NotDSP : Predicate<"!Subtarget.hasDSP()">; +def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">, + AssemblerPredicate<"FeatureMicroMips">; +def NotInMicroMips : Predicate<"!Subtarget.inMicroMipsMode()">, + AssemblerPredicate<"!FeatureMicroMips">; class MipsPat : Pat { let Predicates = [HasStdEnc]; @@ -516,7 +520,7 @@ class StoreLeftRight { def NAME : LoadLeftRight, - Requires<[NotN64, HasStdEnc]>; + Requires<[NotN64, HasStdEnc, NotInMicroMips]>; def _P8 : LoadLeftRight, Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; @@ -526,7 +530,7 @@ multiclass LoadLeftRightM { multiclass StoreLeftRightM { def NAME : StoreLeftRight, - Requires<[NotN64, HasStdEnc]>; + Requires<[NotN64, HasStdEnc, NotInMicroMips]>; def _P8 : StoreLeftRight, Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 37fbc3d..203196e 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -314,6 +314,37 @@ bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, selectAddrDefault(Addr, Base, Offset); } +/// Used on microMIPS Load/Store unaligned instructions (12-bit offset) +bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + EVT ValTy = Addr.getValueType(); + + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isInt<12>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast + (Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else + 
Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + + return false; +} + +bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm12(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { unsigned Opcode = Node->getOpcode(); SDLoc DL(Node); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 03ed1f9..09a15b0 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -47,6 +47,12 @@ private: virtual bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; + virtual bool selectAddrRegImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + virtual std::pair selectNode(SDNode *Node); virtual void processFunctionAfterISel(MachineFunction &MF); -- cgit v1.1 From 3f70e908c3d9de7acea462719ebf36dca1560f9c Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 13 Aug 2013 20:54:07 +0000 Subject: [Mips][msa] Added initial MSA support. * msa SubtargetFeature * registers * ld.[bhwd], and st.[bhwd] instructions Does not correctly prohibit use of both 32-bit FPU registers and MSA together. Patch by Daniel Sanders git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188313 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips.td | 2 + lib/Target/Mips/MipsInstrInfo.td | 4 ++ lib/Target/Mips/MipsMSAInstrFormats.td | 34 +++++++++++++++++ lib/Target/Mips/MipsMSAInstrInfo.td | 69 ++++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsRegisterInfo.td | 45 ++++++++++++++++++++++ lib/Target/Mips/MipsSEISelLowering.cpp | 17 +++++++++ lib/Target/Mips/MipsSubtarget.cpp | 2 +- lib/Target/Mips/MipsSubtarget.h | 4 ++ 8 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 lib/Target/Mips/MipsMSAInstrFormats.td create mode 100644 lib/Target/Mips/MipsMSAInstrInfo.td (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 2595e41..b8e3f39 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -78,6 +78,8 @@ def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">; def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true", "Mips DSP-R2 ASE", [FeatureDSP]>; +def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">; + def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true", "microMips mode">; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3806b33..abc2d16 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1420,6 +1420,10 @@ include "Mips16InstrInfo.td" include "MipsDSPInstrFormats.td" include "MipsDSPInstrInfo.td" +// MSA +include "MipsMSAInstrFormats.td" +include "MipsMSAInstrInfo.td" + // Micromips include "MicroMipsInstrFormats.td" include "MicroMipsInstrInfo.td" diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td new file mode 100644 index 0000000..7cc1a88 --- /dev/null +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -0,0 +1,34 @@ +//===- MipsMSAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+def HasMSA : Predicate<"Subtarget.hasMSA()">,
+             AssemblerPredicate<"FeatureMSA">;
+
+class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
+  let Predicates = [HasMSA];
+  let Inst{31-26} = 0b011110;
+}
+
+class PseudoMSA<dag outs, dag ins, list<dag> pattern,
+                InstrItinClass itin = IIPseudo>:
+  MipsPseudo<outs, ins, pattern, itin> {
+  let Predicates = [HasMSA];
+}
+
+class MSA_3R_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+  let Inst{25-23} = major;
+  let Inst{22-21} = df;
+  let Inst{5-0} = minor;
+}
+
+class MSA_I5_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+  let Inst{25-23} = major;
+  let Inst{22-21} = df;
+  let Inst{5-0} = minor;
+}
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
new file mode 100644
index 0000000..24482c0
--- /dev/null
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -0,0 +1,69 @@
+//===- MipsMSAInstrInfo.td - MSA ASE instructions -*- tablegen ------------*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips MSA ASE instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// Instruction encoding.
+class LD_B_ENC : MSA_I5_FMT<0b110, 0b00, 0b000111>;
+class LD_H_ENC : MSA_I5_FMT<0b110, 0b01, 0b000111>;
+class LD_W_ENC : MSA_I5_FMT<0b110, 0b10, 0b000111>;
+class LD_D_ENC : MSA_I5_FMT<0b110, 0b11, 0b000111>;
+class ST_B_ENC : MSA_I5_FMT<0b111, 0b00, 0b000111>;
+class ST_H_ENC : MSA_I5_FMT<0b111, 0b01, 0b000111>;
+class ST_W_ENC : MSA_I5_FMT<0b111, 0b10, 0b000111>;
+class ST_D_ENC : MSA_I5_FMT<0b111, 0b11, 0b000111>;
+
+// Instruction desc.
+class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                   ValueType TyNode, InstrItinClass itin,
+                   RegisterClass RCWD, Operand MemOpnd = mem,
+                   ComplexPattern Addr = addr> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins MemOpnd:$addr);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
+  list<dag> Pattern = [(set RCWD:$wd, (TyNode (OpNode Addr:$addr)))];
+  InstrItinClass Itinerary = itin;
+}
+
+class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                   ValueType TyNode, InstrItinClass itin,
+                   RegisterClass RCWD, Operand MemOpnd = mem,
+                   ComplexPattern Addr = addr> {
+  dag OutOperandList = (outs);
+  dag InOperandList = (ins RCWD:$wd, MemOpnd:$addr);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
+  list<dag> Pattern = [(OpNode (TyNode RCWD:$wd), Addr:$addr)];
+  InstrItinClass Itinerary = itin;
+}
+
+// Load/Store
+class LD_B_DESC : LD_DESC_BASE<"ld.b", load, v16i8, NoItinerary, MSA128>;
+class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, NoItinerary, MSA128>;
+class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, NoItinerary, MSA128>;
+class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, NoItinerary, MSA128>;
+class ST_B_DESC : ST_DESC_BASE<"st.b", store, v16i8, NoItinerary, MSA128>;
+class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, NoItinerary, MSA128>;
+class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, NoItinerary, MSA128>;
+class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, NoItinerary, MSA128>;
+
+// Instruction defs.
+def LD_B: LD_B_ENC, LD_B_DESC, Requires<[HasMSA]>;
+def LD_H: LD_H_ENC, LD_H_DESC, Requires<[HasMSA]>;
+def LD_W: LD_W_ENC, LD_W_DESC, Requires<[HasMSA]>;
+def LD_D: LD_D_ENC, LD_D_DESC, Requires<[HasMSA]>;
+
+def ST_B: ST_B_ENC, ST_B_DESC, Requires<[HasMSA]>;
+def ST_H: ST_H_ENC, ST_H_DESC, Requires<[HasMSA]>;
+def ST_W: ST_W_ENC, ST_W_DESC, Requires<[HasMSA]>;
+def ST_D: ST_D_ENC, ST_D_DESC, Requires<[HasMSA]>;
+
+// Patterns.
+class MSAPat<dag pattern, dag result, Predicate pred = HasMSA> :
+  Pat<pattern, result>, Requires<[pred]>;
+
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index aa5d39c..adc0a79 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -14,6 +14,7 @@ let Namespace = "Mips" in {
 def sub_fpeven : SubRegIndex<32>;
 def sub_fpodd  : SubRegIndex<32, 32>;
 def sub_32     : SubRegIndex<32>;
+def sub_64     : SubRegIndex<64>;
 def sub_lo     : SubRegIndex<32>;
 def sub_hi     : SubRegIndex<32, 32>;
 def sub_dsp16_19 : SubRegIndex<4, 16>;
@@ -63,6 +64,12 @@ class AFPR64<bits<16> Enc, string n, list<Register> subregs>
   let SubRegIndices = [sub_32];
 }
 
+// Mips 128-bit (aliased) MSA Registers
+class AFPR128<bits<16> Enc, string n, list<Register> subregs>
+  : MipsRegWithSubRegs<Enc, n, subregs> {
+  let SubRegIndices = [sub_64];
+}
+
 // Accumulator Registers
 class ACCReg<bits<16> Enc, string n, list<Register> subregs>
   : MipsRegWithSubRegs<Enc, n, subregs> {
@@ -162,6 +169,41 @@ let Namespace = "Mips" in {
   def D#I#_64 : AFPR64<I, "f"#I, [!cast<FPR>("F"#I)]>,
                 DwarfRegNum<[!add(I, 32)]>;
 
+  /// Mips MSA registers
+  /// MSA and FPU cannot both be present unless the FPU has 64-bit registers
+  def W0  : AFPR128<0,  "w0",  [D0_64]>,  DwarfRegNum<[32]>;
+  def W1  : AFPR128<1,  "w1",  [D1_64]>,  DwarfRegNum<[33]>;
+  def W2  : AFPR128<2,  "w2",  [D2_64]>,  DwarfRegNum<[34]>;
+  def W3  : AFPR128<3,  "w3",  [D3_64]>,  DwarfRegNum<[35]>;
+  def W4  : AFPR128<4,  "w4",  [D4_64]>,  DwarfRegNum<[36]>;
+  def W5  : AFPR128<5,  "w5",  [D5_64]>,  DwarfRegNum<[37]>;
+  def W6  : AFPR128<6,  "w6",  [D6_64]>,  DwarfRegNum<[38]>;
+  def W7  : AFPR128<7,  "w7",  [D7_64]>,  DwarfRegNum<[39]>;
+  def W8  : AFPR128<8,  "w8",  [D8_64]>,  DwarfRegNum<[40]>;
+  def W9  : AFPR128<9,  "w9",  [D9_64]>,  DwarfRegNum<[41]>;
+  def W10 : AFPR128<10, "w10", [D10_64]>, DwarfRegNum<[42]>;
+  def W11 : AFPR128<11, "w11", [D11_64]>, DwarfRegNum<[43]>;
+  def W12 : AFPR128<12, "w12", [D12_64]>, DwarfRegNum<[44]>;
+  def W13 : AFPR128<13, "w13", [D13_64]>, DwarfRegNum<[45]>;
+  def W14 : AFPR128<14, "w14", [D14_64]>, DwarfRegNum<[46]>;
+  def W15 : AFPR128<15, "w15", [D15_64]>, DwarfRegNum<[47]>;
+  def W16 : AFPR128<16, "w16", [D16_64]>, DwarfRegNum<[48]>;
+  def W17 : AFPR128<17, "w17", [D17_64]>, DwarfRegNum<[49]>;
+  def W18 : AFPR128<18, "w18", [D18_64]>, DwarfRegNum<[50]>;
+  def W19 : AFPR128<19, "w19", [D19_64]>, DwarfRegNum<[51]>;
+  def W20 : AFPR128<20, "w20", [D20_64]>, DwarfRegNum<[52]>;
+  def W21 : AFPR128<21, "w21", [D21_64]>, DwarfRegNum<[53]>;
+  def W22 : AFPR128<22, "w22", [D22_64]>, DwarfRegNum<[54]>;
+  def W23 : AFPR128<23, "w23", [D23_64]>, DwarfRegNum<[55]>;
+  def W24 : AFPR128<24, "w24", [D24_64]>, DwarfRegNum<[56]>;
+  def W25 : AFPR128<25, "w25", [D25_64]>, DwarfRegNum<[57]>;
+  def W26 : AFPR128<26, "w26", [D26_64]>, DwarfRegNum<[58]>;
+  def W27 : AFPR128<27, "w27", [D27_64]>, DwarfRegNum<[59]>;
+  def W28 : AFPR128<28, "w28", [D28_64]>, DwarfRegNum<[60]>;
+  def W29 : AFPR128<29, "w29", [D29_64]>, DwarfRegNum<[61]>;
+  def W30 : AFPR128<30, "w30", [D30_64]>, DwarfRegNum<[62]>;
+  def W31 : AFPR128<31, "w31", [D31_64]>, DwarfRegNum<[63]>;
+
   // Hi/Lo registers
   def HI  : Register<"ac0">, DwarfRegNum<[64]>;
   def HI1 : Register<"ac1">, DwarfRegNum<[176]>;
@@ -302,6 +344,9 @@ def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
 def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>,
           Unallocatable;
 
+def MSA128: RegisterClass<"Mips", [v16i8, v8i16, v4i32, v2i64], 128,
+                          (sequence "W%u", 0, 31)>;
+
 // Hi/Lo Registers
 def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>;
 def HIRegs : RegisterClass<"Mips", [i32], 32, (add HI)>;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index a0aacb5..979cfcb 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -77,6 +77,23 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   if (Subtarget->hasDSPR2())
     setOperationAction(ISD::MUL, MVT::v2i16, Legal);
 
+  if (Subtarget->hasMSA()) {
+    MVT::SimpleValueType VecTys[4] = {MVT::v16i8, MVT::v8i16,
+                                      MVT::v4i32, MVT::v2i64};
+
+    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+      addRegisterClass(VecTys[i], &Mips::MSA128RegClass);
+
+      // Expand all builtin opcodes.
+      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+        setOperationAction(Opc, VecTys[i], Expand);
+
+      setOperationAction(ISD::LOAD, VecTys[i], Legal);
+      setOperationAction(ISD::STORE, VecTys[i], Legal);
+      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+    }
+  }
+
   if (!TM.Options.UseSoftFloat) {
     addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
 
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 541e2ca..e2e7e08 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -65,7 +65,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
     HasBitCount(false), HasFPIdx(false),
     InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
     InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
-    AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+    AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
     RM(_RM), OverrideMode(NoOverride), TM(_TM)
 {
   std::string CPUName = CPU;
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index bfb13bb..21d6938 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -113,6 +113,9 @@ protected:
   // compiled as Mips32
   bool Os16;
 
+  // HasMSA -- supports MSA ASE.
+  bool HasMSA;
+
   InstrItineraryData InstrItins;
 
   // The instance to the register info section object
@@ -182,6 +185,7 @@ public:
   bool inMicroMipsMode() const { return InMicroMipsMode; }
   bool hasDSP() const { return HasDSP; }
   bool hasDSPR2() const { return HasDSPR2; }
+  bool hasMSA() const { return HasMSA; }
   bool isLinux() const { return IsLinux; }
   bool useSmallSection() const { return UseSmallSection; }
-- 
cgit v1.1


From bfb07b1054b653661306848e695b34e79289a15b Mon Sep 17 00:00:00 2001
From: Akira Hatanaka
Date: Wed, 14 Aug 2013 00:21:25 +0000
Subject: [mips] Properly parse registers that appear in inline-asm constraints.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188336 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsISelLowering.cpp | 81 ++++++++++++++++++++++++++++++++++++
 lib/Target/Mips/MipsISelLowering.h   |  5 +++
 2 files changed, 86 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index a62e84f..a3bddf9 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cctype>
 
 using namespace llvm;
 
@@ -2880,6 +2881,79 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
   return weight;
 }
 
+/// This is a helper function to parse a physical register string and split it
+/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag
+/// that is returned indicates whether parsing was successful. The second flag
+/// is true if the numeric part exists.
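+/// For illustration only (hypothetical inputs, not from the patch): a
+/// constraint string of "{$f20}" yields (true, true) with Prefix == "$f" and
+/// Reg == 20; "{hi}" yields (true, false) with Prefix == "hi"; and a string
+/// without the surrounding braces yields (false, false).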
+static std::pair<bool, bool>
+parsePhysicalReg(const StringRef &C, std::string &Prefix,
+                 unsigned long long &Reg) {
+  if (C.front() != '{' || C.back() != '}')
+    return std::make_pair(false, false);
+
+  // Search for the first numeric character.
+  StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1;
+  I = std::find_if(B, E, std::ptr_fun(isdigit));
+
+  Prefix.assign(B, I - B);
+
+  // The second flag is set to false if no numeric characters were found.
+  if (I == E)
+    return std::make_pair(true, false);
+
+  // Parse the numeric characters.
+  return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg),
+                        true);
+}
+
+std::pair<unsigned, const TargetRegisterClass *> MipsTargetLowering::
+parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const TargetRegisterClass *RC;
+  std::string Prefix;
+  unsigned long long Reg;
+
+  std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg);
+
+  if (!R.first)
+    return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+  if ((Prefix == "hi" || Prefix == "lo")) { // Parse hi/lo.
+    // No numeric characters follow "hi" or "lo".
+    if (R.second)
+      return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+    RC = TRI->getRegClass(Prefix == "hi" ?
+                          Mips::HIRegsRegClassID : Mips::LORegsRegClassID);
+    return std::make_pair(*(RC->begin()), RC);
+  }
+
+  if (!R.second)
+    return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+  if (Prefix == "$f") { // Parse $f0-$f31.
+    // If the size of FP registers is 64-bit or Reg is an even number, select
+    // the 64-bit register class. Otherwise, select the 32-bit register class.
+    if (VT == MVT::Other)
+      VT = (Subtarget->isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
+
+    RC = getRegClassFor(VT);
+
+    if (RC == &Mips::AFGR64RegClass) {
+      assert(Reg % 2 == 0);
+      Reg >>= 1;
+    }
+  } else if (Prefix == "$fcc") { // Parse $fcc0-$fcc7.
+    RC = TRI->getRegClass(Mips::FCCRegClassID);
+  } else { // Parse $0-$31.
+    assert(Prefix == "$");
+    RC = getRegClassFor((VT == MVT::Other) ? MVT::i32 : VT);
+  }
+
+  assert(Reg < RC->getNumRegs());
+  return std::make_pair(*(RC->begin() + Reg), RC);
+}
+
 /// Given a register class constraint, like 'r', if this corresponds directly
 /// to an LLVM register class, return a register of 0 and the register class
 /// pointer.
@@ -2926,6 +3000,13 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
       return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
     }
   }
+
+  std::pair<unsigned, const TargetRegisterClass *> R;
+  R = parseRegForInlineAsmConstraint(Constraint, VT);
+
+  if (R.second)
+    return R;
+
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 123a2a6..686a382 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -433,6 +433,11 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;
 
+    /// This function parses registers that appear in inline-asm constraints.
+    /// It returns pair (0, 0) on failure.
+    std::pair<unsigned, const TargetRegisterClass *>
+    parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const;
+
     std::pair<unsigned, const TargetRegisterClass *>
     getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
 
-- 
cgit v1.1


From cbaf6d0cc3d3f363f269346817a90d3cbc8d1084 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka
Date: Wed, 14 Aug 2013 00:47:08 +0000
Subject: [mips] Rename HIRegs and LORegs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188341 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 36 +++++++++++------------ lib/Target/Mips/MicroMipsInstrInfo.td | 4 +-- lib/Target/Mips/Mips16InstrInfo.cpp | 4 +-- lib/Target/Mips/Mips16InstrInfo.td | 16 +++++----- lib/Target/Mips/Mips64InstrInfo.td | 16 +++++----- lib/Target/Mips/MipsDSPInstrInfo.td | 8 ++--- lib/Target/Mips/MipsISelLowering.cpp | 10 +++---- lib/Target/Mips/MipsInstrInfo.td | 20 ++++++------- lib/Target/Mips/MipsRegisterInfo.td | 29 +++++++++--------- lib/Target/Mips/MipsSEInstrInfo.cpp | 24 +++++++-------- 10 files changed, 83 insertions(+), 84 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index d4f3109..fbe9309 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -143,15 +143,15 @@ static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); -static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, @@ -489,26 +489,26 @@ static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { if (RegNo >= 4) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::HIRegsDSPRegClassID, RegNo); + unsigned Reg = getReg(Decoder, Mips::HI32DSPRegClassID, RegNo); Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } -static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { if (RegNo >= 4) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::LORegsDSPRegClassID, RegNo); + unsigned Reg = getReg(Decoder, Mips::LO32DSPRegClassID, RegNo); Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 549d0e2..bf07d84 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -62,9 +62,9 @@ let DecoderNamespace = "MicroMips" in { def XOR_MM : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, IIAlu, xor>, ADD_FM_MM<0, 0x310>; def NOR_MM : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM_MM<0, 0x2d0>; - def MULT_MM : MMRel, Mult<"mult", IIImul, GPR32Opnd, [HI, LO]>, + def MULT_MM : MMRel, Mult<"mult", IIImul, GPR32Opnd, [HI0, LO0]>, MULT_FM_MM<0x22c>; - def MULTu_MM : MMRel, Mult<"multu", IIImul, GPR32Opnd, [HI, LO]>, + def MULTu_MM : MMRel, Mult<"multu", IIImul, GPR32Opnd, [HI0, LO0]>, MULT_FM_MM<0x26c>; /// Shift 
Instructions diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 05e70ab..887aa80 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -77,11 +77,11 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (Mips::GPR32RegClass.contains(DestReg) && Mips::CPU16RegsRegClass.contains(SrcReg)) Opc = Mips::Move32R16; - else if ((SrcReg == Mips::HI) && + else if ((SrcReg == Mips::HI0) && (Mips::CPU16RegsRegClass.contains(DestReg))) Opc = Mips::Mfhi16, SrcReg = 0; - else if ((SrcReg == Mips::LO) && + else if ((SrcReg == Mips::LO0) && (Mips::CPU16RegsRegClass.contains(DestReg))) Opc = Mips::Mflo16, SrcReg = 0; diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index da4b221..eb7d957 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -665,7 +665,7 @@ def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> { // To divide 32-bit signed integers. // def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { - let Defs = [HI, LO]; + let Defs = [HI0, LO0]; } // @@ -674,7 +674,7 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { // To divide 32-bit unsigned integers. // def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { - let Defs = [HI, LO]; + let Defs = [HI0, LO0]; } // // Format: JAL target MIPS16e @@ -805,7 +805,7 @@ def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>; // To copy the special purpose HI register to a GPR. // def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> { - let Uses = [HI]; + let Uses = [HI0]; let neverHasSideEffects = 1; } @@ -815,7 +815,7 @@ def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> { // To copy the special purpose LO register to a GPR. // def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> { - let Uses = [LO]; + let Uses = [LO0]; let neverHasSideEffects = 1; } @@ -825,13 +825,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> { def MultRxRy16: FMULT16_ins<"mult", IIAlu> { let isCommutable = 1; let neverHasSideEffects = 1; - let Defs = [HI, LO]; + let Defs = [HI0, LO0]; } def MultuRxRy16: FMULT16_ins<"multu", IIAlu> { let isCommutable = 1; let neverHasSideEffects = 1; - let Defs = [HI, LO]; + let Defs = [HI0, LO0]; } // @@ -842,7 +842,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIAlu> { def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> { let isCommutable = 1; let neverHasSideEffects = 1; - let Defs = [HI, LO]; + let Defs = [HI0, LO0]; } // @@ -853,7 +853,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> { def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> { let isCommutable = 1; let neverHasSideEffects = 1; - let Defs = [HI, LO]; + let Defs = [HI0, LO0]; } // diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 226d241..9e164fb 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -187,26 +187,26 @@ def TAILCALL64_R : JumpFR, MTLO_FM<8>, IsTailCall; } /// Multiply and Divide Instructions. 
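// (Illustrative note, assuming the usual MIPS semantics: a multiply such as
// "dmult $4, $5" writes its 128-bit product into the HI/LO pair, and
// "mflo $2" reads the low half back; the [HI0_64, LO0_64] lists on the defs
// below model exactly those implicit side effects for the scheduler.)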
-def DMULT : Mult<"dmult", IIImult, GPR64Opnd, [HI64, LO64]>, +def DMULT : Mult<"dmult", IIImult, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1c>; -def DMULTu : Mult<"dmultu", IIImult, GPR64Opnd, [HI64, LO64]>, +def DMULTu : Mult<"dmultu", IIImult, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1d>; def PseudoDMULT : MultDivPseudo; def PseudoDMULTu : MultDivPseudo; -def DSDIV : Div<"ddiv", IIIdiv, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1e>; -def DUDIV : Div<"ddivu", IIIdiv, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1f>; +def DSDIV : Div<"ddiv", IIIdiv, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1e>; +def DUDIV : Div<"ddivu", IIIdiv, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1f>; def PseudoDSDIV : MultDivPseudo; def PseudoDUDIV : MultDivPseudo; let isCodeGenOnly = 1 in { -def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI64]>, MTLO_FM<0x11>; -def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO64]>, MTLO_FM<0x13>; -def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, [HI64]>, MFLO_FM<0x10>; -def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, [LO64]>, MFLO_FM<0x12>; +def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>; +def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>; +def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, [HI0_64]>, MFLO_FM<0x10>; +def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, [LO0_64]>, MFLO_FM<0x12>; /// Sign Ext In Register Instructions. def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd>, SEB_FM<0x10, 0x20>; diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index ccd8f3e..3bcd585 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -737,10 +737,10 @@ class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>, Defs<[DSPOutFlag16_19]>; // Move from/to hi/lo. -class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HIRegsDSP, NoItinerary>; -class MFLO_DESC : MFHI_DESC_BASE<"mflo", LORegsDSP, NoItinerary>; -class MTHI_DESC : MTHI_DESC_BASE<"mthi", HIRegsDSP, NoItinerary>; -class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LORegsDSP, NoItinerary>; +class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HI32DSP, NoItinerary>; +class MFLO_DESC : MFHI_DESC_BASE<"mflo", LO32DSP, NoItinerary>; +class MTHI_DESC : MTHI_DESC_BASE<"mthi", HI32DSP, NoItinerary>; +class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LO32DSP, NoItinerary>; // Dot product with accumulate/subtract class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index a3bddf9..377236a 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -425,8 +425,8 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT Ty = N->getValueType(0); - unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; - unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; + unsigned LO = (Ty == MVT::i32) ? Mips::LO0 : Mips::LO0_64; + unsigned HI = (Ty == MVT::i32) ? Mips::HI0 : Mips::HI0_64; unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 : MipsISD::DivRemU16; SDLoc DL(N); @@ -2924,7 +2924,7 @@ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const { return std::make_pair((unsigned)0, (const TargetRegisterClass*)0); RC = TRI->getRegClass(Prefix == "hi" ? 
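// (Illustrative note: SignExtInReg matches the sext_inreg DAG node, so
// "seb $2, $3" sign-extends the low 8 bits of $3 across the full register
// width of $2; SEB64/SEH64 below are the 64-bit forms of that operation.)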
- Mips::HIRegsRegClassID : Mips::LORegsRegClassID); + Mips::HI32RegClassID : Mips::LO32RegClassID); return std::make_pair(*(RC->begin()), RC); } @@ -2992,8 +2992,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass); case 'l': // register suitable for indirect jump if (VT == MVT::i32) - return std::make_pair((unsigned)Mips::LO, &Mips::LORegsRegClass); - return std::make_pair((unsigned)Mips::LO64, &Mips::LORegs64RegClass); + return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass); + return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass); case 'x': // register suitable for indirect jump // Fixme: Not triggering the use of both hi and low // This will generate an error message diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index abc2d16..81dbd42 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -409,8 +409,8 @@ class ArithLogicI : InstSE<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), !strconcat(opstr, "\t$rs, $rt"), [], IIImult, FrmR> { - let Defs = [HI, LO]; - let Uses = [HI, LO]; + let Defs = [HI0, LO0]; + let Uses = [HI0, LO0]; let isCommutable = isComm; } @@ -1042,23 +1042,23 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { } /// Multiply and Divide Instructions. -def MULT : MMRel, Mult<"mult", IIImult, GPR32Opnd, [HI, LO]>, +def MULT : MMRel, Mult<"mult", IIImult, GPR32Opnd, [HI0, LO0]>, MULT_FM<0, 0x18>; -def MULTu : MMRel, Mult<"multu", IIImult, GPR32Opnd, [HI, LO]>, +def MULTu : MMRel, Mult<"multu", IIImult, GPR32Opnd, [HI0, LO0]>, MULT_FM<0, 0x19>; def PseudoMULT : MultDivPseudo; def PseudoMULTu : MultDivPseudo; -def SDIV : Div<"div", IIIdiv, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x1a>; -def UDIV : Div<"divu", IIIdiv, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x1b>; +def SDIV : Div<"div", IIIdiv, GPR32Opnd, [HI0, LO0]>, MULT_FM<0, 0x1a>; +def UDIV : Div<"divu", IIIdiv, GPR32Opnd, [HI0, LO0]>, MULT_FM<0, 0x1b>; def PseudoSDIV : MultDivPseudo; def PseudoUDIV : MultDivPseudo; -def MTHI : MoveToLOHI<"mthi", GPR32Opnd, [HI]>, MTLO_FM<0x11>; -def MTLO : MoveToLOHI<"mtlo", GPR32Opnd, [LO]>, MTLO_FM<0x13>; -def MFHI : MoveFromLOHI<"mfhi", GPR32Opnd, [HI]>, MFLO_FM<0x10>; -def MFLO : MoveFromLOHI<"mflo", GPR32Opnd, [LO]>, MFLO_FM<0x12>; +def MTHI : MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>; +def MTLO : MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>; +def MFHI : MoveFromLOHI<"mfhi", GPR32Opnd, [HI0]>, MFLO_FM<0x10>; +def MFLO : MoveFromLOHI<"mflo", GPR32Opnd, [LO0]>, MFLO_FM<0x12>; /// Sign Ext In Register Instructions. 
def SEB : SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM<0x10, 0x20>; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index adc0a79..a3f7d95 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -205,18 +205,18 @@ let Namespace = "Mips" in { def W31 : AFPR128<31, "w31", [D31_64]>, DwarfRegNum<[63]>; // Hi/Lo registers - def HI : Register<"ac0">, DwarfRegNum<[64]>; + def HI0 : Register<"ac0">, DwarfRegNum<[64]>; def HI1 : Register<"ac1">, DwarfRegNum<[176]>; def HI2 : Register<"ac2">, DwarfRegNum<[178]>; def HI3 : Register<"ac3">, DwarfRegNum<[180]>; - def LO : Register<"ac0">, DwarfRegNum<[65]>; + def LO0 : Register<"ac0">, DwarfRegNum<[65]>; def LO1 : Register<"ac1">, DwarfRegNum<[177]>; def LO2 : Register<"ac2">, DwarfRegNum<[179]>; def LO3 : Register<"ac3">, DwarfRegNum<[181]>; let SubRegIndices = [sub_32] in { - def HI64 : RegisterWithSubRegs<"hi", [HI]>; - def LO64 : RegisterWithSubRegs<"lo", [LO]>; + def HI0_64 : RegisterWithSubRegs<"hi", [HI0]>; + def LO0_64 : RegisterWithSubRegs<"lo", [LO0]>; } // FP control registers. @@ -234,12 +234,11 @@ let Namespace = "Mips" in { def HWR29 : MipsReg<29, "29">; // Accum registers - def AC0 : ACCReg<0, "ac0", [LO, HI]>; - def AC1 : ACCReg<1, "ac1", [LO1, HI1]>; - def AC2 : ACCReg<2, "ac2", [LO2, HI2]>; - def AC3 : ACCReg<3, "ac3", [LO3, HI3]>; + foreach I = 0-3 in + def AC#I : ACCReg<#I, "ac"#I, + [!cast("LO"#I), !cast("HI"#I)]>; - def AC0_64 : ACCReg<0, "ac0", [LO64, HI64]>; + def AC0_64 : ACCReg<0, "ac0", [LO0_64, HI0_64]>; // DSP-ASE control register fields. def DSPPos : Register<"">; @@ -348,12 +347,12 @@ def MSA128: RegisterClass<"Mips", [v16i8, v8i16, v4i32, v2i64], 128, (sequence "W%u", 0, 31)>; // Hi/Lo Registers -def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>; -def HIRegs : RegisterClass<"Mips", [i32], 32, (add HI)>; -def LORegsDSP : RegisterClass<"Mips", [i32], 32, (add LO, LO1, LO2, LO3)>; -def HIRegsDSP : RegisterClass<"Mips", [i32], 32, (add HI, HI1, HI2, HI3)>; -def LORegs64 : RegisterClass<"Mips", [i64], 64, (add LO64)>; -def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>; +def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>; +def HI32 : RegisterClass<"Mips", [i32], 32, (add HI0)>; +def LO32DSP : RegisterClass<"Mips", [i32], 32, (sequence "LO%u", 0, 3)>; +def HI32DSP : RegisterClass<"Mips", [i32], 32, (sequence "HI%u", 0, 3)>; +def LO64 : RegisterClass<"Mips", [i64], 64, (add LO0_64)>; +def HI64 : RegisterClass<"Mips", [i64], 64, (add HI0_64)>; // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index b2c6caa..24a6836 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -101,13 +101,13 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) Opc = Mips::MFC1; - else if (Mips::HIRegsRegClass.contains(SrcReg)) + else if (Mips::HI32RegClass.contains(SrcReg)) Opc = Mips::MFHI, SrcReg = 0; - else if (Mips::LORegsRegClass.contains(SrcReg)) + else if (Mips::LO32RegClass.contains(SrcReg)) Opc = Mips::MFLO, SrcReg = 0; - else if (Mips::HIRegsDSPRegClass.contains(SrcReg)) + else if (Mips::HI32DSPRegClass.contains(SrcReg)) Opc = Mips::MFHI_DSP; - else if (Mips::LORegsDSPRegClass.contains(SrcReg)) + else if (Mips::LO32DSPRegClass.contains(SrcReg)) Opc = Mips::MFLO_DSP; else if 
(Mips::DSPCCRegClass.contains(SrcReg)) { BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4) @@ -120,13 +120,13 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::CTC1; else if (Mips::FGR32RegClass.contains(DestReg)) Opc = Mips::MTC1; - else if (Mips::HIRegsRegClass.contains(DestReg)) + else if (Mips::HI32RegClass.contains(DestReg)) Opc = Mips::MTHI, DestReg = 0; - else if (Mips::LORegsRegClass.contains(DestReg)) + else if (Mips::LO32RegClass.contains(DestReg)) Opc = Mips::MTLO, DestReg = 0; - else if (Mips::HIRegsDSPRegClass.contains(DestReg)) + else if (Mips::HI32DSPRegClass.contains(DestReg)) Opc = Mips::MTHI_DSP; - else if (Mips::LORegsDSPRegClass.contains(DestReg)) + else if (Mips::LO32DSPRegClass.contains(DestReg)) Opc = Mips::MTLO_DSP; else if (Mips::DSPCCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(Mips::WRDSP)) @@ -144,17 +144,17 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg. if (Mips::GPR64RegClass.contains(SrcReg)) Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; - else if (Mips::HIRegs64RegClass.contains(SrcReg)) + else if (Mips::HI64RegClass.contains(SrcReg)) Opc = Mips::MFHI64, SrcReg = 0; - else if (Mips::LORegs64RegClass.contains(SrcReg)) + else if (Mips::LO64RegClass.contains(SrcReg)) Opc = Mips::MFLO64, SrcReg = 0; else if (Mips::FGR64RegClass.contains(SrcReg)) Opc = Mips::DMFC1; } else if (Mips::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg. - if (Mips::HIRegs64RegClass.contains(DestReg)) + if (Mips::HI64RegClass.contains(DestReg)) Opc = Mips::MTHI64, DestReg = 0; - else if (Mips::LORegs64RegClass.contains(DestReg)) + else if (Mips::LO64RegClass.contains(DestReg)) Opc = Mips::MTLO64, DestReg = 0; else if (Mips::FGR64RegClass.contains(DestReg)) Opc = Mips::DMTC1; -- cgit v1.1 From 7d6355226c60cd5ac7e1c916b17fee1a2b30a871 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 14 Aug 2013 00:53:38 +0000 Subject: [mips] Rename DSPRegs. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188342 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 16 +- lib/Target/Mips/MipsDSPInstrInfo.td | 204 +++++++++++----------- lib/Target/Mips/MipsRegisterInfo.cpp | 2 +- lib/Target/Mips/MipsRegisterInfo.td | 6 +- lib/Target/Mips/MipsSEISelLowering.cpp | 2 +- 5 files changed, 117 insertions(+), 113 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index fbe9309..1f2d210 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -103,10 +103,10 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, @@ -359,10 +359,10 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); } diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 3bcd585..7fc2305 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -515,35 +515,35 @@ class INSV_DESC_BASE, IsCommutable, + DSPR, DSPR>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary, - DSPRegs, DSPRegs>, + DSPR, DSPR>, Defs<[DSPOutFlag20]>; class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, Defs<[DSPOutFlag20]>; class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary, - DSPRegs, DSPRegs>, IsCommutable, + DSPR, DSPR>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary, - DSPRegs, DSPRegs>, + DSPR, DSPR>, Defs<[DSPOutFlag20]>; class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, Defs<[DSPOutFlag20]>; class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w, @@ -566,11 +566,11 @@ class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary, GPR32, GPR32>; class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb, - NoItinerary, GPR32, DSPRegs>; + NoItinerary, GPR32, DSPR>; // Absolute value class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPOutFlag20]>; class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, @@ -580,106 +580,106 @@ 
class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, // Precision reduce/expand class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph", int_mips_precrq_qb_ph, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPR, DSPR>; class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w", int_mips_precrq_ph_w, - NoItinerary, DSPRegs, GPR32>; + NoItinerary, DSPR, GPR32>; class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w", int_mips_precrq_rs_ph_w, - NoItinerary, DSPRegs, + NoItinerary, DSPR, GPR32>, Defs<[DSPOutFlag22]>; class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph", int_mips_precrqu_s_qb_ph, - NoItinerary, DSPRegs, - DSPRegs>, + NoItinerary, DSPR, + DSPR>, Defs<[DSPOutFlag22]>; class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl", int_mips_preceq_w_phl, - NoItinerary, GPR32, DSPRegs>; + NoItinerary, GPR32, DSPR>; class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr", int_mips_preceq_w_phr, - NoItinerary, GPR32, DSPRegs>; + NoItinerary, GPR32, DSPR>; class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl", int_mips_precequ_ph_qbl, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr", int_mips_precequ_ph_qbr, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla", int_mips_precequ_ph_qbla, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra", int_mips_precequ_ph_qbra, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl", int_mips_preceu_ph_qbl, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr", int_mips_preceu_ph_qbr, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla", int_mips_preceu_ph_qbla, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", int_mips_preceu_ph_qbra, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; // Shift class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPOutFlag22]>; class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPOutFlag22]>; class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPOutFlag22]>; class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPOutFlag22]>; class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, - immZExt4, NoItinerary, DSPRegs>, + immZExt4, NoItinerary, DSPR>, Defs<[DSPOutFlag22]>; class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPOutFlag22]>; class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; 
class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, - immZExt4, NoItinerary, DSPRegs>; + immZExt4, NoItinerary, DSPR>; class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, immZExt5, NoItinerary, GPR32>, @@ -698,26 +698,26 @@ class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, // Multiplication class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl", int_mips_muleu_s_ph_qbl, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, Defs<[DSPOutFlag21]>; class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr", int_mips_muleu_s_ph_qbr, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl", int_mips_muleq_s_w_phl, - NoItinerary, GPR32, DSPRegs>, + NoItinerary, GPR32, DSPR>, IsCommutable, Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr", int_mips_muleq_s_w_phr, - NoItinerary, GPR32, DSPRegs>, + NoItinerary, GPR32, DSPR>, IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, IsCommutable, Defs<[DSPOutFlag21]>; class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph", @@ -773,40 +773,40 @@ class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>; // Comparison class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", int_mips_cmpu_eq_qb, NoItinerary, - DSPRegs>, + DSPR>, IsCommutable, Defs<[DSPCCond]>; class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb", int_mips_cmpu_lt_qb, NoItinerary, - DSPRegs>, Defs<[DSPCCond]>; + DSPR>, Defs<[DSPCCond]>; class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb", int_mips_cmpu_le_qb, NoItinerary, - DSPRegs>, Defs<[DSPCCond]>; + DSPR>, Defs<[DSPCCond]>; class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", int_mips_cmpgu_eq_qb, - NoItinerary, GPR32, DSPRegs>, + NoItinerary, GPR32, DSPR>, IsCommutable; class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb", int_mips_cmpgu_lt_qb, - NoItinerary, GPR32, DSPRegs>; + NoItinerary, GPR32, DSPR>; class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb", int_mips_cmpgu_le_qb, - NoItinerary, GPR32, DSPRegs>; + NoItinerary, GPR32, DSPR>; class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, IsCommutable, Defs<[DSPCCond]>; class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPCCond]>; class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPCCond]>; // Misc @@ -814,26 +814,26 @@ class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev, NoItinerary, GPR32>; class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPR, DSPR>; class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, - NoItinerary, DSPRegs, GPR32>; + NoItinerary, DSPR, GPR32>; class 
REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph, - NoItinerary, DSPRegs, GPR32>; + NoItinerary, DSPR, GPR32>; class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, Uses<[DSPCCond]>; class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, Uses<[DSPCCond]>; class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>; @@ -905,44 +905,44 @@ class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>, // MIPS DSP Rev 2 // Addition/subtraction class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary, - DSPRegs, DSPRegs>, IsCommutable, + DSPR, DSPR>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary, - DSPRegs, DSPRegs>, + DSPR, DSPR>, Defs<[DSPOutFlag20]>; class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, Defs<[DSPOutFlag20]>; class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb, - NoItinerary, DSPRegs>, IsCommutable; + NoItinerary, DSPR>, IsCommutable; class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb, - NoItinerary, DSPRegs>, IsCommutable; + NoItinerary, DSPR>, IsCommutable; class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph, - NoItinerary, DSPRegs>, IsCommutable; + NoItinerary, DSPR>, IsCommutable; class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph, - NoItinerary, DSPRegs>, IsCommutable; + NoItinerary, DSPR>, IsCommutable; class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w, NoItinerary, GPR32>, IsCommutable; @@ -959,31 +959,31 @@ class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w, // Comparison class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb", int_mips_cmpgdu_eq_qb, - NoItinerary, GPR32, DSPRegs>, + NoItinerary, GPR32, DSPR>, IsCommutable, Defs<[DSPCCond]>; class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb", int_mips_cmpgdu_lt_qb, - NoItinerary, GPR32, DSPRegs>, + NoItinerary, GPR32, DSPR>, Defs<[DSPCCond]>; class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb", int_mips_cmpgdu_le_qb, - NoItinerary, GPR32, DSPRegs>, + NoItinerary, GPR32, DSPR>, Defs<[DSPCCond]>; // Absolute class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb, - NoItinerary, DSPRegs>, + NoItinerary, DSPR>, Defs<[DSPOutFlag20]>; // Multiplication class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary, - DSPRegs>, IsCommutable, + DSPR>, IsCommutable, Defs<[DSPOutFlag21]>; class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph, - NoItinerary, DSPRegs>, IsCommutable, + NoItinerary, DSPR>, 
IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w, @@ -995,7 +995,7 @@ class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w, Defs<[DSPOutFlag21]>; class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, - NoItinerary, DSPRegs, DSPRegs>, + NoItinerary, DSPR, DSPR>, IsCommutable, Defs<[DSPOutFlag21]>; // Dot product with accumulate/subtract @@ -1026,36 +1026,36 @@ class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>; // Precision reduce/expand class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", int_mips_precr_qb_ph, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPR, DSPR>; class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w", int_mips_precr_sra_ph_w, - NoItinerary, DSPRegs, + NoItinerary, DSPR, GPR32>; class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", int_mips_precr_sra_r_ph_w, - NoItinerary, DSPRegs, + NoItinerary, DSPR, GPR32>; // Shift class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, - immZExt3, NoItinerary, DSPRegs>; + immZExt3, NoItinerary, DSPR>; class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPR>; // Misc class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5, @@ -1252,12 +1252,12 @@ let isPseudo = 1, isCodeGenOnly = 1 in { // Pseudo CMP and PICK instructions. 
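// (Illustrative note: these pseudos keep the DSP condition-code state in a
// virtual DSPCC operand so that a cmp.* and its matching pick.* can be
// register-allocated like ordinary values; PseudoInstExpansion then rewrites
// each pseudo to the real DSP instruction when it is finally emitted.)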
class PseudoCMP : - PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>, - PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects; + PseudoDSP<(outs DSPCC:$cmp), (ins DSPR:$rs, DSPR:$rt), []>, + PseudoInstExpansion<(RealInst DSPR:$rs, DSPR:$rt)>, NeverHasSideEffects; class PseudoPICK : - PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>, - PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>, + PseudoDSP<(outs DSPR:$rd), (ins DSPCC:$cmp, DSPR:$rs, DSPR:$rt), []>, + PseudoInstExpansion<(RealInst DSPR:$rd, DSPR:$rs, DSPR:$rt)>, NeverHasSideEffects; def PseudoCMP_EQ_PH : PseudoCMP; @@ -1279,19 +1279,19 @@ class BitconvertPat; -def : BitconvertPat; -def : BitconvertPat; -def : BitconvertPat; -def : BitconvertPat; +def : BitconvertPat; +def : BitconvertPat; +def : BitconvertPat; +def : BitconvertPat; def : DSPPat<(v2i16 (load addr:$a)), - (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>; + (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>; def : DSPPat<(v4i8 (load addr:$a)), - (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>; -def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a), - (SW (COPY_TO_REGCLASS DSPRegs:$val, GPR32), addr:$a)>; -def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a), - (SW (COPY_TO_REGCLASS DSPRegs:$val, GPR32), addr:$a)>; + (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>; +def : DSPPat<(store (v2i16 DSPR:$val), addr:$a), + (SW (COPY_TO_REGCLASS DSPR:$val, GPR32), addr:$a)>; +def : DSPPat<(store (v4i8 DSPR:$val), addr:$a), + (SW (COPY_TO_REGCLASS DSPR:$val, GPR32), addr:$a)>; // Binary operations. class DSPBinPat : DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), - (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)), + (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPR)), (ValTy ZERO)))>; class DSPSetCCPatInv; + (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPR))))>; class DSPSelectCCPat : diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index eb1e056..6d76021 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -56,7 +56,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, return 0; case Mips::GPR32RegClassID: case Mips::GPR64RegClassID: - case Mips::DSPRegsRegClassID: { + case Mips::DSPRRegClassID: { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); return 28 - TFI->hasFP(MF); } diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index a3f7d95..62a594f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -279,7 +279,7 @@ class GPR32Class regTypes> : K0, K1, GP, SP, FP, RA)>; def GPR32 : GPR32Class<[i32]>; -def DSPRegs : GPR32Class<[v4i8, v2i16]>; +def DSPR : GPR32Class<[v4i8, v2i16]>; def GPR64 : RegisterClass<"Mips", [i64], 64, (add // Reserved @@ -425,6 +425,10 @@ def GPR64Opnd : RegisterOperand { let ParserMatchClass = GPR64AsmOperand; } +def DSPROpnd : RegisterOperand { + let ParserMatchClass = GPR32AsmOperand; +} + def CCROpnd : RegisterOperand { let ParserMatchClass = CCRAsmOperand; } diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 979cfcb..c54c55c 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -40,7 +40,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; for (unsigned i = 
0; i < array_lengthof(VecTys); ++i) { - addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); + addRegisterClass(VecTys[i], &Mips::DSPRRegClass); // Expand all builtin opcodes. for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) -- cgit v1.1 From 88373c29fe9d0b498ed21c3d29129f31806d7ec8 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 14 Aug 2013 01:02:20 +0000 Subject: [mips] Use register operands instead of register classes in DSP instruction definitions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188343 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 84 +++++- lib/Target/Mips/MipsDSPInstrInfo.td | 416 ++++++++++++++-------------- lib/Target/Mips/MipsRegisterInfo.td | 18 ++ 3 files changed, 309 insertions(+), 209 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 2492bf8..28ad090 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -115,6 +115,12 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseACC64DSP(SmallVectorImpl &Operands); + MipsAsmParser::OperandMatchResultTy + parseLO32DSP(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseHI32DSP(SmallVectorImpl &Operands); + bool searchSymbolAlias(SmallVectorImpl &Operands, unsigned RegKind); @@ -222,7 +228,9 @@ public: Kind_AFGR64Regs, Kind_CCRRegs, Kind_FCCRegs, - Kind_ACC64DSP + Kind_ACC64DSP, + Kind_LO32DSP, + Kind_HI32DSP }; private: @@ -408,6 +416,14 @@ public: return Kind == k_Register && Reg.Kind == Kind_ACC64DSP; } + bool isLO32DSPAsm() const { + return Kind == k_Register && Reg.Kind == Kind_LO32DSP; + } + + bool isHI32DSPAsm() const { + return Kind == k_Register && Reg.Kind == Kind_HI32DSP; + } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; @@ -1409,6 +1425,72 @@ MipsAsmParser::parseACC64DSP(SmallVectorImpl &Operands) { return parseRegs(Operands, (int) MipsOperand::Kind_ACC64DSP); } +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseLO32DSP(SmallVectorImpl &Operands) { + // If the first token is not '$' we have an error. + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat the '$' + + const AsmToken &Tok = Parser.getTok(); // Get next token. + + if (Tok.isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + + if (!Tok.getIdentifier().startswith("ac")) + return MatchOperand_NoMatch; + + StringRef NumString = Tok.getIdentifier().substr(2); + + unsigned IntVal; + if (NumString.getAsInteger(10, IntVal)) + return MatchOperand_NoMatch; + + unsigned Reg = matchRegisterByNumber(IntVal, Mips::LO32DSPRegClassID); + + MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc()); + Op->setRegKind(MipsOperand::Kind_LO32DSP); + Operands.push_back(Op); + + Parser.Lex(); // Eat the register number. + return MatchOperand_Success; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseHI32DSP(SmallVectorImpl &Operands) { + // If the first token is not '$' we have an error. + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat the '$' + + const AsmToken &Tok = Parser.getTok(); // Get next token. 
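+  // For a hypothetical operand such as "$ac2", Tok now holds the identifier
+  // "ac2"; the code below strips the "ac" prefix and uses the numeric suffix
+  // to pick the matching register from the HI32DSP register class.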
+ + if (Tok.isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + + if (!Tok.getIdentifier().startswith("ac")) + return MatchOperand_NoMatch; + + StringRef NumString = Tok.getIdentifier().substr(2); + + unsigned IntVal; + if (NumString.getAsInteger(10, IntVal)) + return MatchOperand_NoMatch; + + unsigned Reg = matchRegisterByNumber(IntVal, Mips::HI32DSPRegClassID); + + MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc()); + Op->setRegKind(MipsOperand::Kind_HI32DSP); + Operands.push_back(Op); + + Parser.Lex(); // Eat the register number. + return MatchOperand_Success; +} + bool MipsAsmParser::searchSymbolAlias( SmallVectorImpl &Operands, unsigned RegKind) { diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 7fc2305..5020aa3 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -256,203 +256,203 @@ class PREPEND_ENC : APPEND_FMT<0b00001>; // Instruction desc. class ADDU_QB_DESC_BASE { - dag OutOperandList = (outs RCD:$rd); - dag InOperandList = (ins RCS:$rs, RCT:$rt); + InstrItinClass itin, RegisterOperand ROD, + RegisterOperand ROS, RegisterOperand ROT = ROS> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROS:$rs, ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); - list Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + list Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; } class RADDU_W_QB_DESC_BASE { - dag OutOperandList = (outs RCD:$rd); - dag InOperandList = (ins RCS:$rs); + InstrItinClass itin, RegisterOperand ROD, + RegisterOperand ROS = ROD> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROS:$rs); string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); - list Pattern = [(set RCD:$rd, (OpNode RCS:$rs))]; + list Pattern = [(set ROD:$rd, (OpNode ROS:$rs))]; InstrItinClass Itinerary = itin; } class CMP_EQ_QB_R2_DESC_BASE { + InstrItinClass itin, RegisterOperand ROS, + RegisterOperand ROT = ROS> { dag OutOperandList = (outs); - dag InOperandList = (ins RCS:$rs, RCT:$rt); + dag InOperandList = (ins ROS:$rs, ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rs, $rt"); - list Pattern = [(OpNode RCS:$rs, RCT:$rt)]; + list Pattern = [(OpNode ROS:$rs, ROT:$rt)]; InstrItinClass Itinerary = itin; } class CMP_EQ_QB_R3_DESC_BASE { - dag OutOperandList = (outs RCD:$rd); - dag InOperandList = (ins RCS:$rs, RCT:$rt); + InstrItinClass itin, RegisterOperand ROD, + RegisterOperand ROS, RegisterOperand ROT = ROS> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROS:$rs, ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); - list Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + list Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; } class PRECR_SRA_PH_W_DESC_BASE { - dag OutOperandList = (outs RCT:$rt); - dag InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src); + InstrItinClass itin, RegisterOperand ROT, + RegisterOperand ROS = ROT> { + dag OutOperandList = (outs ROT:$rt); + dag InOperandList = (ins ROS:$rs, shamt:$sa, ROS:$src); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); - list Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))]; + list Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; } class ABSQ_S_PH_R2_DESC_BASE { - dag OutOperandList = (outs RCD:$rd); - dag InOperandList = (ins 
RCT:$rt); + InstrItinClass itin, RegisterOperand ROD, + RegisterOperand ROT = ROD> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); - list Pattern = [(set RCD:$rd, (OpNode RCT:$rt))]; + list Pattern = [(set ROD:$rd, (OpNode ROT:$rt))]; InstrItinClass Itinerary = itin; } class REPL_DESC_BASE { - dag OutOperandList = (outs RC:$rd); + ImmLeaf immPat, InstrItinClass itin, RegisterOperand RO> { + dag OutOperandList = (outs RO:$rd); dag InOperandList = (ins uimm16:$imm); string AsmString = !strconcat(instr_asm, "\t$rd, $imm"); - list Pattern = [(set RC:$rd, (OpNode immPat:$imm))]; + list Pattern = [(set RO:$rd, (OpNode immPat:$imm))]; InstrItinClass Itinerary = itin; } class SHLL_QB_R3_DESC_BASE { - dag OutOperandList = (outs RC:$rd); - dag InOperandList = (ins RC:$rt, GPR32:$rs_sa); + InstrItinClass itin, RegisterOperand RO> { + dag OutOperandList = (outs RO:$rd); + dag InOperandList = (ins RO:$rt, GPR32Opnd:$rs_sa); string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); - list Pattern = [(set RC:$rd, (OpNode RC:$rt, GPR32:$rs_sa))]; + list Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs_sa))]; InstrItinClass Itinerary = itin; } class SHLL_QB_R2_DESC_BASE { - dag OutOperandList = (outs RC:$rd); - dag InOperandList = (ins RC:$rt, uimm16:$rs_sa); + RegisterOperand RO> { + dag OutOperandList = (outs RO:$rd); + dag InOperandList = (ins RO:$rt, uimm16:$rs_sa); string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); - list Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))]; + list Pattern = [(set RO:$rd, (OpNode RO:$rt, ImmPat:$rs_sa))]; InstrItinClass Itinerary = itin; bit hasSideEffects = 1; } class LX_DESC_BASE { - dag OutOperandList = (outs GPR32:$rd); - dag InOperandList = (ins GPR32:$base, GPR32:$index); + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$base, GPR32Opnd:$index); string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})"); - list Pattern = [(set GPR32:$rd, - (OpNode GPR32:$base, GPR32:$index))]; + list Pattern = [(set GPR32Opnd:$rd, + (OpNode GPR32Opnd:$base, GPR32Opnd:$index))]; InstrItinClass Itinerary = itin; bit mayLoad = 1; } class ADDUH_QB_DESC_BASE { - dag OutOperandList = (outs RCD:$rd); - dag InOperandList = (ins RCS:$rs, RCT:$rt); + InstrItinClass itin, RegisterOperand ROD, + RegisterOperand ROS = ROD, RegisterOperand ROT = ROD> { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROS:$rs, ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); - list Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + list Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; } class APPEND_DESC_BASE { - dag OutOperandList = (outs GPR32:$rt); - dag InOperandList = (ins GPR32:$rs, shamt:$sa, GPR32:$src); + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins GPR32Opnd:$rs, shamt:$sa, GPR32Opnd:$src); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); - list Pattern = [(set GPR32:$rt, - (OpNode GPR32:$src, GPR32:$rs, ImmOp:$sa))]; + list Pattern = [(set GPR32Opnd:$rt, + (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, ImmOp:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; } class EXTR_W_TY1_R2_DESC_BASE { - dag OutOperandList = (outs GPR32:$rt); - dag InOperandList = (ins ACC64DSP:$ac, GPR32:$shift_rs); + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$shift_rs); string 
AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; } class EXTR_W_TY1_R1_DESC_BASE { - dag OutOperandList = (outs GPR32:$rt); - dag InOperandList = (ins ACC64DSP:$ac, uimm16:$shift_rs); + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm16:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; } class SHILO_R1_DESC_BASE { - dag OutOperandList = (outs ACC64DSP:$ac); - dag InOperandList = (ins simm16:$shift, ACC64DSP:$acin); + dag OutOperandList = (outs ACC64DSPOpnd:$ac); + dag InOperandList = (ins simm16:$shift, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); - list Pattern = [(set ACC64DSP:$ac, - (OpNode immSExt6:$shift, ACC64DSP:$acin))]; + list Pattern = [(set ACC64DSPOpnd:$ac, + (OpNode immSExt6:$shift, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; } class SHILO_R2_DESC_BASE { - dag OutOperandList = (outs ACC64DSP:$ac); - dag InOperandList = (ins GPR32:$rs, ACC64DSP:$acin); + dag OutOperandList = (outs ACC64DSPOpnd:$ac); + dag InOperandList = (ins GPR32Opnd:$rs, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); - list Pattern = [(set ACC64DSP:$ac, - (OpNode GPR32:$rs, ACC64DSP:$acin))]; + list Pattern = [(set ACC64DSPOpnd:$ac, + (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; } class MTHLIP_DESC_BASE { - dag OutOperandList = (outs ACC64DSP:$ac); - dag InOperandList = (ins GPR32:$rs, ACC64DSP:$acin); + dag OutOperandList = (outs ACC64DSPOpnd:$ac); + dag InOperandList = (ins GPR32Opnd:$rs, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); - list Pattern = [(set ACC64DSP:$ac, - (OpNode GPR32:$rs, ACC64DSP:$acin))]; + list Pattern = [(set ACC64DSPOpnd:$ac, + (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; } class RDDSP_DESC_BASE { - dag OutOperandList = (outs GPR32:$rd); + dag OutOperandList = (outs GPR32Opnd:$rd); dag InOperandList = (ins uimm16:$mask); string AsmString = !strconcat(instr_asm, "\t$rd, $mask"); - list Pattern = [(set GPR32:$rd, (OpNode immZExt10:$mask))]; + list Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))]; InstrItinClass Itinerary = itin; } class WRDSP_DESC_BASE { dag OutOperandList = (outs); - dag InOperandList = (ins GPR32:$rs, uimm16:$mask); + dag InOperandList = (ins GPR32Opnd:$rs, uimm16:$mask); string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); - list Pattern = [(OpNode GPR32:$rs, immZExt10:$mask)]; + list Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)]; InstrItinClass Itinerary = itin; } class DPA_W_PH_DESC_BASE { - dag OutOperandList = (outs ACC64DSP:$ac); - dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACC64DSP:$acin); + dag OutOperandList = (outs ACC64DSPOpnd:$ac); + dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); - list Pattern = [(set ACC64DSP:$ac, - (OpNode GPR32:$rs, GPR32:$rt, ACC64DSP:$acin))]; + list Pattern = [(set ACC64DSPOpnd:$ac, + (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; } class MULT_DESC_BASE { - dag OutOperandList = (outs ACC64DSP:$ac); - dag InOperandList = (ins GPR32:$rs, GPR32:$rt); + dag OutOperandList = (outs ACC64DSPOpnd:$ac); + dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); - list Pattern = [(set 
ACC64DSP:$ac, (OpNode GPR32:$rs, GPR32:$rt))]; + list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt))]; InstrItinClass Itinerary = itin; int AddedComplexity = 20; bit isCommutable = 1; @@ -460,32 +460,32 @@ class MULT_DESC_BASE { - dag OutOperandList = (outs ACC64DSP:$ac); - dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACC64DSP:$acin); + dag OutOperandList = (outs ACC64DSPOpnd:$ac); + dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); - list Pattern = [(set ACC64DSP:$ac, - (OpNode GPR32:$rs, GPR32:$rt, ACC64DSP:$acin))]; + list Pattern = [(set ACC64DSPOpnd:$ac, + (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))]; InstrItinClass Itinerary = itin; int AddedComplexity = 20; string Constraints = "$acin = $ac"; } -class MFHI_DESC_BASE { - dag OutOperandList = (outs GPR32:$rd); - dag InOperandList = (ins RC:$ac); +class MFHI_DESC_BASE { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins RO:$ac); string AsmString = !strconcat(instr_asm, "\t$rd, $ac"); InstrItinClass Itinerary = itin; } -class MTHI_DESC_BASE { - dag OutOperandList = (outs RC:$ac); - dag InOperandList = (ins GPR32:$rs); +class MTHI_DESC_BASE { + dag OutOperandList = (outs RO:$ac); + dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); InstrItinClass Itinerary = itin; } class BPOSGE32_PSEUDO_DESC_BASE : - MipsPseudo<(outs GPR32:$dst), (ins), [(set GPR32:$dst, (OpNode))]> { + MipsPseudo<(outs GPR32Opnd:$dst), (ins), [(set GPR32Opnd:$dst, (OpNode))]> { bit usesCustomInserter = 1; } @@ -501,10 +501,10 @@ class BPOSGE32_DESC_BASE { class INSV_DESC_BASE { - dag OutOperandList = (outs GPR32:$rt); - dag InOperandList = (ins GPR32:$src, GPR32:$rs); + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins GPR32Opnd:$src, GPR32Opnd:$rs); string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); - list Pattern = [(set GPR32:$rt, (OpNode GPR32:$src, GPR32:$rs))]; + list Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; } @@ -515,209 +515,209 @@ class INSV_DESC_BASE, IsCommutable, + DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary, - DSPR, DSPR>, + DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary, - DSPR, DSPR>, IsCommutable, + DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary, - DSPR, DSPR>, + DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w, - NoItinerary, GPR32, GPR32>, + NoItinerary, GPR32Opnd, GPR32Opnd>, 
IsCommutable, Defs<[DSPOutFlag20]>; class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w, - NoItinerary, GPR32, GPR32>, + NoItinerary, GPR32Opnd, GPR32Opnd>, Defs<[DSPOutFlag20]>; class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary, - GPR32, GPR32>, IsCommutable, + GPR32Opnd, GPR32Opnd>, IsCommutable, Defs<[DSPCarry]>; class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary, - GPR32, GPR32>, + GPR32Opnd, GPR32Opnd>, IsCommutable, Uses<[DSPCarry]>, Defs<[DSPOutFlag20]>; class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary, - GPR32, GPR32>; + GPR32Opnd, GPR32Opnd>; class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb, - NoItinerary, GPR32, DSPR>; + NoItinerary, GPR32Opnd, DSPROpnd>; // Absolute value class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>; class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, - NoItinerary, GPR32>, + NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag20]>; // Precision reduce/expand class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph", int_mips_precrq_qb_ph, - NoItinerary, DSPR, DSPR>; + NoItinerary, DSPROpnd, DSPROpnd>; class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w", int_mips_precrq_ph_w, - NoItinerary, DSPR, GPR32>; + NoItinerary, DSPROpnd, GPR32Opnd>; class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w", int_mips_precrq_rs_ph_w, - NoItinerary, DSPR, - GPR32>, + NoItinerary, DSPROpnd, + GPR32Opnd>, Defs<[DSPOutFlag22]>; class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph", int_mips_precrqu_s_qb_ph, - NoItinerary, DSPR, - DSPR>, + NoItinerary, DSPROpnd, + DSPROpnd>, Defs<[DSPOutFlag22]>; class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl", int_mips_preceq_w_phl, - NoItinerary, GPR32, DSPR>; + NoItinerary, GPR32Opnd, DSPROpnd>; class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr", int_mips_preceq_w_phr, - NoItinerary, GPR32, DSPR>; + NoItinerary, GPR32Opnd, DSPROpnd>; class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl", int_mips_precequ_ph_qbl, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr", int_mips_precequ_ph_qbr, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla", int_mips_precequ_ph_qbla, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra", int_mips_precequ_ph_qbra, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl", int_mips_preceu_ph_qbl, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr", int_mips_preceu_ph_qbr, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla", int_mips_preceu_ph_qbla, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", int_mips_preceu_ph_qbra, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; // Shift class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class 
SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, - immZExt4, NoItinerary, DSPR>, + immZExt4, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, - immZExt4, NoItinerary, DSPR>; + immZExt4, NoItinerary, DSPROpnd>; class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, - immZExt5, NoItinerary, GPR32>, + immZExt5, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag22]>; class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, - NoItinerary, GPR32>, + NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag22]>; class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, - immZExt5, NoItinerary, GPR32>; + immZExt5, NoItinerary, GPR32Opnd>; class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, - NoItinerary, GPR32>; + NoItinerary, GPR32Opnd>; // Multiplication class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl", int_mips_muleu_s_ph_qbl, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag21]>; class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr", int_mips_muleu_s_ph_qbr, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl", int_mips_muleq_s_w_phl, - NoItinerary, GPR32, DSPR>, + NoItinerary, GPR32Opnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr", int_mips_muleq_s_w_phr, - NoItinerary, GPR32, DSPR>, + NoItinerary, GPR32Opnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph", @@ -737,10 +737,10 @@ class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>, Defs<[DSPOutFlag16_19]>; // Move from/to hi/lo. 
-class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HI32DSP, NoItinerary>; -class MFLO_DESC : MFHI_DESC_BASE<"mflo", LO32DSP, NoItinerary>; -class MTHI_DESC : MTHI_DESC_BASE<"mthi", HI32DSP, NoItinerary>; -class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LO32DSP, NoItinerary>; +class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HI32DSPOpnd, NoItinerary>; +class MFLO_DESC : MFHI_DESC_BASE<"mflo", LO32DSPOpnd, NoItinerary>; +class MTHI_DESC : MTHI_DESC_BASE<"mthi", HI32DSPOpnd, NoItinerary>; +class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LO32DSPOpnd, NoItinerary>; // Dot product with accumulate/subtract class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>; @@ -773,67 +773,67 @@ class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>; // Comparison class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", int_mips_cmpu_eq_qb, NoItinerary, - DSPR>, + DSPROpnd>, IsCommutable, Defs<[DSPCCond]>; class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb", int_mips_cmpu_lt_qb, NoItinerary, - DSPR>, Defs<[DSPCCond]>; + DSPROpnd>, Defs<[DSPCCond]>; class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb", int_mips_cmpu_le_qb, NoItinerary, - DSPR>, Defs<[DSPCCond]>; + DSPROpnd>, Defs<[DSPCCond]>; class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", int_mips_cmpgu_eq_qb, - NoItinerary, GPR32, DSPR>, + NoItinerary, GPR32Opnd, DSPROpnd>, IsCommutable; class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb", int_mips_cmpgu_lt_qb, - NoItinerary, GPR32, DSPR>; + NoItinerary, GPR32Opnd, DSPROpnd>; class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb", int_mips_cmpgu_le_qb, - NoItinerary, GPR32, DSPR>; + NoItinerary, GPR32Opnd, DSPROpnd>; class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, IsCommutable, Defs<[DSPCCond]>; class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPCCond]>; class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPCCond]>; // Misc class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev, - NoItinerary, GPR32>; + NoItinerary, GPR32Opnd>; class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, - NoItinerary, DSPR, DSPR>; + NoItinerary, DSPROpnd, DSPROpnd>; class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, - NoItinerary, DSPR, GPR32>; + NoItinerary, DSPROpnd, GPR32Opnd>; class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph, - NoItinerary, DSPR, GPR32>; + NoItinerary, DSPROpnd, GPR32Opnd>; class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, Uses<[DSPCCond]>; class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, Uses<[DSPCCond]>; class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>; @@ -905,97 +905,97 @@ class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>, // MIPS DSP Rev 2 // Addition/subtraction class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary, - DSPR, DSPR>, IsCommutable, + 
DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary, - DSPR, DSPR>, + DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb, - NoItinerary, DSPR>, IsCommutable; + NoItinerary, DSPROpnd>, IsCommutable; class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb, - NoItinerary, DSPR>, IsCommutable; + NoItinerary, DSPROpnd>, IsCommutable; class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph, - NoItinerary, DSPR>, IsCommutable; + NoItinerary, DSPROpnd>, IsCommutable; class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph, - NoItinerary, DSPR>, IsCommutable; + NoItinerary, DSPROpnd>, IsCommutable; class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w, - NoItinerary, GPR32>, IsCommutable; + NoItinerary, GPR32Opnd>, IsCommutable; class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w, - NoItinerary, GPR32>, IsCommutable; + NoItinerary, GPR32Opnd>, IsCommutable; class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w, - NoItinerary, GPR32>; + NoItinerary, GPR32Opnd>; class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w, - NoItinerary, GPR32>; + NoItinerary, GPR32Opnd>; // Comparison class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb", int_mips_cmpgdu_eq_qb, - NoItinerary, GPR32, DSPR>, + NoItinerary, GPR32Opnd, DSPROpnd>, IsCommutable, Defs<[DSPCCond]>; class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb", int_mips_cmpgdu_lt_qb, - NoItinerary, GPR32, DSPR>, + NoItinerary, GPR32Opnd, DSPROpnd>, Defs<[DSPCCond]>; class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb", int_mips_cmpgdu_le_qb, - NoItinerary, GPR32, DSPR>, + NoItinerary, GPR32Opnd, DSPROpnd>, Defs<[DSPCCond]>; // Absolute class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb, - NoItinerary, DSPR>, + NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>; // Multiplication class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary, - DSPR>, IsCommutable, + DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph, - NoItinerary, DSPR>, IsCommutable, + NoItinerary, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w, - NoItinerary, GPR32>, IsCommutable, + NoItinerary, GPR32Opnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w, - NoItinerary, GPR32>, IsCommutable, + NoItinerary, GPR32Opnd>, IsCommutable, 
Defs<[DSPOutFlag21]>; class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, - NoItinerary, DSPR, DSPR>, + NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; // Dot product with accumulate/subtract @@ -1026,36 +1026,36 @@ class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>; // Precision reduce/expand class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", int_mips_precr_qb_ph, - NoItinerary, DSPR, DSPR>; + NoItinerary, DSPROpnd, DSPROpnd>; class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w", int_mips_precr_sra_ph_w, - NoItinerary, DSPR, - GPR32>; + NoItinerary, DSPROpnd, + GPR32Opnd>; class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", int_mips_precr_sra_r_ph_w, - NoItinerary, DSPR, - GPR32>; + NoItinerary, DSPROpnd, + GPR32Opnd>; // Shift class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, - immZExt3, NoItinerary, DSPR>; + immZExt3, NoItinerary, DSPROpnd>; class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, - NoItinerary, DSPR>; + NoItinerary, DSPROpnd>; // Misc class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5, @@ -1252,12 +1252,12 @@ let isPseudo = 1, isCodeGenOnly = 1 in { // Pseudo CMP and PICK instructions. 
class PseudoCMP<Instruction RealInst> :
-  PseudoDSP<(outs DSPCC:$cmp), (ins DSPR:$rs, DSPR:$rt), []>,
-  PseudoInstExpansion<(RealInst DSPR:$rs, DSPR:$rt)>, NeverHasSideEffects;
+  PseudoDSP<(outs DSPCC:$cmp), (ins DSPROpnd:$rs, DSPROpnd:$rt), []>,
+  PseudoInstExpansion<(RealInst DSPROpnd:$rs, DSPROpnd:$rt)>, NeverHasSideEffects;

class PseudoPICK<Instruction RealInst> :
-  PseudoDSP<(outs DSPR:$rd), (ins DSPCC:$cmp, DSPR:$rs, DSPR:$rt), []>,
-  PseudoInstExpansion<(RealInst DSPR:$rd, DSPR:$rs, DSPR:$rt)>,
+  PseudoDSP<(outs DSPROpnd:$rd), (ins DSPCC:$cmp, DSPROpnd:$rs, DSPROpnd:$rt), []>,
+  PseudoInstExpansion<(RealInst DSPROpnd:$rd, DSPROpnd:$rs, DSPROpnd:$rt)>,
   NeverHasSideEffects;

def PseudoCMP_EQ_PH : PseudoCMP<CMP_EQ_PH>;

diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 62a594f..176eb64 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -392,6 +392,16 @@ def ACC64DSPAsmOperand : MipsAsmRegOperand {
   let ParserMethod = "parseACC64DSP";
 }

+def LO32DSPAsmOperand : MipsAsmRegOperand {
+  let Name = "LO32DSPAsm";
+  let ParserMethod = "parseLO32DSP";
+}
+
+def HI32DSPAsmOperand : MipsAsmRegOperand {
+  let Name = "HI32DSPAsm";
+  let ParserMethod = "parseHI32DSP";
+}
+
 def CCRAsmOperand : MipsAsmRegOperand {
   let Name = "CCRAsm";
   let ParserMethod = "parseCCRRegs";
@@ -458,6 +468,14 @@ def FCCRegsOpnd : RegisterOperand<FCCRegs> {
   let ParserMatchClass = FCCRegsAsmOperand;
 }

+def LO32DSPOpnd : RegisterOperand<LO32DSP> {
+  let ParserMatchClass = LO32DSPAsmOperand;
+}
+
+def HI32DSPOpnd : RegisterOperand<HI32DSP> {
+  let ParserMatchClass = HI32DSPAsmOperand;
+}
+
 def ACC64DSPOpnd : RegisterOperand<ACC64DSP> {
   let ParserMatchClass = ACC64DSPAsmOperand;
 }
-- cgit v1.1 

From 899ee589f5182a35495f068ae15b5f2b5ff4ef8a Mon Sep 17 00:00:00 2001
From: Akira Hatanaka
Date: Wed, 14 Aug 2013 01:15:52 +0000
Subject: [mips] Fix bug in parsing accumulator registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188344 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 28ad090..c9bc29a 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -861,8 +861,8 @@ int MipsAsmParser::matchFCCRegisterName(StringRef Name) {

 int MipsAsmParser::matchACRegisterName(StringRef Name) {

-  if (Name.startswith("acc")) {
-    StringRef NumString = Name.substr(3);
+  if (Name.startswith("ac")) {
+    StringRef NumString = Name.substr(2);
     unsigned IntVal;
     if (NumString.getAsInteger(10, IntVal))
       return -1; // This is not an integer.
-- cgit v1.1 

From cc60bbc739bf4459eae7f2a6f0feb5534dd99f46 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 14 Aug 2013 05:58:39 +0000
Subject: Replace EVT with MVT in many of the shuffle lowering functions.

Keeps compiler from generating unneeded checks and handling for extended types.
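The pattern behind this series of EVT-to-MVT changes is an isSimple() guard:
an EVT can describe an arbitrary extended IR type (e.g. v3i64) that has no
MVT enum value, so a caller must reject non-simple types before switching to
the cheap MVT enum. A minimal sketch of that guard, assuming the usual
llvm::EVT/llvm::MVT API (isSimple, getSimpleVT); the function name is
illustrative and not part of this patch:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/ValueTypes.h"

    // Reject extended types up front, then classify on the plain MVT enum so
    // no EVT fallback paths need to be generated for the checks below.
    static bool isLegalMaskForSimpleType(llvm::ArrayRef<int> M, llvm::EVT VT) {
      if (!VT.isSimple())   // extended types have no MVT equivalent
        return false;
      llvm::MVT SVT = VT.getSimpleVT();
      return SVT.is128BitVector() && M.size() == SVT.getVectorNumElements();
    }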
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188361 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 83 +++++++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 37 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9625e4b..80ef332 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3532,7 +3532,7 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
 /// the second operand.
-static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
+static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
   if (VT == MVT::v4f32 || VT == MVT::v4i32 )
     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
   if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -3542,7 +3542,7 @@ static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {

 /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
   if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
     return false;

@@ -3571,7 +3571,7 @@ static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {

 /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
   if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
     return false;

@@ -3600,7 +3600,7 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {

 /// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
+static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
                           const X86Subtarget *Subtarget) {
   if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
       (VT.is256BitVector() && !Subtarget->hasInt256()))
@@ -3690,7 +3690,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
 /// specifies a shuffle of elements that is suitable for input to 128/256-bit
 /// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
 /// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
+static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool HasFp256,
                         bool Commuted = false) {
   if (!HasFp256 && VT.is256BitVector())
     return false;
@@ -3743,7 +3743,7 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,

 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
   if (!VT.is128BitVector())
     return false;

@@ -3762,7 +3762,7 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {

 /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
 /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
 /// <2, 3, 2, 3>
-static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
   if (!VT.is128BitVector())
     return false;

@@ -3779,7 +3779,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {

 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
-static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
   if (!VT.is128BitVector())
     return false;

@@ -3801,7 +3801,7 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {

 /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVLHPS.
-static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
   if (!VT.is128BitVector())
     return false;

@@ -3870,7 +3870,7 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,

 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
+static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
                          bool HasInt256, bool V2IsSplat = false) {

   if (VT.is512BitVector())
@@ -3909,7 +3909,7 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,

 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
+static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
                          bool HasInt256, bool V2IsSplat = false) {
   unsigned NumElts = VT.getVectorNumElements();

@@ -3948,7 +3948,7 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,

 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
 /// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
   unsigned NumElts = VT.getVectorNumElements();
   bool Is256BitVec = VT.is256BitVector();

@@ -3991,7 +3991,7 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
 /// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
   unsigned NumElts = VT.getVectorNumElements();

   if (VT.is512BitVector())
@@ -4178,7 +4178,7 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
 /// isCommutedMOVLMask - Returns true if the shuffle mask is except the reverse
 /// of what x86 movss want. X86 movs requires the lowest  element to be lowest
 /// element of vector 2 and the other elements to come from vector 1 in order.
-static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
+static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
                                bool V2IsSplat = false, bool V2IsUndef = false) {
   if (!VT.is128BitVector())
     return false;

@@ -4202,7 +4202,7 @@ static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,

 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
 /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
-static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
+static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
                            const X86Subtarget *Subtarget) {
   if (!Subtarget->hasSSE3())
     return false;
@@ -4226,7 +4226,7 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,

 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
 /// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
-static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
+static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
                            const X86Subtarget *Subtarget) {
   if (!Subtarget->hasSSE3())
     return false;
@@ -4250,7 +4250,7 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,

 /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to 256-bit
 /// version of MOVDDUP.
-static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
   if (!HasFp256 || !VT.is256BitVector())
     return false;

@@ -4270,7 +4270,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
 /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to 128-bit
 /// version of MOVDDUP.
-static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
   if (!VT.is128BitVector())
     return false;

@@ -4534,7 +4534,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
 /// match movhlps. The lower half elements should come from upper half of
 /// V1 (and in order), and the upper half elements should come from the upper
 /// half of V2 (and in order).
-static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
+static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
   if (!VT.is128BitVector())
     return false;
   if (VT.getVectorNumElements() != 4)
@@ -4591,7 +4591,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) {
 /// half of V2 (and in order). And since V1 will become the source of the
 /// MOVLP, it must be either a vector load or a scalar load to vector.
 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
-                               ArrayRef<int> Mask, EVT VT) {
+                               ArrayRef<int> Mask, MVT VT) {
   if (!VT.is128BitVector())
     return false;

@@ -13560,37 +13560,46 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
 bool
 X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
                                       EVT VT) const {
+  if (!VT.isSimple())
+    return false;
+
+  MVT SVT = VT.getSimpleVT();
+
   // Very little shuffling can be done for 64-bit vectors right now.
   if (VT.getSizeInBits() == 64)
     return false;

   // FIXME: pshufb, blends, shifts.
-  return (VT.getVectorNumElements() == 2 ||
+  return (SVT.getVectorNumElements() == 2 ||
           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
-          isMOVLMask(M, VT) ||
-          isSHUFPMask(M, VT, Subtarget->hasFp256()) ||
-          isPSHUFDMask(M, VT) ||
-          isPSHUFHWMask(M, VT, Subtarget->hasInt256()) ||
-          isPSHUFLWMask(M, VT, Subtarget->hasInt256()) ||
-          isPALIGNRMask(M, VT, Subtarget) ||
-          isUNPCKLMask(M, VT, Subtarget->hasInt256()) ||
-          isUNPCKHMask(M, VT, Subtarget->hasInt256()) ||
-          isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasInt256()) ||
-          isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasInt256()));
+          isMOVLMask(M, SVT) ||
+          isSHUFPMask(M, SVT, Subtarget->hasFp256()) ||
+          isPSHUFDMask(M, SVT) ||
+          isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
+          isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
+          isPALIGNRMask(M, SVT, Subtarget) ||
+          isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
+          isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
+          isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
+          isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));
 }

 bool
 X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                           EVT VT) const {
-  unsigned NumElts = VT.getVectorNumElements();
+  if (!VT.isSimple())
+    return false;
+
+  MVT SVT = VT.getSimpleVT();
+  unsigned NumElts = SVT.getVectorNumElements();
+
   // FIXME: This collection of masks seems suspect.
   if (NumElts == 2)
     return true;
-  if (NumElts == 4 && VT.is128BitVector()) {
-    return (isMOVLMask(Mask, VT)  ||
-            isCommutedMOVLMask(Mask, VT, true) ||
-            isSHUFPMask(Mask, VT, Subtarget->hasFp256()) ||
-            isSHUFPMask(Mask, VT, Subtarget->hasFp256(), /* Commuted */ true));
+  if (NumElts == 4 && SVT.is128BitVector()) {
+    return (isMOVLMask(Mask, SVT)  ||
+            isCommutedMOVLMask(Mask, SVT, true) ||
+            isSHUFPMask(Mask, SVT, Subtarget->hasFp256()) ||
+            isSHUFPMask(Mask, SVT, Subtarget->hasFp256(), /* Commuted */ true));
   }
   return false;
 }
-- cgit v1.1 

From d36b53e764b742c2ba16ae2b2517646694e05bff Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 14 Aug 2013 06:21:10 +0000
Subject: Replace EVT with MVT in isVectorShift.

Keeps compiler from generating unneeded checks and handling for extended types.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188362 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 80ef332..f73689d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5076,7 +5076,8 @@ bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
 /// logical left shift of a vector.
 static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
                                bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
-  unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+  unsigned NumElems =
+    SVOp->getValueType(0).getSimpleVT().getVectorNumElements();
   unsigned NumZeros = getNumOfConsecutiveZeros(
       SVOp, NumElems, false /* check zeros from right */, DAG,
       SVOp->getMaskElt(0));
@@ -5110,7 +5111,8 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
 /// logical left shift of a vector.
static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumElems = + SVOp->getValueType(0).getSimpleVT().getVectorNumElements(); unsigned NumZeros = getNumOfConsecutiveZeros( SVOp, NumElems, true /* check zeros from left */, DAG, NumElems - SVOp->getMaskElt(NumElems - 1) - 1); @@ -5146,7 +5148,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { // Although the logic below support any bitwidth size, there are no // shift instructions which handle more than 128-bit vectors. - if (!SVOp->getValueType(0).is128BitVector()) + if (!SVOp->getValueType(0).getSimpleVT().is128BitVector()) return false; if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || -- cgit v1.1 From f3d98a882e1cc426001adafafa440b69143d83e8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 14 Aug 2013 07:04:42 +0000 Subject: Use MVT in more lowering code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188363 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f73689d..6253b33 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4789,7 +4789,7 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { /// getLegalSplat - Generate a legal splat with supported x86 shuffles static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { - EVT VT = V.getValueType(); + MVT VT = V.getValueType().getSimpleVT(); SDLoc dl(V); if (VT.is128BitVector()) { @@ -4815,7 +4815,7 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { /// PromoteSplat - Splat is promoted to target supported vector shuffles. static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { - EVT SrcVT = SV->getValueType(0); + MVT SrcVT = SV->getValueType(0).getSimpleVT(); SDValue V1 = SV->getOperand(0); SDLoc dl(SV); @@ -4838,7 +4838,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // instruction because the target has no such instruction. Generate shuffles // which repeat i16 and i8 several times until they fit in i32, and then can // be manipulated by target suported shuffles. - EVT EltVT = SrcVT.getVectorElementType(); + MVT EltVT = SrcVT.getVectorElementType(); if (EltVT == MVT::i8 || EltVT == MVT::i16) V1 = PromoteSplati8i16(V1, DAG, EltNo); @@ -4860,7 +4860,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool IsZero, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = V2.getValueType(); + MVT VT = V2.getValueType().getSimpleVT(); SDValue V1 = IsZero ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); unsigned NumElems = VT.getVectorNumElements(); @@ -5563,7 +5563,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); // Skip if insert_vec_elt is not supported. 
 if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
@@ -5639,7 +5639,7 @@ X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {

 SDValue
 X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
-  EVT VT = Op.getValueType();
+  MVT VT = Op.getValueType().getSimpleVT();
   assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
          "Unexpected type in LowerBUILD_VECTORvXi1!");
-- cgit v1.1 

From 158ec07008961c3cb506ebafd7c0201ca2001a33 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 14 Aug 2013 07:34:43 +0000
Subject: Make some helper methods static.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188364 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 52 ++++++++++++++++++++------------------
 lib/Target/X86/X86ISelLowering.h   |  9 -------
 2 files changed, 27 insertions(+), 34 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6253b33..1a17a04 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5431,8 +5431,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
 /// a scalar load, or a constant.
 /// The VBROADCAST node is returned when a pattern is found,
 /// or SDValue() otherwise.
-SDValue
-X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
+                                    SelectionDAG &DAG) {
   if (!Subtarget->hasFp256())
     return SDValue();

@@ -5525,7 +5525,8 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {

       assert(C && "Invalid constant type");

-      SDValue CP = DAG.getConstantPool(C, getPointerTy());
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
       unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
       Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
                        MachinePointerInfo::getConstantPool(),
@@ -5561,12 +5562,12 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }

-SDValue
-X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
+static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getValueType().getSimpleVT();

   // Skip if insert_vec_elt is not supported.
-  if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
     return SDValue();

   SDLoc DL(Op);
@@ -5769,7 +5770,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
       return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
   }

-  SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+  SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
   if (Broadcast.getNode())
     return Broadcast;

@@ -6415,10 +6416,10 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
 // 1. [ssse3] 1 x pshufb
 // 2. [ssse3] 2 x pshufb + 1 x por
 // 3. [all]   v8i16 shuffle + N x pextrw + rotate + pinsrw
-static
-SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
-                                 SelectionDAG &DAG,
-                                 const X86TargetLowering &TLI) {
+static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+                                        const X86Subtarget* Subtarget,
+                                        SelectionDAG &DAG) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   SDLoc dl(SVOp);
@@ -6434,7 +6435,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
   // present, fall back to case 3.

   // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
-  if (TLI.getSubtarget()->hasSSSE3()) {
+  if (Subtarget->hasSSSE3()) {
     SmallVector<SDValue,16> pshufbMask;

     // If all result elements are from one input vector, then only translate
@@ -7039,8 +7040,8 @@ SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
 }

 // Reduce a vector shuffle to zext.
-SDValue
-X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
+                                    SelectionDAG &DAG) {
   // PMOVZX is only available from SSE41.
   if (!Subtarget->hasSSE41())
     return SDValue();
@@ -7089,7 +7090,7 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
   EVT NeVT = EVT::getIntegerVT(*Context, NBits);
   EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);

-  if (!isTypeLegal(NVT))
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
     return SDValue();

   // Simplify the operand as it's prepared to be fed into shuffle.
@@ -7127,8 +7128,9 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
                      DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
 }

-SDValue
-X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
+static SDValue
+NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+                       SelectionDAG &DAG) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   MVT VT = Op.getValueType().getSimpleVT();
   SDLoc dl(Op);
@@ -7141,13 +7143,13 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
   // Handle splat operations
   if (SVOp->isSplat()) {
     // Use vbroadcast whenever the splat comes from a foldable load
-    SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+    SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
     if (Broadcast.getNode())
       return Broadcast;
   }

   // Check integer expanding shuffles.
-  SDValue NewOp = LowerVectorIntExtend(Op, DAG);
+  SDValue NewOp = LowerVectorIntExtend(Op, Subtarget, DAG);
   if (NewOp.getNode())
     return NewOp;

@@ -7227,7 +7229,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   // Normalize the input vectors. Here splats, zeroed vectors, profitable
   // narrowing and commutation of operands should be handled. The actual code
   // doesn't include all of those, work in progress...
-  SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
+  SDValue NewOp = NormalizeVectorShuffle(Op, Subtarget, DAG);
   if (NewOp.getNode())
     return NewOp;

@@ -7485,7 +7487,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   }

   if (VT == MVT::v16i8) {
-    SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+    SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, Subtarget, DAG);
     if (NewOp.getNode())
       return NewOp;
   }
@@ -9210,8 +9212,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {

 // LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
// -SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree."); if (!Subtarget->hasSSE41()) @@ -9444,7 +9446,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, case ISD::AND: Opcode = X86ISD::AND; break; case ISD::OR: { if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) { - SDValue EFLAGS = LowerVectorAllZeroTest(Op, DAG); + SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG); if (EFLAGS.getNode()) return EFLAGS; } @@ -12688,7 +12690,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. if (Op0.getOpcode() == ISD::BITCAST && Op00.getOpcode() == ISD::VECTOR_SHUFFLE) - Tmp1 = LowerVectorIntExtend(Op00, DAG); + Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG); if (Tmp1.getNode()) { SDValue Tmp1Op0 = Tmp1.getOperand(0); assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7201f7a..89ab428 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -894,15 +894,6 @@ namespace llvm { SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; - // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR - SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; - SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; - SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; - virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, -- cgit v1.1 From 8971717313ad72b06940bcc4198ae00c2ecaa385 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 14 Aug 2013 07:35:18 +0000 Subject: Remove tab characters. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188365 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1a17a04..aee4d38 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5679,7 +5679,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, DAG.getConstant(Immediate, MVT::i16)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0)); } if (!isSplatVector(Op.getNode())) -- cgit v1.1 From 35e194fbadb5b89406b176e44e67f78b514b0cb1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 14 Aug 2013 07:53:41 +0000 Subject: Make more helper methods into static functions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188366 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 30 +++++++++++++++++------------- lib/Target/X86/X86ISelLowering.h | 4 ---- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index aee4d38..f234c86 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12087,7 +12087,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } -SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); EVT EltTy = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); @@ -12434,7 +12434,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, return SDValue(); } -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, + SelectionDAG &DAG) { EVT VT = Op.getValueType(); SDLoc dl(Op); @@ -12792,9 +12793,10 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); } -SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { - EVT SrcVT = Op.getOperand(0).getValueType(); - EVT DstVT = Op.getValueType(); +static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT SrcVT = Op.getOperand(0).getValueType().getSimpleVT(); + MVT DstVT = Op.getValueType().getSimpleVT(); assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && Subtarget->hasMMX() && "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || @@ -12879,7 +12881,8 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } -SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); // For MacOSX, we want to call an alternative entry point: __sincos_stret, @@ -12890,8 +12893,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - ArgListTy Args; - ArgListEntry Entry; + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; @@ -12904,7 +12907,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { // the small struct {f32, f32} is returned in (eax, edx). For f64, // the results are returned via SRet in memory. const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy()); Type *RetTy = isF64 ? 
    (Type*)StructType::get(ArgTy, ArgTy, NULL)
@@ -12915,7 +12919,7 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
                     CallingConv::C, /*isTaillCall=*/false,
                     /*doesNotRet=*/false, /*isReturnValueUsed*/true,
                     Callee, Args, DAG, dl);
-  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);

   if (isF64)
     // Returned in xmm0 and xmm1.
@@ -12995,7 +12999,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::MUL:                return LowerMUL(Op, Subtarget, DAG);
   case ISD::SRA:
   case ISD::SRL:
-  case ISD::SHL:                return LowerShift(Op, DAG);
+  case ISD::SHL:                return LowerShift(Op, Subtarget, DAG);
   case ISD::SADDO:
   case ISD::UADDO:
   case ISD::SSUBO:
@@ -13003,7 +13007,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SMULO:
   case ISD::UMULO:              return LowerXALUO(Op, DAG);
   case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
-  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
+  case ISD::BITCAST:            return LowerBITCAST(Op, Subtarget, DAG);
   case ISD::ADDC:
   case ISD::ADDE:
   case ISD::SUBC:
@@ -13011,7 +13015,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADD:                return LowerADD(Op, DAG);
   case ISD::SUB:                return LowerSUB(Op, DAG);
   case ISD::SDIV:               return LowerSDIV(Op, DAG);
-  case ISD::FSINCOS:            return LowerFSINCOS(Op, DAG);
+  case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
   }
 }

diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 89ab428..53f09c4 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -854,7 +854,6 @@ namespace llvm {
     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -889,10 +888,7 @@ namespace llvm {
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;

     virtual SDValue
       LowerFormalArguments(SDValue Chain,
-- cgit v1.1 

From dd34dc99fdf66edc52b5fc2ddf8132ebaa134aee Mon Sep 17 00:00:00 2001
From: Renato Golin
Date: Wed, 14 Aug 2013 16:35:29 +0000
Subject: Let t2LDRBi8 and t2LDRBi12 have same Base Pointer

When determining if two different loads are from the same base address, this
patch allows one load to use a t2LDRi8 address mode and another to use a
t2LDRi12 address mode. The current implementation is very conservative and
this allows the case of differing Thumb2 byte loads to be considered. Allowing
these differing modes instead of forcing the exact same opcode is useful for
situations where one opcode loads from a base address+1 and a second opcode
loads from a base address-1.

Patch by Daniel Stewart.
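The added condition can be read as a small predicate (a hypothetical helper
for illustration; the patch itself inlines the check in
shouldScheduleLoadsNear):

    // t2LDRBi8 and t2LDRBi12 are two immediate encodings of the same Thumb2
    // byte load (8-bit offset, possibly negative, vs. 12-bit positive
    // offset), so treat the pair as matching opcodes when comparing bases.
    static bool isT2LDRB(unsigned Opc) {
      return Opc == ARM::t2LDRBi8 || Opc == ARM::t2LDRBi12;
    }

    static bool areSameBasicLoadOpcode(unsigned Opc1, unsigned Opc2) {
      return Opc1 == Opc2 || (isT2LDRB(Opc1) && isT2LDRB(Opc2));
    }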
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188385 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 977d936..62e8063 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1404,9 +1404,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1423,8 +1425,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1471,7 +1475,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, if ((Offset2 - Offset1) / 8 > 64) return false; - if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) + // Check if the machine opcodes are different. If they are different + // then we consider them to not be of the same base address, + // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. + // In this case, they are considered to be the same because they are different + // encoding forms of the same basic instruction. + if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && + !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && + Load2->getMachineOpcode() == ARM::t2LDRBi12) || + (Load1->getMachineOpcode() == ARM::t2LDRBi12 && + Load2->getMachineOpcode() == ARM::t2LDRBi8))) return false; // FIXME: overly conservative? // Four loads in a row should be sufficient. -- cgit v1.1 From 341c1a50adeadd848b2e73e9184d81331ee1cb92 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 14 Aug 2013 20:05:04 +0000 Subject: Actually fix PPC64 64-bit GPR inline asm constraint matching This is a follow-up to r187693, correcting that code to request the correct register class. The previous version, with the wrong register class, was not really correcting the constraints, but rather was removing them. Coincidentally, this fixed the failing test case in r187693, but obviously created other problems. 
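Not from the commit, but for a rough sense of what exercises this path: C code along the following lines, built for a ppc64 triple, binds a 64-bit value to the generic "r" constraint, and matching it requires promoting the 32-bit GPRC register found first to its 64-bit G8RC super-register.

    /* Hypothetical reproducer; function and variable names are invented. */
    long add_twice(long x) {
      long r;
      /* A 64-bit operand under "r" must land in a 64-bit GPR (G8RC).
         Requesting the matching super-register with the wrong register
         class, as before this fix, effectively dropped the constraint
         instead of correcting it. */
      asm("add %0, %1, %1" : "=r"(r) : "r"(x));
      return r;
    }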
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188407 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 886def1..2a28d73 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7591,7 +7591,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, PPC::GPRCRegClass.contains(R.first)) { const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); return std::make_pair(TRI->getMatchingSuperReg(R.first, - PPC::sub_32, &PPC::GPRCRegClass), + PPC::sub_32, &PPC::G8RCRegClass), &PPC::G8RCRegClass); } -- cgit v1.1 From 2760387d868eb6fd323f5159aa3909ec3d956ea8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 Aug 2013 22:21:57 +0000 Subject: R600/SI: Add pattern for fp_to_uint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the F2U opcode for the Mesa driver. Patch by: Marek Olšák Signed-off-by: Marek Olšák git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188418 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index dc14609..7393e1d 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -615,7 +615,9 @@ defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", [(set f32:$dst, (uint_to_fp i32:$src0))] >; -defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; +defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", + [(set i32:$dst, (fp_to_uint f32:$src0))] +>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", [(set i32:$dst, (fp_to_sint f32:$src0))] >; -- cgit v1.1 From 9735dc6c1b20ae311bba50fbf28feb8c41810d3c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 Aug 2013 22:22:03 +0000 Subject: R600/SI: Fix an obvious typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by: Marek Olšák Signed-off-by: Marek Olšák git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188419 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUCallingConv.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index fc95d58..84d3118 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -24,7 +24,7 @@ def CC_SI : CallingConv<[ CCIfInReg>>, CCIfNotInReg Date: Wed, 14 Aug 2013 22:22:09 +0000 Subject: R600/SI: Allow conversion between v32i8 and v8i32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by: Marek Olšák Signed-off-by: Marek Olšák git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188420 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 5 +++++ lib/Target/R600/SIRegisterInfo.td | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 7393e1d..82c4b5e 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ 
-1510,6 +1510,11 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 292b9d2..82d1e71 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -159,7 +159,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>; -def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; +def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>; @@ -174,7 +174,7 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> { def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; -def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; +def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>; def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; -- cgit v1.1 From 67ca7b1bb07e331981e700cc4cc5944f68a9e256 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 Aug 2013 22:22:14 +0000 Subject: R600/SI: Handle MSAA texture targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by: Marek Olšák Signed-off-by: Marek Olšák git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188421 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 16 +++++++++++++++- lib/Target/R600/SIInstructions.td | 19 ++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 7e61b18..52205cc 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -230,7 +230,7 @@ def TEX_RECT : PatLeaf< def TEX_ARRAY : PatLeaf< (imm), [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 9 || TType == 10 || TType == 15 || TType == 16; + return TType == 9 || TType == 10 || TType == 16; }] >; @@ -241,6 +241,20 @@ def TEX_SHADOW_ARRAY : PatLeaf< }] >; +def TEX_MSAA : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 14; + }] +>; + +def TEX_ARRAY_MSAA : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 15; + }] +>; + class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> mask, dag outs, dag ins, string asm, list pattern> : InstR600ISA , diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 82c4b5e..909c976 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -500,7 +500,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; -//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; +def IMAGE_LOAD : MIMG_NoSampler_Helper <0x00000000, "IMAGE_LOAD">; def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; @@ -1397,9 +1397,21 @@ class ImageLoadArrayPattern : (opcode 
0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc) >; +class ImageLoadMSAAPattern : Pat < + (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA), + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc) +>; + +class ImageLoadArrayMSAAPattern : Pat < + (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA), + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc) +>; + multiclass ImageLoadPatterns { def : ImageLoadPattern ; def : ImageLoadArrayPattern ; + def : ImageLoadMSAAPattern ; + def : ImageLoadArrayMSAAPattern ; } defm : ImageLoadPatterns; @@ -1416,6 +1428,11 @@ def : Pat < (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) >; +def : Pat < + (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA), + (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) +>; + /********** ============================================ **********/ /********** Extraction, Insertion, Building and Casting **********/ /********** ============================================ **********/ -- cgit v1.1 From df4626ef15ba0eb5f571a3ee6314e5c388258927 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 Aug 2013 23:24:17 +0000 Subject: R600/SI: Assign a register class to the $vaddr operand for MIMG instructions The previous code declared the operand as unknown:$vaddr, which made it possible for scalar registers to be used instead of vector registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188425 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIDefines.h | 6 ++ lib/Target/R600/SIISelLowering.cpp | 8 ++- lib/Target/R600/SIInstrFormats.td | 3 + lib/Target/R600/SIInstrInfo.cpp | 5 ++ lib/Target/R600/SIInstrInfo.h | 2 +- lib/Target/R600/SIInstrInfo.td | 33 ++++++----- lib/Target/R600/SIInstructions.td | 112 ++++++++++++++++++++++--------------- 7 files changed, 109 insertions(+), 60 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 147578c..572ed6a 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -11,6 +11,12 @@ #ifndef SIDEFINES_H_ #define SIDEFINES_H_ +namespace SIInstrFlags { +enum { + MIMG = 1 << 3 +}; +} + #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 #define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C #define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index a76e6ee..4631f8a 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -1022,9 +1022,11 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, /// \brief Fold the instructions after slecting them SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); Node = AdjustRegClass(Node, DAG); - if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1) + if (TII->isMIMG(Node->getMachineOpcode())) adjustWritemask(Node, DAG); return foldOperands(Node, DAG); @@ -1034,7 +1036,9 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, /// bits set in the writemask void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - if (AMDGPU::isMIMG(MI->getOpcode()) == -1) + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); + if (!TII->isMIMG(MI->getOpcode())) return; unsigned VReg = MI->getOperand(0).getReg(); diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 434aa7e..cd1bbcd 100644 --- 
a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -17,10 +17,12 @@ class InstSI pattern> : field bits<1> VM_CNT = 0; field bits<1> EXP_CNT = 0; field bits<1> LGKM_CNT = 0; + field bits<1> MIMG = 0; let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; let TSFlags{2} = LGKM_CNT; + let TSFlags{3} = MIMG; } class Enc32 pattern> : @@ -414,6 +416,7 @@ class MIMG op, dag outs, dag ins, string asm, list pattern> : let VM_CNT = 1; let EXP_CNT = 1; + let MIMG = 1; } def EXP : Enc64< diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 551ae86..9bb4ad9 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -15,6 +15,7 @@ #include "SIInstrInfo.h" #include "AMDGPUTargetMachine.h" +#include "SIDefines.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" @@ -224,6 +225,10 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { return RC != &AMDGPU::EXECRegRegClass; } +int SIInstrInfo::isMIMG(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::MIMG; +} + //===----------------------------------------------------------------------===// // Indirect addressing callbacks //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 87eff4d..8d24ab4 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -47,6 +47,7 @@ public: virtual bool isMov(unsigned Opcode) const; virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + int isMIMG(uint16_t Opcode) const; virtual int getIndirectIndexBegin(const MachineFunction &MF) const; @@ -80,7 +81,6 @@ namespace AMDGPU { int getVOPe64(uint16_t Opcode); int getCommuteRev(uint16_t Opcode); int getCommuteOrig(uint16_t Opcode); - int isMIMG(uint16_t Opcode); } // End namespace AMDGPU diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 302fa24..71d20ea 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -397,11 +397,12 @@ class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF let mayStore = 0; } -class MIMG_NoSampler_Helper op, string asm> : MIMG < +class MIMG_NoSampler_Helper op, string asm, + RegisterClass src_rc> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, SReg_256:$srsrc), asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," #" $tfe, $lwe, $slc, $vaddr, $srsrc", @@ -412,11 +413,18 @@ class MIMG_NoSampler_Helper op, string asm> : MIMG < let hasPostISelHook = 1; } -class MIMG_Sampler_Helper op, string asm> : MIMG < +multiclass MIMG_NoSampler op, string asm> { + def _V1 : MIMG_NoSampler_Helper ; + def _V2 : MIMG_NoSampler_Helper ; + def _V4 : MIMG_NoSampler_Helper ; +} + +class MIMG_Sampler_Helper op, string asm, + RegisterClass src_rc> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp), asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", @@ -426,6 +434,14 @@ class MIMG_Sampler_Helper op, string asm> : MIMG < let hasPostISelHook = 1; } +multiclass 
MIMG_Sampler op, string asm> { + def _V1 : MIMG_Sampler_Helper ; + def _V2 : MIMG_Sampler_Helper ; + def _V4 : MIMG_Sampler_Helper ; + def _V8 : MIMG_Sampler_Helper ; + def _V16 : MIMG_Sampler_Helper ; +} + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// @@ -457,13 +473,4 @@ def getCommuteOrig : InstrMapping { let ValueCols = [["1"]]; } -// Test if the supplied opcode is an MIMG instruction -def isMIMG : InstrMapping { - let FilterClass = "MIMG"; - let RowFields = ["Inst"]; - let ColFields = ["Size"]; - let KeyCol = ["8"]; - let ValueCols = [["8"]]; -} - include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 909c976..5fbd68f 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -500,8 +500,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; -def IMAGE_LOAD : MIMG_NoSampler_Helper <0x00000000, "IMAGE_LOAD">; -def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; +defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">; +defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; //def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; @@ -510,7 +510,7 @@ def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>; //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; -def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">; +def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO", VReg_32>; //def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; //def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; //def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; @@ -528,20 +528,20 @@ def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">; //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; -def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">; +defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">; //def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; -def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">; +defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">; //def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; -def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">; -def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">; +defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">; +defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -def 
IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">; +defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">; //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; -def IMAGE_SAMPLE_C_D : MIMG_Sampler_Helper <0x0000002a, "IMAGE_SAMPLE_C_D">; +defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">; //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; -def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; +defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">; +defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; @@ -1327,7 +1327,7 @@ def : Pat < /* int_SI_sample for simple 1D texture lookup */ def : Pat < (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm), - (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) + (IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SamplePattern : Pat < @@ -1358,33 +1358,51 @@ class SampleShadowArrayPattern; /* int_SI_sample* for texture lookups consuming more address parameters */ -multiclass SamplePatterns { - def : SamplePattern ; - def : SampleRectPattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; - - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; - - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; - - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; +multiclass SamplePatterns { + def : SamplePattern ; + def : SampleRectPattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; + + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; + + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; + + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; } -defm : SamplePatterns; -defm : SamplePatterns; -defm : SamplePatterns; -defm : SamplePatterns; +defm : SamplePatterns; +defm : SamplePatterns; +defm : SamplePatterns; +defm : SamplePatterns; /* int_SI_imageload for texture fetches consuming varying address parameters */ class ImageLoadPattern : Pat < @@ -1407,15 +1425,21 @@ class ImageLoadArrayMSAAPattern; -multiclass ImageLoadPatterns { - def : ImageLoadPattern ; - def : ImageLoadArrayPattern ; - def : ImageLoadMSAAPattern ; - def : ImageLoadArrayMSAAPattern ; +multiclass ImageLoadPatterns { + def : ImageLoadPattern ; + def : ImageLoadArrayPattern ; } -defm : ImageLoadPatterns; -defm : ImageLoadPatterns; +multiclass ImageLoadMSAAPatterns { + def : ImageLoadMSAAPattern ; + def : ImageLoadArrayMSAAPattern ; +} + +defm : ImageLoadPatterns; +defm : ImageLoadPatterns; + +defm : ImageLoadMSAAPatterns; +defm : ImageLoadMSAAPatterns; /* Image resource information */ def : Pat < -- cgit v1.1 From 636298ba64fd07d4ddcae6005e7fc1db43eb5335 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 
Aug 2013 23:24:24 +0000 Subject: R600/SI: Choose the correct MOV instruction for copying immediates The instruction selector will now try to infer the destination register so it can decide whether to use V_MOV_B32 or S_MOV_B32 when copying immediates. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188426 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 29 +++++++++++++++++++++++++++++ lib/Target/R600/SIInstrInfo.td | 16 ++++++++++++++++ lib/Target/R600/SIInstructions.td | 10 ++++++++++ lib/Target/R600/SIRegisterInfo.cpp | 11 +++++++++++ lib/Target/R600/SIRegisterInfo.h | 3 +++ 5 files changed, 69 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index f222901..d339b09 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -77,6 +77,7 @@ private: bool isLocalLoad(const LoadSDNode *N) const; bool isRegionLoad(const LoadSDNode *N) const; + const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, SDValue& Offset); @@ -102,6 +103,34 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { } +/// \brief Determine the register class for \p OpNo +/// \returns The register class of the virtual register that will be used for +/// the given operand number \OpNo or NULL if the register class cannot be +/// determined. +const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, + unsigned OpNo) const { + if (!N->isMachineOpcode()) { + return NULL; + } + switch (N->getMachineOpcode()) { + default: { + const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode()); + int RegClass = Desc.OpInfo[Desc.getNumDefs() + OpNo].RegClass; + if (RegClass == -1) { + return NULL; + } + return TM.getRegisterInfo()->getRegClass(RegClass); + } + case AMDGPU::REG_SEQUENCE: { + const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass( + cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()); + unsigned SubRegIdx = + dyn_cast<ConstantSDNode>(N->getOperand(OpNo + 1))->getZExtValue(); + return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx); + } + } +} + SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 71d20ea..df6d994 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -58,6 +58,22 @@ class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{ (*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0; }]>; +class SGPRImm <dag frag> : PatLeaf<frag, [{ if (TM.getSubtarget<AMDGPUSubtarget>().getGeneration() < + AMDGPUSubtarget::SOUTHERN_ISLANDS) { + return false; + } + const SIRegisterInfo *SIRI = + static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); + for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); + U != E; ++U) { + if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) { + return true; + } + } + return false; +}]>; + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 5fbd68f..b20d7c0 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@
-1583,6 +1583,16 @@ def : Pat < /********** ================== **********/ def : Pat < + (SGPRImm<(i32 imm)>:$imm), + (S_MOV_B32 imm:$imm) +>; + +def : Pat < + (SGPRImm<(f32 fpimm)>:$imm), + (S_MOV_B32 fpimm:$imm) +>; + +def : Pat < (i32 imm:$imm), (V_MOV_B32_e32 imm:$imm) >; diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 50fd4c7..5d12564 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -70,3 +70,14 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { } return NULL; } + +bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const { + if (!RC) { + return false; + } + return RC == &AMDGPU::SReg_32RegClass || + RC == &AMDGPU::SReg_64RegClass || + RC == &AMDGPU::SReg_128RegClass || + RC == &AMDGPU::SReg_256RegClass || + RC == &AMDGPU::SReg_512RegClass; +} diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index d0df4f9..ffc5797 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -45,6 +45,9 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// \brief Return the 'base' register class for this register. /// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc. const TargetRegisterClass *getPhysRegClass(unsigned Reg) const; + + /// \returns true if this class contains only SGPR registers + bool isSGPRClass(const TargetRegisterClass *RC) const; }; } // End namespace llvm -- cgit v1.1 From 38d5e1c36d954f1ff6489f58efd1d4865217cf9b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 Aug 2013 23:24:32 +0000 Subject: R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2 Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG instructions should make it easier for the register allocator to coalasce unnecessary copies. v2: - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs. 
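Because the REG_SEQUENCE operand layout is easy to get wrong, the emission step is worth restating in one condensed fragment. This paraphrases the selection code added above; the trailing SelectNodeTo call is abbreviated relative to the real function, which also bails out when an operand is not suitable for REG_SEQUENCE:

    // REG_SEQUENCE takes a register-class id followed by one
    // (value, sub-register index) pair per vector element; vectors of up
    // to 16 elements are handled, hence at most 16 * 2 + 1 operands.
    SDValue Ops[16 * 2 + 1];
    Ops[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    unsigned NumElts = N->getNumOperands();
    for (unsigned i = 0; i != NumElts; ++i) {
      Ops[1 + 2 * i] = N->getOperand(i);
      Ops[1 + 2 * i + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getValueType(0),
                                Ops, 1 + 2 * NumElts);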
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188427 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 88 ++++++++++++++++++++++++++-------- lib/Target/R600/AMDGPUInstructions.td | 49 ------------------- lib/Target/R600/AMDGPURegisterInfo.cpp | 32 +++++-------- lib/Target/R600/AMDGPURegisterInfo.h | 4 ++ lib/Target/R600/R600RegisterInfo.cpp | 10 ---- lib/Target/R600/R600RegisterInfo.h | 4 -- lib/Target/R600/SIInstructions.td | 10 ---- 7 files changed, 86 insertions(+), 111 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index d339b09..22bdb90 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -285,35 +285,85 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { break; } case ISD::BUILD_VECTOR: { + unsigned RegClassID; const AMDGPUSubtarget &ST = TM.getSubtarget(); - if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - break; + const AMDGPURegisterInfo *TRI = + static_cast(TM.getRegisterInfo()); + const SIRegisterInfo *SIRI = + static_cast(TM.getRegisterInfo()); + EVT VT = N->getValueType(0); + unsigned NumVectorElts = VT.getVectorNumElements(); + assert(VT.getVectorElementType().bitsEq(MVT::i32)); + if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + bool UseVReg = true; + for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); + U != E; ++U) { + if (!U->isMachineOpcode()) { + continue; + } + const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); + if (!RC) { + continue; + } + if (SIRI->isSGPRClass(RC)) { + UseVReg = false; + } + } + switch(NumVectorElts) { + case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID : + AMDGPU::SReg_32RegClassID; + break; + case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID : + AMDGPU::SReg_64RegClassID; + break; + case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID : + AMDGPU::SReg_128RegClassID; + break; + case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID : + AMDGPU::SReg_256RegClassID; + break; + case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : + AMDGPU::SReg_512RegClassID; + break; + } + } else { + // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG + // that adds a 128 bits reg copy when going through TwoAddressInstructions + // pass. We want to avoid 128 bits copies as much as possible because they + // can't be bundled by our scheduler. + switch(NumVectorElts) { + case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; + case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; + default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); + } } - unsigned RegClassID; - switch(N->getValueType(0).getVectorNumElements()) { - case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; - case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; - default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); + SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32); + + if (NumVectorElts == 1) { + return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, + VT.getVectorElementType(), + N->getOperand(0), RegClass); } - // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG - // that adds a 128 bits reg copy when going through TwoAddressInstructions - // pass. We want to avoid 128 bits copies as much as possible because they - // can't be bundled by our scheduler. 
- SDValue RegSeqArgs[9] = { - CurDAG->getTargetConstant(RegClassID, MVT::i32), - SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), - SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), - SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32), - SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32) - }; + + assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " + "supported yet"); + // 16 = Max Num Vector Elements + // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) + // 1 = Vector Register Class + SDValue RegSeqArgs[16 * 2 + 1]; + + RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32); bool IsRegSeq = true; for (unsigned i = 0; i < N->getNumOperands(); i++) { + // XXX: Why is this here? if (dyn_cast(N->getOperand(i))) { IsRegSeq = false; break; } - RegSeqArgs[2 * i + 1] = N->getOperand(i); + RegSeqArgs[1 + (2 * i)] = N->getOperand(i); + RegSeqArgs[1 + (2 * i) + 1] = + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); } if (!IsRegSeq) break; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index d6a7759..ddb655a 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -254,61 +254,12 @@ class Insert_Element ; -// Vector Build pattern -class Vector1_Build : Pat < - (vecType (build_vector elemType:$src)), - (vecType (COPY_TO_REGCLASS $src, rc)) ->; - -class Vector2_Build : Pat < - (vecType (build_vector elemType:$sub0, elemType:$sub1)), - (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1) ->; - class Vector4_Build : Pat < (vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3) >; -class Vector8_Build : Pat < - (vecType (build_vector elemType:$sub0, elemType:$sub1, - elemType:$sub2, elemType:$sub3, - elemType:$sub4, elemType:$sub5, - elemType:$sub6, elemType:$sub7)), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1), - $sub2, sub2), $sub3, sub3), - $sub4, sub4), $sub5, sub5), - $sub6, sub6), $sub7, sub7) ->; - -class Vector16_Build : Pat < - (vecType (build_vector elemType:$sub0, elemType:$sub1, - elemType:$sub2, elemType:$sub3, - elemType:$sub4, elemType:$sub5, - elemType:$sub6, elemType:$sub7, - elemType:$sub8, elemType:$sub9, - elemType:$sub10, elemType:$sub11, - elemType:$sub12, elemType:$sub13, - elemType:$sub14, elemType:$sub15)), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1), - $sub2, sub2), $sub3, sub3), - $sub4, sub4), $sub5, sub5), - $sub6, sub6), $sub7, sub7), - $sub8, sub8), $sub9, sub9), - $sub10, sub10), $sub11, sub11), - $sub12, sub12), $sub13, sub13), - $sub14, sub14), $sub15, sub15) ->; - // XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer // can handle COPY instructions. 
// bitconvert pattern diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp index 3402092..47617a7 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -46,27 +46,21 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { return 0; } +unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { + static const unsigned SubRegs[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, + AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, + AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, + AMDGPU::sub15 + }; + + assert (Channel < array_lengthof(SubRegs)); + return SubRegs[Channel]; +} + unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const { - switch(IndirectIndex) { - case 0: return AMDGPU::sub0; - case 1: return AMDGPU::sub1; - case 2: return AMDGPU::sub2; - case 3: return AMDGPU::sub3; - case 4: return AMDGPU::sub4; - case 5: return AMDGPU::sub5; - case 6: return AMDGPU::sub6; - case 7: return AMDGPU::sub7; - case 8: return AMDGPU::sub8; - case 9: return AMDGPU::sub9; - case 10: return AMDGPU::sub10; - case 11: return AMDGPU::sub11; - case 12: return AMDGPU::sub12; - case 13: return AMDGPU::sub13; - case 14: return AMDGPU::sub14; - case 15: return AMDGPU::sub15; - default: llvm_unreachable("indirect index out of range"); - } + return getSubRegFromChannel(IndirectIndex); } #define GET_REGINFO_TARGET_DESC diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index 7cbd34b..135d3dd 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -50,6 +50,10 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { assert(!"Unimplemented"); return NULL; } + /// \returns the sub reg enum value for the given \p Channel + /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) + unsigned getSubRegFromChannel(unsigned Channel) const; + const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index a42043b..4dc63fe 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -86,16 +86,6 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( } } -unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const { - switch (Channel) { - default: assert(!"Invalid channel index"); return 0; - case 0: return AMDGPU::sub0; - case 1: return AMDGPU::sub1; - case 2: return AMDGPU::sub2; - case 3: return AMDGPU::sub3; - } -} - const RegClassWeight &R600RegisterInfo::getRegClassWeight( const TargetRegisterClass *RC) const { return RCW; diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h index 9b286ee..d458e55 100644 --- a/lib/Target/R600/R600RegisterInfo.h +++ b/lib/Target/R600/R600RegisterInfo.h @@ -43,10 +43,6 @@ struct R600RegisterInfo : public AMDGPURegisterInfo { /// CFGStructurizer virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; - /// \returns the sub reg enum value for the given \p Channel - /// (e.g. 
getSubRegFromChannel(0) -> AMDGPU::sel_x) - unsigned getSubRegFromChannel(unsigned Channel) const; - virtual const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const; }; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b20d7c0..d4e0b03 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1525,16 +1525,6 @@ foreach Index = 0-15 in { >; } -def : Vector1_Build ; -def : Vector2_Build ; -def : Vector2_Build ; -def : Vector4_Build ; -def : Vector4_Build ; -def : Vector8_Build ; -def : Vector8_Build ; -def : Vector16_Build ; -def : Vector16_Build ; - def : BitConvert ; def : BitConvert ; -- cgit v1.1 From 68db37b952be497c94c7aa98cf26f3baadb5afd3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 14 Aug 2013 23:24:45 +0000 Subject: R600/SI: Convert v16i8 resource descriptors to i128 Now that compute support is better on SI, we can't continue using v16i8 for descriptors since this is also a legal type in OpenCL. This patch fixes numerous hangs with the piglit OpenCL test and since we now use a target specific DAG node for LOAD_CONSTANT with the correct MemOperandFlags, this should also fix: https://bugs.freedesktop.org/show_bug.cgi?id=66805 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188429 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUISelLowering.cpp | 6 ++ lib/Target/R600/AMDGPUISelLowering.h | 7 ++ lib/Target/R600/AMDGPUTargetMachine.cpp | 1 + lib/Target/R600/CMakeLists.txt | 1 + lib/Target/R600/SIISelLowering.cpp | 55 +++++++++++- lib/Target/R600/SIISelLowering.h | 3 + lib/Target/R600/SIInstrInfo.td | 20 +++++ lib/Target/R600/SIInstructions.td | 76 ++++++++--------- lib/Target/R600/SIIntrinsics.td | 6 +- lib/Target/R600/SIRegisterInfo.td | 2 +- lib/Target/R600/SITypeRewriter.cpp | 146 ++++++++++++++++++++++++++++++++ 12 files changed, 280 insertions(+), 44 deletions(-) create mode 100644 lib/Target/R600/SITypeRewriter.cpp (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 6b374cb..e2d1caf 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -34,6 +34,7 @@ FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm); // SI Passes +FunctionPass *createSITypeRewriter(); FunctionPass *createSIAnnotateControlFlowPass(); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index efd2756..9bb487e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -507,5 +507,11 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) NODE_NAME_CASE(REGISTER_STORE) + NODE_NAME_CASE(LOAD_CONSTANT) + NODE_NAME_CASE(LOAD_INPUT) + NODE_NAME_CASE(SAMPLE) + NODE_NAME_CASE(SAMPLEB) + NODE_NAME_CASE(SAMPLED) + NODE_NAME_CASE(SAMPLEL) } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index f614e23..5419e71 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -139,6 +139,13 @@ enum { CONST_ADDRESS, REGISTER_LOAD, REGISTER_STORE, + LOAD_INPUT, + SAMPLE, + SAMPLEB, + SAMPLED, + SAMPLEL, + FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, + LOAD_CONSTANT, 
LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 5ebc5f2..d77cddd 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -121,6 +121,7 @@ AMDGPUPassConfig::addPreISel() { const AMDGPUSubtarget &ST = TM->getSubtarget(); addPass(createFlattenCFGPass()); if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { + addPass(createSITypeRewriter()); addPass(createStructurizeCFGPass()); addPass(createSIAnnotateControlFlowPass()); } else { diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index fde187b..658eeea 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_target(R600CodeGen SILowerControlFlow.cpp SIMachineFunctionInfo.cpp SIRegisterInfo.cpp + SITypeRewriter.cpp ) add_dependencies(LLVMR600CodeGen AMDGPUCommonTableGen intrinsics_gen) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 4631f8a..40f0827 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -37,7 +37,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); @@ -70,6 +69,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); + setOperationAction(ISD::BITCAST, MVT::i128, Legal); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -82,6 +83,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); @@ -415,7 +419,31 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case Intrinsic::r600_read_tidig_z: return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, AMDGPU::VGPR2, VT); - + case AMDGPUIntrinsic::SI_load_const: { + SDValue Ops [] = { + ResourceDescriptorToi128(Op.getOperand(1), DAG), + Op.getOperand(2) + }; + + MachineMemOperand *MMO = new MachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, + VT.getSizeInBits() / 8, 4); + return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL, + Op->getVTList(), Ops, 2, VT, MMO); + } + case AMDGPUIntrinsic::SI_sample: + return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG); + case AMDGPUIntrinsic::SI_sampleb: + return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG); + case AMDGPUIntrinsic::SI_sampled: + return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG); + case AMDGPUIntrinsic::SI_samplel: + return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG); + case AMDGPUIntrinsic::SI_vs_load_input: + return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT, + ResourceDescriptorToi128(Op.getOperand(1), DAG), + Op.getOperand(2), + Op.getOperand(3)); } } } @@ -516,6 +544,29 @@ SDValue SITargetLowering::LowerBRCOND(SDValue 
BRCOND, return Chain; } +SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op, + SelectionDAG &DAG) const { + + if (Op.getValueType() == MVT::i128) { + return Op; + } + + assert(Op.getOpcode() == ISD::UNDEF); + + return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128, + DAG.getConstant(0, MVT::i64), + DAG.getConstant(0, MVT::i64)); +} + +SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode, + const SDValue &Op, + SelectionDAG &DAG) const { + return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1), + Op.getOperand(2), + ResourceDescriptorToi128(Op.getOperand(3), DAG), + Op.getOperand(4)); +} + SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index effbf1f..321e58c 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -23,11 +23,14 @@ namespace llvm { class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL, SDValue Chain, unsigned Offset) const; + SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, + SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const; bool foldImm(SDValue &Operand, int32_t &Immediate, bool &ScalarSlotUsed) const; const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG, diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index df6d994..b741978 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -16,6 +16,26 @@ def SIadd64bit32bit : SDNode<"ISD::ADD", SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]> >; +def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", + SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>, + [SDNPMayLoad, SDNPMemOperand] +>; + +def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", + SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>, + SDTCisVT<3, i32>]> +>; + +class SDSample : SDNode , SDTCisVec<1>, SDTCisVT<2, v32i8>, + SDTCisVT<3, i128>, SDTCisVT<4, i32>]> +>; + +def SIsample : SDSample<"AMDGPUISD::SAMPLE">; +def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">; +def SIsampled : SDSample<"AMDGPUISD::SAMPLED">; +def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">; + // Transformation function, extract the lower 32bit of a 64bit immediate def LO32 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index d4e0b03..4704217 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1303,7 +1303,7 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< - (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), + (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset) >; @@ -1324,63 +1324,63 @@ def : Pat < /********** Image sampling patterns **********/ /********** ======================= **********/ -/* int_SI_sample for simple 1D texture lookup */ +/* SIsample for simple 1D texture lookup */ 
def : Pat < - (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm), + (SIsample v1i32:$addr, v32i8:$rsrc, i128:$sampler, imm), (IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SamplePattern : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm), +class SamplePattern : Pat < + (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleRectPattern : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT), +class SampleRectPattern : Pat < + (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT), (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleArrayPattern : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY), +class SampleArrayPattern : Pat < + (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleShadowPattern : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW), + (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleShadowArrayPattern : Pat < - (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY), + (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -/* int_SI_sample* for texture lookups consuming more address parameters */ +/* SIsample* for texture lookups consuming more address parameters */ multiclass SamplePatterns { - def : SamplePattern ; - def : SampleRectPattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; - - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; - - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; - - def : SamplePattern ; - def : SampleArrayPattern ; - def : SampleShadowPattern ; - def : SampleShadowArrayPattern ; + def : SamplePattern ; + def : SampleRectPattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; + + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; + + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; + + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; } defm : SamplePatterns; // 2. Offset loaded in an 32bit SGPR def : Pat < - (int_SI_load_const v16i8:$sbase, imm:$offset), + (SIload_constant i128:$sbase, imm:$offset), (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; // 3. 
Offset in an 32Bit VGPR def : Pat < - (int_SI_load_const v16i8:$sbase, i32:$voff), + (SIload_constant i128:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff) >; @@ -1777,7 +1777,7 @@ defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; -defm : SMRD_Pattern ; +defm : SMRD_Pattern ; defm : SMRD_Pattern ; //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 2fa073e..d6e26ad 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -17,10 +17,10 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; + def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; - class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; def int_SI_sampleb : Sample; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 82d1e71..0b90772 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -157,7 +157,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [i128], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp new file mode 100644 index 0000000..9da11e8 --- /dev/null +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -0,0 +1,146 @@ +//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass removes performs the following type substitution on all +/// non-compute shaders: +/// +/// v16i8 => i128 +/// - v16i8 is used for constant memory resource descriptors. This type is +/// legal for some compute APIs, and we don't want to declare it as legal +/// in the backend, because we want the legalizer to expand all v16i8 +/// operations. 
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InstVisitor.h"
+
+using namespace llvm;
+
+namespace {
+
+class SITypeRewriter : public FunctionPass,
+                       public InstVisitor<SITypeRewriter> {
+
+  static char ID;
+  Module *Mod;
+  Type *v16i8;
+  Type *i128;
+
+public:
+  SITypeRewriter() : FunctionPass(ID) { }
+  virtual bool doInitialization(Module &M);
+  virtual bool runOnFunction(Function &F);
+  virtual const char *getPassName() const {
+    return "SI Type Rewriter";
+  }
+  void visitLoadInst(LoadInst &I);
+  void visitCallInst(CallInst &I);
+  void visitBitCast(BitCastInst &I);
+};
+
+} // End anonymous namespace
+
+char SITypeRewriter::ID = 0;
+
+bool SITypeRewriter::doInitialization(Module &M) {
+  Mod = &M;
+  v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
+  i128 = Type::getIntNTy(M.getContext(), 128);
+  return false;
+}
+
+bool SITypeRewriter::runOnFunction(Function &F) {
+  AttributeSet Set = F.getAttributes();
+  Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, "ShaderType");
+
+  unsigned ShaderType = ShaderType::COMPUTE;
+  if (A.isStringAttribute()) {
+    StringRef Str = A.getValueAsString();
+    Str.getAsInteger(0, ShaderType);
+  }
+  if (ShaderType != ShaderType::COMPUTE) {
+    visit(F);
+  }
+
+  visit(F);
+
+  return false;
+}
+
+void SITypeRewriter::visitLoadInst(LoadInst &I) {
+  Value *Ptr = I.getPointerOperand();
+  Type *PtrTy = Ptr->getType();
+  Type *ElemTy = PtrTy->getPointerElementType();
+  IRBuilder<> Builder(&I);
+  if (ElemTy == v16i8) {
+    Value *BitCast = Builder.CreateBitCast(Ptr,
+        Type::getIntNPtrTy(I.getContext(), 128, 2));
+    LoadInst *Load = Builder.CreateLoad(BitCast);
+    SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+    I.getAllMetadataOtherThanDebugLoc(MD);
+    for (unsigned i = 0, e = MD.size(); i != e; ++i) {
+      Load->setMetadata(MD[i].first, MD[i].second);
+    }
+    Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
+    I.replaceAllUsesWith(BitCastLoad);
+    I.eraseFromParent();
+  }
+}
+
+void SITypeRewriter::visitCallInst(CallInst &I) {
+  IRBuilder<> Builder(&I);
+  SmallVector<Value *, 8> Args;
+  SmallVector<Type *, 8> Types;
+  bool NeedToReplace = false;
+  Function *F = I.getCalledFunction();
+  std::string Name = F->getName().str();
+  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+    Value *Arg = I.getArgOperand(i);
+    if (Arg->getType() == v16i8) {
+      Args.push_back(Builder.CreateBitCast(Arg, i128));
+      Types.push_back(i128);
+      NeedToReplace = true;
+      Name = Name + ".i128";
+    } else {
+      Args.push_back(Arg);
+      Types.push_back(Arg->getType());
+    }
+  }
+
+  if (!NeedToReplace) {
+    return;
+  }
+  Function *NewF = Mod->getFunction(Name);
+  if (!NewF) {
+    NewF = Function::Create(FunctionType::get(F->getReturnType(), Types,
+        false), GlobalValue::ExternalLinkage, Name, Mod);
+    NewF->setAttributes(F->getAttributes());
+  }
+  I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
+  I.eraseFromParent();
+}
+
+void SITypeRewriter::visitBitCast(BitCastInst &I) {
+  IRBuilder<> Builder(&I);
+  if (I.getDestTy() != i128) {
+    return;
+  }
+
+  if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
+    if (Op->getSrcTy() == i128) {
+      I.replaceAllUsesWith(Op->getOperand(0));
+      I.eraseFromParent();
+    }
+  }
+}
+
+FunctionPass *llvm::createSITypeRewriter() {
+  return new SITypeRewriter();
+}
-- cgit v1.1

From e8e33f448e8830590c498ac5101ef8b27446ca3b Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Wed, 14 Aug 2013 23:24:53 +0000
Subject: R600/SI: Replace v1i32 type with i32 in imageload and sample intrinsics

git-svn-id:
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188430 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/SIISelLowering.cpp |  2 --
 lib/Target/R600/SIInstrInfo.td     |  2 +-
 lib/Target/R600/SIInstructions.td  |  2 +-
 lib/Target/R600/SITypeRewriter.cpp | 16 ++++++++++++++++
 4 files changed, 18 insertions(+), 4 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 40f0827..30a510d 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -43,8 +43,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
   addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);

-  addRegisterClass(MVT::v1i32, &AMDGPU::VSrc_32RegClass);
-
   addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
   addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
   addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index b741978..2639456 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -27,7 +27,7 @@ def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
 >;

 class SDSample<string opcode> : SDNode <opcode,
-  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVec<1>, SDTCisVT<2, v32i8>,
+  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
                        SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
 >;

diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 4704217..e719cb3 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1326,7 +1326,7 @@ def : Pat <

 /* SIsample for simple 1D texture lookup */
 def : Pat <
-  (SIsample v1i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
+  (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
   (IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
 >;

diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index 9da11e8..f194d8b 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -16,6 +16,9 @@
 ///     legal for some compute APIs, and we don't want to declare it as legal
 ///     in the backend, because we want the legalizer to expand all v16i8
 ///     operations.
+/// v1* => *
+/// - Having v1* types complicates the legalizer and we can easily replace
+///   them with the element type.
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
@@ -109,6 +112,19 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
       Types.push_back(i128);
       NeedToReplace = true;
       Name = Name + ".i128";
+    } else if (Arg->getType()->isVectorTy() &&
+               Arg->getType()->getVectorNumElements() == 1 &&
+               Arg->getType()->getVectorElementType() ==
+                                              Type::getInt32Ty(I.getContext())){
+      Type *ElementTy = Arg->getType()->getVectorElementType();
+      std::string TypeName = "i32";
+      InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg);
+      assert(Def);
+      Args.push_back(Def->getOperand(1));
+      Types.push_back(ElementTy);
+      std::string VecTypeName = "v1" + TypeName;
+      Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
+      NeedToReplace = true;
     } else {
       Args.push_back(Arg);
       Types.push_back(Arg->getType());
-- 
cgit v1.1

From a41520cf9b9cefed2091a0624a34c5f7fdb42a68 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Wed, 14 Aug 2013 23:25:00 +0000
Subject: R600/SI: Improve legalization of vector operations

This should fix hangs in the OpenCL piglit tests.
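In rough SelectionDAG terms (a sketch derived from the code in this patch,
not part of the original message), the new custom CONCAT_VECTORS and
EXTRACT_SUBVECTOR lowering scalarizes both nodes into per-element extracts
feeding a single build_vector:

  (extract_subvector v4i32:$v, 2)
    -> (build_vector (extract_vector_elt $v, 2), (extract_vector_elt $v, 3))

  (concat_vectors v2i32:$a, v2i32:$b)
    -> (build_vector (extract_vector_elt $a, 0), (extract_vector_elt $a, 1),
                     (extract_vector_elt $b, 0), (extract_vector_elt $b, 1))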
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188431 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 49 ++++++++++++++++++++++++++++++++-- lib/Target/R600/AMDGPUISelLowering.h | 5 ++++ lib/Target/R600/SIISelLowering.cpp | 6 ++--- lib/Target/R600/SIISelLowering.h | 1 + 4 files changed, 56 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 9bb487e..1e79998 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -79,8 +79,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::f64, Promote); AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Expand); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Expand); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); setOperationAction(ISD::FNEG, MVT::v2f32, Expand); setOperationAction(ISD::FNEG, MVT::v4f32, Expand); @@ -182,6 +184,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); // AMDGPU DAG lowering + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); } @@ -208,6 +212,47 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? 
MVT::i64 : MVT::i32); } +void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, + SmallVectorImpl &Args, + unsigned Start, + unsigned Count) const { + EVT VT = Op.getValueType(); + for (unsigned i = Start, e = Start + Count; i != e; ++i) { + Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), + VT.getVectorElementType(), + Op, DAG.getConstant(i, MVT::i32))); + } +} + +SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + SmallVector Args; + SDValue A = Op.getOperand(0); + SDValue B = Op.getOperand(1); + + ExtractVectorElements(A, DAG, Args, 0, + A.getValueType().getVectorNumElements()); + ExtractVectorElements(B, DAG, Args, 0, + B.getValueType().getVectorNumElements()); + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), + &Args[0], Args.size()); +} + +SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, + SelectionDAG &DAG) const { + + SmallVector Args; + EVT VT = Op.getValueType(); + unsigned Start = cast(Op.getOperand(1))->getZExtValue(); + ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, + VT.getVectorNumElements()); + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), + &Args[0], Args.size()); +} + + SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 5419e71..9adbb54 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -25,6 +25,11 @@ class MachineRegisterInfo; class AMDGPUTargetLowering : public TargetLowering { private: + void ExtractVectorElements(SDValue Op, SelectionDAG &DAG, + SmallVectorImpl &Args, + unsigned Start, unsigned Count) const; + SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 30a510d..0bd8bce 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -34,9 +34,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass); - addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass); - addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); @@ -110,6 +107,9 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, return VT.bitsGT(MVT::i32); } +bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const { + return VT.bitsLE(MVT::i8); +} SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL, SDValue Chain, diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 321e58c..9c54a6f 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -47,6 +47,7 @@ class SITargetLowering : public AMDGPUTargetLowering { public: SITargetLowering(TargetMachine &tm); bool allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const; + virtual bool shouldSplitVectorElementType(EVT VT) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, -- 
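(A sketch of the intent of the new target hook above; the semantics are
assumed from the hook's name and its use here, not spelled out in the patch:

  // SI: split, rather than widen, illegal vectors with small elements
  bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
    return VT.bitsLE(MVT::i8);
  }

so vectors of i1/i8 elements, whose register classes are removed above, are
now broken down by the generic legalizer instead of being widened.)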
cgit v1.1 From 5a0910b34959fa8e0b5a49908f51a15bc3a48069 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Aug 2013 02:33:50 +0000 Subject: Replace getValueType().getSimpleVT() with getSimpleValueType(). Also remove one weird cast from MVT->EVT just to call getSimpleVT(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188441 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 2 +- lib/Target/PowerPC/PPCISelLowering.cpp | 8 +- lib/Target/X86/X86FastISel.cpp | 2 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 4 +- lib/Target/X86/X86ISelLowering.cpp | 138 ++++++++++++++++----------------- 5 files changed, 77 insertions(+), 77 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index ed054aa..fc847b8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1933,7 +1933,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, !VA.isRegLoc() || !ArgLocs[++i].isRegLoc()) return false; } else { - switch (static_cast(ArgVT).getSimpleVT().SimpleTy) { + switch (ArgVT.SimpleTy) { default: return false; case MVT::i1: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 2a28d73..2b83a22 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3947,7 +3947,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, continue; } - switch (Arg.getValueType().getSimpleVT().SimpleTy) { + switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: @@ -3970,7 +3970,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // must be passed right-justified in the stack doubleword, and // in the GPR, if one is available. SDValue StoreOff; - if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) { + if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } else @@ -4278,7 +4278,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, continue; } - switch (Arg.getValueType().getSimpleVT().SimpleTy) { + switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: @@ -4743,7 +4743,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; - switch (Op.getValueType().getSimpleVT().SimpleTy) { + switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 5bc3420..8e942c8 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1026,7 +1026,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { return false; // Handle zero-extension from i1 to i8, which is common. - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()).getSimpleVT(); + MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType()); if (SrcVT.SimpleTy == MVT::i1) { // Set the high bits to zero. 
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 9465420..8f8d488 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2609,7 +2609,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // On x86-32, only the ABCD registers have 8-bit subregisters. if (!Subtarget->is64Bit()) { const TargetRegisterClass *TRC; - switch (N0.getValueType().getSimpleVT().SimpleTy) { + switch (N0.getSimpleValueType().SimpleTy) { case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; default: llvm_unreachable("Unsupported TEST operand type!"); @@ -2644,7 +2644,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Put the value in an ABCD register. const TargetRegisterClass *TRC; - switch (N0.getValueType().getSimpleVT().SimpleTy) { + switch (N0.getSimpleValueType().SimpleTy) { case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f234c86..4a1f1ad 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3827,7 +3827,7 @@ static bool isMOVLHPSMask(ArrayRef Mask, MVT VT) { static SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); SDLoc dl(SVOp); if (VT != MVT::v8i32 && VT != MVT::v8f32) @@ -4081,7 +4081,7 @@ static bool isVPERM2X128Mask(ArrayRef Mask, EVT VT, bool HasFp256) { /// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle /// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions. static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); unsigned HalfSize = VT.getVectorNumElements()/2; @@ -4296,7 +4296,7 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) { uint64_t Index = cast(N->getOperand(1).getNode())->getZExtValue(); - MVT VT = N->getValueType(0).getSimpleVT(); + MVT VT = N->getSimpleValueType(0); unsigned ElSize = VT.getVectorElementType().getSizeInBits(); bool Result = (Index * ElSize) % vecWidth == 0; @@ -4314,7 +4314,7 @@ static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) { uint64_t Index = cast(N->getOperand(2).getNode())->getZExtValue(); - MVT VT = N->getValueType(0).getSimpleVT(); + MVT VT = N->getSimpleValueType(0); unsigned ElSize = VT.getVectorElementType().getSizeInBits(); bool Result = (Index * ElSize) % vecWidth == 0; @@ -4341,7 +4341,7 @@ bool X86::isVEXTRACT256Index(SDNode *N) { /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. /// Handles 128-bit and 256-bit. static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { - MVT VT = N->getValueType(0).getSimpleVT(); + MVT VT = N->getSimpleValueType(0); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for PSHUF/SHUFP"); @@ -4371,7 +4371,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. 
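/// (Illustrative worked example, not from the patch: for a v8i16 mask
/// <0,1,2,3,4,5,7,6> the high-half elements 4,5,7,6 are lane-relative
/// 0,1,3,2, so the immediate returned below is 0b10110100, i.e. 0xB4.)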
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { - MVT VT = N->getValueType(0).getSimpleVT(); + MVT VT = N->getSimpleValueType(0); assert((VT == MVT::v8i16 || VT == MVT::v16i16) && "Unsupported vector type for PSHUFHW"); @@ -4395,7 +4395,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { - MVT VT = N->getValueType(0).getSimpleVT(); + MVT VT = N->getSimpleValueType(0); assert((VT == MVT::v8i16 || VT == MVT::v16i16) && "Unsupported vector type for PSHUFHW"); @@ -4419,7 +4419,7 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; unsigned NumElts = VT.getVectorNumElements(); @@ -4448,7 +4448,7 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { uint64_t Index = cast(N->getOperand(1).getNode())->getZExtValue(); - MVT VecVT = N->getOperand(0).getValueType().getSimpleVT(); + MVT VecVT = N->getOperand(0).getSimpleValueType(); MVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits(); @@ -4463,7 +4463,7 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) { uint64_t Index = cast(N->getOperand(2).getNode())->getZExtValue(); - MVT VecVT = N->getValueType(0).getSimpleVT(); + MVT VecVT = N->getSimpleValueType(0); MVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits(); @@ -4512,7 +4512,7 @@ bool X86::isZeroNode(SDValue Elt) { /// their permute mask. static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); unsigned NumElems = VT.getVectorNumElements(); SmallVector MaskVec; @@ -4789,7 +4789,7 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { /// getLegalSplat - Generate a legal splat with supported x86 shuffles static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { - MVT VT = V.getValueType().getSimpleVT(); + MVT VT = V.getSimpleValueType(); SDLoc dl(V); if (VT.is128BitVector()) { @@ -4815,7 +4815,7 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { /// PromoteSplat - Splat is promoted to target supported vector shuffles. static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { - MVT SrcVT = SV->getValueType(0).getSimpleVT(); + MVT SrcVT = SV->getSimpleValueType(0); SDValue V1 = SV->getOperand(0); SDLoc dl(SV); @@ -4860,7 +4860,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool IsZero, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - MVT VT = V2.getValueType().getSimpleVT(); + MVT VT = V2.getSimpleValueType(); SDValue V1 = IsZero ? 
getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); unsigned NumElems = VT.getVectorNumElements(); @@ -4978,7 +4978,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, // Recurse into target specific vector shuffles to find scalars. if (isTargetShuffle(Opcode)) { - MVT ShufVT = V.getValueType().getSimpleVT(); + MVT ShufVT = V.getSimpleValueType(); unsigned NumElems = ShufVT.getVectorNumElements(); SmallVector ShuffleMask; bool IsUnary; @@ -5077,7 +5077,7 @@ bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { unsigned NumElems = - SVOp->getValueType(0).getSimpleVT().getVectorNumElements(); + SVOp->getSimpleValueType(0).getVectorNumElements(); unsigned NumZeros = getNumOfConsecutiveZeros( SVOp, NumElems, false /* check zeros from right */, DAG, SVOp->getMaskElt(0)); @@ -5112,7 +5112,7 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { unsigned NumElems = - SVOp->getValueType(0).getSimpleVT().getVectorNumElements(); + SVOp->getSimpleValueType(0).getVectorNumElements(); unsigned NumZeros = getNumOfConsecutiveZeros( SVOp, NumElems, true /* check zeros from left */, DAG, NumElems - SVOp->getMaskElt(NumElems - 1) - 1); @@ -5148,7 +5148,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { // Although the logic below support any bitwidth size, there are no // shift instructions which handle more than 128-bit vectors. - if (!SVOp->getValueType(0).getSimpleVT().is128BitVector()) + if (!SVOp->getSimpleValueType(0).is128BitVector()) return false; if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || @@ -5436,7 +5436,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, if (!Subtarget->hasFp256()) return SDValue(); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && @@ -5563,7 +5563,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, } static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); // Skip if insert_vec_elt is not supported. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -5640,7 +5640,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) && "Unexpected type in LowerBUILD_VECTORvXi1!"); @@ -5713,7 +5713,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { // res = allOnes ### CMOVNE -1, %res // else // res = allZero - MVT InVT = In.getValueType().getSimpleVT(); + MVT InVT = In.getSimpleValueType(); SDValue Bit1 = DAG.getNode(ISD::AND, dl, InVT, In, DAG.getConstant(1, InVT)); EFLAGS = EmitTest(Bit1, X86::COND_NE, DAG); X86CC = DAG.getConstant(X86::COND_NE, MVT::i8); @@ -5742,7 +5742,7 @@ SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); MVT ExtVT = VT.getVectorElementType(); unsigned NumElems = Op.getNumOperands(); @@ -6078,7 +6078,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); - MVT ResVT = Op.getValueType().getSimpleVT(); + MVT ResVT = Op.getSimpleValueType(); assert((ResVT.is256BitVector() || ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); @@ -6107,7 +6107,7 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); SDLoc dl(SVOp); - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); MVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); @@ -6548,7 +6548,7 @@ static SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); SDLoc dl(SVOp); @@ -6596,7 +6596,7 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); SDLoc dl(SVOp); unsigned NumElems = VT.getVectorNumElements(); MVT NewVT; @@ -6675,7 +6675,7 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { if (NewOp.getNode()) return NewOp; - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); unsigned NumElems = VT.getVectorNumElements(); unsigned NumLaneElems = NumElems / 2; @@ -6787,7 +6787,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); SDLoc dl(SVOp); - MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT VT = SVOp->getSimpleValueType(0); assert(VT.is128BitVector() && "Unsupported vector size"); @@ -7132,7 +7132,7 @@ static SDValue NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -7167,7 +7167,7 @@ NormalizeVectorShuffle(SDValue Op, 
const X86Subtarget *Subtarget, if (ISD::isBuildVectorAllZeros(V2.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { - MVT NewVT = NewOp.getValueType().getSimpleVT(); + MVT NewVT = NewOp.getSimpleValueType(); if (isCommutedMOVLMask(cast(NewOp)->getMask(), NewVT, true, false)) return getVZextMovL(VT, NewVT, NewOp.getOperand(0), @@ -7176,7 +7176,7 @@ NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { - MVT NewVT = NewOp.getValueType().getSimpleVT(); + MVT NewVT = NewOp.getSimpleValueType(); if (isMOVLMask(cast(NewOp)->getMask(), NewVT)) return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget, dl); @@ -7191,7 +7191,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); unsigned NumElems = VT.getVectorNumElements(); bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; @@ -7511,10 +7511,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector()) + if (!Op.getOperand(0).getSimpleValueType().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { @@ -7580,7 +7580,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return SDValue(); SDValue Vec = Op.getOperand(0); - MVT VecVT = Vec.getValueType().getSimpleVT(); + MVT VecVT = Vec.getSimpleValueType(); // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. @@ -7609,7 +7609,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return Res; } - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { SDValue Vec = Op.getOperand(0); @@ -7636,7 +7636,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // SHUFPS the element to the lowest double word, then movss. int Mask[4] = { static_cast(Idx), -1, -1, -1 }; - MVT VVT = Op.getOperand(0).getValueType().getSimpleVT(); + MVT VVT = Op.getOperand(0).getSimpleValueType(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, @@ -7655,7 +7655,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. 
int Mask[2] = { 1, -1 }; - MVT VVT = Op.getOperand(0).getValueType().getSimpleVT(); + MVT VVT = Op.getOperand(0).getSimpleValueType(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, @@ -7666,7 +7666,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, } static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); SDLoc dl(Op); @@ -7720,7 +7720,7 @@ static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); SDLoc dl(Op); @@ -7770,7 +7770,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { LLVMContext *Context = DAG.getContext(); SDLoc dl(Op); - MVT OpVT = Op.getValueType().getSimpleVT(); + MVT OpVT = Op.getSimpleValueType(); // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. @@ -8794,9 +8794,9 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { - MVT VT = Op->getValueType(0).getSimpleVT(); + MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); - MVT InVT = In.getValueType().getSimpleVT(); + MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); // Optimize vectors in AVX mode: @@ -8847,9 +8847,9 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); - MVT SVT = In.getValueType().getSimpleVT(); + MVT SVT = In.getSimpleValueType(); if (Subtarget->hasFp256()) { SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); @@ -8879,9 +8879,9 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); - MVT SVT = In.getValueType().getSimpleVT(); + MVT SVT = In.getSimpleValueType(); if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. 
@@ -8996,7 +8996,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); if (VT.isVector()) { if (VT == MVT::v8i16) return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, @@ -9040,9 +9040,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); - MVT SVT = In.getValueType().getSimpleVT(); + MVT SVT = In.getSimpleValueType(); assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); @@ -9054,7 +9054,7 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); SDLoc dl(Op); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); MVT EltVT = VT; unsigned NumElts = VT == MVT::f64 ? 2 : 4; if (VT.isVector()) { @@ -9088,7 +9088,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); SDLoc dl(Op); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); MVT EltVT = VT; unsigned NumElts = VT == MVT::f64 ? 2 : 4; if (VT.isVector()) { @@ -9125,8 +9125,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDLoc dl(Op); - MVT VT = Op.getValueType().getSimpleVT(); - MVT SrcVT = Op1.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); + MVT SrcVT = Op1.getSimpleValueType(); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { @@ -9202,7 +9202,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); SDLoc dl(Op); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1). SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0, @@ -9682,7 +9682,7 @@ static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, // Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128 // ones, and then concatenate the result back. 
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); @@ -9714,7 +9714,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 && Op.getValueType().getScalarType() == MVT::i1 && @@ -9750,14 +9750,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); - bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint(); + bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint(); SDLoc dl(Op); if (isFP) { #ifndef NDEBUG - MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT(); + MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif @@ -9945,7 +9945,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); @@ -9989,7 +9989,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { } } - bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint(); + bool isFP = Op1.getSimpleValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) return SDValue(); @@ -10144,7 +10144,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); - MVT VT = Op.getValueType().getSimpleVT(); + MVT VT = Op.getSimpleValueType(); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && @@ -10285,9 +10285,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_AVX512(SDValue Op, SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op->getValueType(0).getSimpleVT(); + MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); - MVT InVT = In.getValueType().getSimpleVT(); + MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) @@ -12795,8 +12795,8 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - MVT SrcVT = Op.getOperand(0).getValueType().getSimpleVT(); - MVT DstVT = Op.getValueType().getSimpleVT(); + MVT SrcVT = Op.getOperand(0).getSimpleValueType(); + MVT DstVT = Op.getSimpleValueType(); assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && Subtarget->hasMMX() && "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || @@ -17887,7 +17887,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { RHS.getOpcode() != ISD::VECTOR_SHUFFLE) return false; - MVT VT = LHS.getValueType().getSimpleVT(); + MVT VT = LHS.getSimpleValueType(); assert((VT.is128BitVector() || 
VT.is256BitVector()) && "Unsupported vector type for horizontal add/sub"); -- cgit v1.1 From 0ff1190888f5b24f64a196392c452800447186d8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Aug 2013 02:44:19 +0000 Subject: Replace getValueType().getSimpleVT() with getSimpleValueType(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index ba85e35..01fbdb3 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -249,7 +249,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Addr; SDValue Offset, Base; unsigned Opcode; - MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy; + MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; if (SelectDirectAddr(N1, Addr)) { switch (TargetVT) { @@ -1347,8 +1347,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Addr; SDValue Offset, Base; unsigned Opcode; - MVT::SimpleValueType SourceVT = - N1.getNode()->getValueType(0).getSimpleVT().SimpleTy; + MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy; if (SelectDirectAddr(N2, Addr)) { switch (SourceVT) { -- cgit v1.1 From 8d725b9a03a3fb1136ccd084523990e12a0d9aec Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Aug 2013 05:33:45 +0000 Subject: Use MVT in place of EVT in more X86 operation lowering functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188445 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 71 ++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 38 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4a1f1ad..3144038 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4049,7 +4049,7 @@ static bool isMOVLMask(ArrayRef Mask, EVT VT) { /// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> /// The first half comes from the second half of V1 and the second half from the /// the second half of V2. -static bool isVPERM2X128Mask(ArrayRef Mask, EVT VT, bool HasFp256) { +static bool isVPERM2X128Mask(ArrayRef Mask, MVT VT, bool HasFp256) { if (!HasFp256 || !VT.is256BitVector()) return false; @@ -4103,7 +4103,7 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { } // Symetric in-lane mask. Each lane has 4 elements (for imm8) -static bool isPermImmMask(ArrayRef Mask, EVT VT, unsigned& Imm8) { +static bool isPermImmMask(ArrayRef Mask, MVT VT, unsigned& Imm8) { unsigned EltSize = VT.getVectorElementType().getSizeInBits(); if (EltSize < 32) return false; @@ -4147,7 +4147,7 @@ static bool isPermImmMask(ArrayRef Mask, EVT VT, unsigned& Imm8) { /// to the same elements of the low, but to the higher half of the source. /// In VPERMILPD the two lanes could be shuffled independently of each other /// with the same restriction that lanes can't be crossed. Also handles PSHUFDY. -static bool isVPERMILPMask(ArrayRef Mask, EVT VT, bool HasFp256) { +static bool isVPERMILPMask(ArrayRef Mask, MVT VT, bool HasFp256) { if (!HasFp256) return false; @@ -4743,7 +4743,7 @@ static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, } /// getUnpackl - Returns a vector_shuffle node for an unpackl operation. 
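/// (Illustrative, not from the patch: for v4i32 this interleaves the low
/// halves of the two inputs, i.e. the mask built below is <0, 4, 1, 5>.)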
-static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, +static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector Mask; @@ -4755,7 +4755,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, } /// getUnpackh - Returns a vector_shuffle node for an unpackh operation. -static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, +static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector Mask; @@ -4771,7 +4771,7 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, // Generate shuffles which repeat i16 and i8 several times until they can be // represented by v4f32 and then be manipulated by target suported shuffles. static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { - EVT VT = V.getValueType(); + MVT VT = V.getSimpleValueType(); int NumElems = VT.getVectorNumElements(); SDLoc dl(V); @@ -6633,7 +6633,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, /// getVZextMovL - Return a zero-extending vector move low node. /// -static SDValue getVZextMovL(MVT VT, EVT OpVT, +static SDValue getVZextMovL(MVT VT, MVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, SDLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { @@ -6938,7 +6938,7 @@ static bool MayFoldVectorLoad(SDValue V) { static SDValue getMOVDDup(SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); // Canonizalize to v2f64. V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); @@ -6952,7 +6952,7 @@ SDValue getMOVLowToHigh(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); assert(VT != MVT::v2i64 && "unsupported shuffle type"); @@ -6970,7 +6970,7 @@ static SDValue getMOVHighToLow(SDValue &Op, SDLoc &dl, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); assert((VT == MVT::v4i32 || VT == MVT::v4f32) && "unsupported shuffle type"); @@ -6986,7 +6986,7 @@ static SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); unsigned NumElems = VT.getVectorNumElements(); // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second @@ -7046,7 +7046,7 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget, if (!Subtarget->hasSSE41()) return SDValue(); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); // Only AVX2 support 256-bit vector integer extending. 
if (!Subtarget->hasInt256() && VT.is256BitVector()) @@ -7085,10 +7085,9 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } - LLVMContext *Context = DAG.getContext(); unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift; - EVT NeVT = EVT::getIntegerVT(*Context, NBits); - EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift); + MVT NeVT = MVT::getIntegerVT(NBits); + MVT NVT = MVT::getVectorVT(NeVT, NumElems >> Shift); if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT)) return SDValue(); @@ -7098,8 +7097,8 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget, if (V1.getOpcode() == ISD::BITCAST && V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && - V1.getOperand(0) - .getOperand(0).getValueType().getSizeInBits() == SignificantBits) { + V1.getOperand(0).getOperand(0) + .getSimpleValueType().getSizeInBits() == SignificantBits) { // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x) SDValue V = V1.getOperand(0).getOperand(0).getOperand(0); ConstantSDNode *CIdx = @@ -7108,19 +7107,19 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget, // selection to fold it. Otherwise, we will short the conversion sequence. if (CIdx && CIdx->getZExtValue() == 0 && (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) { - if (V.getValueSizeInBits() > V1.getValueSizeInBits()) { + MVT FullVT = V.getSimpleValueType(); + MVT V1VT = V1.getSimpleValueType(); + if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) { // The "ext_vec_elt" node is wider than the result node. // In this case we should extract subvector from V. // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)). - unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits(); - EVT FullVT = V.getValueType(); - EVT SubVecVT = EVT::getVectorVT(*Context, - FullVT.getVectorElementType(), + unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits(); + MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(), FullVT.getVectorNumElements()/Ratio); V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V, DAG.getIntPtrConstant(0)); } - V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V); + V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V); } } @@ -7454,10 +7453,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) || VT.is512BitVector()) { - EVT MaskEltVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getVectorElementType().getSizeInBits()); - EVT MaskVectorVT = - EVT::getVectorVT(*DAG.getContext(),MaskEltVT, NumElems); + MVT MaskEltVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits()); + MVT MaskVectorVT = MVT::getVectorVT(MaskEltVT, NumElems); SmallVector permclMask; for (unsigned i = 0; i != NumElems; ++i) { permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT)); @@ -7590,7 +7587,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // Get the 128-bit vector. 
Vec = Extract128BitVector(Vec, IdxVal, DAG, dl); - EVT EltVT = VecVT.getVectorElementType(); + MVT EltVT = VecVT.getVectorElementType(); unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits(); @@ -7768,7 +7765,6 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { } static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { - LLVMContext *Context = DAG.getContext(); SDLoc dl(Op); MVT OpVT = Op.getSimpleValueType(); @@ -7777,8 +7773,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { if (!OpVT.is128BitVector()) { // Insert into a 128-bit vector. unsigned SizeFactor = OpVT.getSizeInBits()/128; - EVT VT128 = EVT::getVectorVT(*Context, - OpVT.getVectorElementType(), + MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(), OpVT.getVectorNumElements() / SizeFactor); Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0)); @@ -7806,8 +7801,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, SDValue In = Op.getOperand(0); SDValue Idx = Op.getOperand(1); unsigned IdxVal = cast(Idx)->getZExtValue(); - EVT ResVT = Op.getValueType(); - EVT InVT = In.getValueType(); + MVT ResVT = Op.getSimpleValueType(); + MVT InVT = In.getSimpleValueType(); if (Subtarget->hasFp256()) { if (ResVT.is128BitVector() && @@ -7834,16 +7829,16 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, SDValue SubVec = Op.getNode()->getOperand(1); SDValue Idx = Op.getNode()->getOperand(2); - if ((Op.getNode()->getValueType(0).is256BitVector() || - Op.getNode()->getValueType(0).is512BitVector()) && - SubVec.getNode()->getValueType(0).is128BitVector() && + if ((Op.getNode()->getSimpleValueType(0).is256BitVector() || + Op.getNode()->getSimpleValueType(0).is512BitVector()) && + SubVec.getNode()->getSimpleValueType(0).is128BitVector() && isa(Idx)) { unsigned IdxVal = cast(Idx)->getZExtValue(); return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl); } - if (Op.getNode()->getValueType(0).is512BitVector() && - SubVec.getNode()->getValueType(0).is256BitVector() && + if (Op.getNode()->getSimpleValueType(0).is512BitVector() && + SubVec.getNode()->getSimpleValueType(0).is256BitVector() && isa(Idx)) { unsigned IdxVal = cast(Idx)->getZExtValue(); return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl); -- cgit v1.1 From 07ad0c4c99cb704d4f3e1bc1ba3bb1b194efe572 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Aug 2013 05:57:07 +0000 Subject: Use MVT instead of EVT in X86ISelDAGToDAG since all the types should be legal. 
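A brief sketch of the distinction (not from the commit itself), using the two
accessors that already appear in these diffs: EVT can describe arbitrary
IR-level types such as i3 or v7i13 before type legalization, while MVT covers
only the simple machine value types that remain afterwards, so
post-legalization code can assert simplicity up front:

  EVT VT  = N->getValueType(0);        // may be an extended (non-simple) type
  MVT SVT = N->getSimpleValueType(0);  // asserts the type is already simple

The getValueType().getSimpleVT() calls replaced below perform the same
assertion, just through an extra temporary.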
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188446 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 58 +++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 8f8d488..33c80c7 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -183,7 +183,7 @@ namespace { SDNode *Select(SDNode *N); SDNode *SelectGather(SDNode *N, unsigned Opc); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); - SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); + SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT); bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); @@ -491,8 +491,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - EVT SrcVT = N->getOperand(0).getValueType(); - EVT DstVT = N->getValueType(0); + MVT SrcVT = N->getOperand(0).getSimpleValueType(); + MVT DstVT = N->getSimpleValueType(0); // If any of the sources are vectors, no fp stack involved. if (SrcVT.isVector() || DstVT.isVector()) @@ -519,7 +519,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. Based on this, decide what we want to do. - EVT MemVT; + MVT MemVT; if (N->getOpcode() == ISD::FP_ROUND) MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. else @@ -783,7 +783,7 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, Mask != (0xffu << ScaleLog)) return true; - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); SDLoc DL(N); SDValue Eight = DAG.getConstant(8, MVT::i8); SDValue NewMask = DAG.getConstant(0xff, VT); @@ -831,7 +831,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) return true; - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); SDLoc DL(N); SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT); SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask); @@ -904,7 +904,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // Scale the leading zero count down based on the actual size of the value. // Also scale it down based on the size of the shift. - MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt; + MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; // The final check is to ensure that any masked out high bits of X are // already known to be zero. Otherwise, the mask has a semantic impact @@ -914,23 +914,23 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // replace them with zero extensions cheaply if necessary. bool ReplacingAnyExtend = false; if (X.getOpcode() == ISD::ANY_EXTEND) { - unsigned ExtendBits = - X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits(); + unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() - + X.getOperand(0).getSimpleValueType().getSizeInBits(); // Assume that we'll replace the any-extend with a zero-extend, and // narrow the search to the extended value. X = X.getOperand(0); MaskLZ = ExtendBits > MaskLZ ? 
0 : MaskLZ - ExtendBits; ReplacingAnyExtend = true; } - APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(), - MaskLZ); + APInt MaskedHighBits = + APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); APInt KnownZero, KnownOne; DAG.ComputeMaskedBits(X, KnownZero, KnownOne); if (MaskedHighBits != KnownZero) return true; // We've identified a pattern that can be transformed into a single shift // and an addressing mode. Make it so. - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); if (ReplacingAnyExtend) { assert(X.getValueType() != VT); // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. @@ -1059,7 +1059,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // We only handle up to 64-bit values here as those are what matter for // addressing mode optimizations. - if (X.getValueSizeInBits() > 64) break; + if (X.getSimpleValueType().getSizeInBits() > 64) break; // The mask used for the transform is expected to be post-shift, but we // found the shift first so just apply the shift to the mask before passing @@ -1244,7 +1244,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // We only handle up to 64-bit values here as those are what matter for // addressing mode optimizations. - if (X.getValueSizeInBits() > 64) break; + if (X.getSimpleValueType().getSizeInBits() > 64) break; if (!isa(N.getOperand(1))) break; @@ -1323,7 +1323,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (MatchAddress(N, AM)) return false; - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base_Reg.getNode()) AM.Base_Reg = CurDAG->getRegister(0, VT); @@ -1465,7 +1465,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, assert (T == AM.Segment); AM.Segment = Copy; - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) if (AM.Base_Reg.getNode()) @@ -1706,7 +1706,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { // + non-empty, otherwise. static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, SDLoc dl, - enum AtomicOpc &Op, EVT NVT, + enum AtomicOpc &Op, MVT NVT, SDValue Val) { if (ConstantSDNode *CN = dyn_cast(Val)) { int64_t CNVal = CN->getSExtValue(); @@ -1753,7 +1753,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, return Val; } -SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { +SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { if (Node->hasAnyUseOfValue(0)) return 0; @@ -1793,7 +1793,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); unsigned Opc = 0; - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: return 0; case MVT::i8: if (isCN) @@ -2047,7 +2047,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { } SDNode *X86DAGToDAGISel::Select(SDNode *Node) { - EVT NVT = Node->getValueType(0); + MVT NVT = Node->getSimpleValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); SDLoc dl(Node); @@ -2187,7 +2187,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; unsigned ShlOp, Op; - EVT CstVT = NVT; + MVT CstVT = NVT; // Check the minimum bitwidth for the new constant. // TODO: AND32ri is the same as AND64ri32 with zext imm. 
@@ -2202,7 +2202,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (NVT == CstVT) break; - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i32: assert(CstVT == MVT::i8); @@ -2239,7 +2239,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); unsigned LoReg; - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; @@ -2268,7 +2268,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { bool isSigned = Opcode == ISD::SMUL_LOHI; bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; @@ -2278,7 +2278,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; } } else { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; @@ -2415,7 +2415,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { bool isSigned = Opcode == ISD::SDIVREM; if (!isSigned) { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; @@ -2423,7 +2423,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; } } else { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; @@ -2434,7 +2434,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned LoReg, HiReg, ClrReg; unsigned SExtOpcode; - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; ClrReg = HiReg = X86::AH; @@ -2489,7 +2489,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } else { // Zero out the high part, effectively zero extending the input. SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { case MVT::i16: ClrNode = SDValue(CurDAG->getMachineNode( -- cgit v1.1 From 84e51517e25119419d53d6ed61fef15219efd733 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 15 Aug 2013 07:11:34 +0000 Subject: Tentative fix for global-buffer-overflow caused by r188426. 
Found by AddressSanitizer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188448 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 22bdb90..77ca885 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -115,7 +115,10 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, switch (N->getMachineOpcode()) { default: { const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode()); - int RegClass = Desc.OpInfo[Desc.getNumDefs() + OpNo].RegClass; + unsigned OpIdx = Desc.getNumDefs() + OpNo; + if (OpIdx >= Desc.getNumOperands()) + return NULL; + int RegClass = Desc.OpInfo[OpIdx].RegClass; if (RegClass == -1) { return NULL; } -- cgit v1.1 From 46ceaf4ba64cdd0ac37578c0132cad39c9ea21c0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Aug 2013 07:30:51 +0000 Subject: Don't let isPermImmMask handle v16i32 since VPERMI doesn't match on that type. Remove 128-bit vector handling from isPermImmMask too, it's covered by isPSHUFDMask. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188449 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3144038..f5cb021 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4102,41 +4102,26 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { return (FstHalf | (SndHalf << 4)); } -// Symetric in-lane mask. Each lane has 4 elements (for imm8) +// Symmetric in-lane mask. 
Each lane has 4 elements (for imm8) static bool isPermImmMask(ArrayRef Mask, MVT VT, unsigned& Imm8) { - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (EltSize < 32) + unsigned NumElts = VT.getVectorNumElements(); + if (!(VT.is256BitVector() && NumElts == 4) && + !(VT.is512BitVector() && NumElts == 8)) return false; - unsigned NumElts = VT.getVectorNumElements(); Imm8 = 0; - if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) { - for (unsigned i = 0; i != NumElts; ++i) { - if (Mask[i] < 0) - continue; - Imm8 |= Mask[i] << (i*2); - } - return true; - } - unsigned LaneSize = 4; - SmallVector MaskVal(LaneSize, -1); - for (unsigned l = 0; l != NumElts; l += LaneSize) { for (unsigned i = 0; i != LaneSize; ++i) { if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize)) return false; - if (Mask[i+l] < 0) - continue; - if (MaskVal[i] < 0) { - MaskVal[i] = Mask[i+l] - l; - Imm8 |= MaskVal[i] << (i*2); - continue; - } - if (Mask[i+l] != (signed)(MaskVal[i]+l)) + if (Mask[i] >= 0 && !isUndefOrEqual(Mask[i+l], Mask[i]+l)) return false; + if (Mask[i+l] >= 0) + Imm8 |= (Mask[i+l] - l) << (i*2); } } + return true; } @@ -4165,9 +4150,7 @@ static bool isVPERMILPMask(ArrayRef Mask, MVT VT, bool HasFp256) { if (NumElts != 8 || l == 0) continue; // VPERMILPS handling - if (Mask[i] < 0) - continue; - if (!isUndefOrEqual(Mask[i+l], Mask[i]+l)) + if (Mask[i] >= 0 && !isUndefOrEqual(Mask[i+l], Mask[i]+l)) return false; } } -- cgit v1.1 From d9767021f83879429e930b068d1d6aef22285b33 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Thu, 15 Aug 2013 08:26:11 +0000 Subject: Clang and AArch64 backend patches to support shll/shl and vmovl instructions and ACLE functions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188451 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 52 +++++++++++ lib/Target/AArch64/AArch64ISelLowering.h | 5 +- lib/Target/AArch64/AArch64InstrFormats.td | 19 ++++ lib/Target/AArch64/AArch64InstrNEON.td | 145 +++++++++++++++++++++++++++++ 4 files changed, 220 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index ec838fb..d12302e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -77,6 +77,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SHL); // AArch64 does not have i1 loads, or much of anything for i1 really. setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -3235,6 +3236,56 @@ static SDValue PerformSRACombine(SDNode *N, DAG.getConstant(LSB + Width - 1, MVT::i64)); } +/// Check if this is a valid build_vector for the immediate operand of +/// a vector shift operation, where all the elements of the build_vector +/// must have the same constant integer value. +static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { + // Ignore bit_converts. 
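+  // A BITCAST only reinterprets bits and cannot change the splatted
+  // constant, so it is safe to peel any number of them off here.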
+  while (Op.getOpcode() == ISD::BITCAST)
+    Op = Op.getOperand(0);
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+                                    HasAnyUndefs, ElementBits) ||
+      SplatBitSize > ElementBits)
+    return false;
+  Cnt = SplatBits.getSExtValue();
+  return true;
+}
+
+/// Check if this is a valid build_vector for the immediate operand of
+/// a vector shift left operation. That value must be in the range:
+///   0 <= Value < ElementBits for a left shift
+static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
+  assert(VT.isVector() && "vector shift count is not a vector type");
+  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+  if (!getVShiftImm(Op, ElementBits, Cnt))
+    return false;
+  return (Cnt >= 0 && Cnt < ElementBits);
+}
+
+static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                                 const AArch64Subtarget *ST) {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+
+  // Nothing to be done for scalar shifts.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!VT.isVector() || !TLI.isTypeLegal(VT))
+    return SDValue();
+
+  assert(ST->hasNEON() && "unexpected vector shift");
+  int64_t Cnt;
+  if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
+    SDValue RHS = DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(0)),
+                              VT, DAG.getConstant(Cnt, MVT::i32));
+    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
+  }
+
+  return SDValue();
+}
 
 SDValue
 AArch64TargetLowering::PerformDAGCombine(SDNode *N,
@@ -3244,6 +3295,7 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::AND: return PerformANDCombine(N, DCI);
   case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
   case ISD::SRA: return PerformSRACombine(N, DCI);
+  case ISD::SHL: return PerformSHLCombine(N, DCI, getSubtarget());
   }
   return SDValue();
 }
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 67a908e..c9795b2 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -132,7 +132,10 @@ namespace AArch64ISD {
     NEON_CMPZ,
 
     // Vector compare bitwise test
-    NEON_TST
+    NEON_TST,
+
+    // Operation for the immediate in vector shift
+    NEON_DUPIMM
   };
 }
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 09451fd..020ee6c 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1050,5 +1050,24 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
   // Inherit Rd in 4-0
 }
 
+// Format AdvSIMD 2 vector 1 immediate shift
+class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
+                       dag outs, dag ins, string asmstr,
+                       list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  bits<7> Imm;
+  let Inst{31} = 0b0;
+  let Inst{30} = q;
+  let Inst{29} = u;
+  let Inst{28-23} = 0b011110;
+  let Inst{22-16} = Imm;
+  let Inst{15-11} = opcode;
+  let Inst{10} = 0b1;
+
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
 }
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 98b9e3e..175c3aa 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -41,6 +41,9 @@ def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                       [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
 
+def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1,
+ [SDTCisVec<0>, SDTCisVT<1, i32>]>>; + //===----------------------------------------------------------------------===// // Multiclasses //===----------------------------------------------------------------------===// @@ -1409,6 +1412,148 @@ def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>; def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; } +// Vector Shift (Immediate) + +def imm0_63 : Operand, ImmLeaf= 0 && Imm < 63; }]> { + let ParserMatchClass = uimm6_asmoperand; +} + +class N2VShiftLeft opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy> + : NeonI_2VShiftImm; + +multiclass NeonI_N2VShL opcode, string asmop> { + // 64-bit vector types. + def _8B : N2VShiftLeft<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3> { + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _4H : N2VShiftLeft<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4> { + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _2S : N2VShiftLeft<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5> { + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + // 128-bit vector types. + def _16B : N2VShiftLeft<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3> { + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _8H : N2VShiftLeft<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4> { + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _4S : N2VShiftLeft<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5> { + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + def _2D : N2VShiftLeft<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63> { + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + } +} + +def Neon_top16B : PatFrag<(ops node:$in), + (extract_subvector (v16i8 node:$in), (iPTR 8))>; +def Neon_top8H : PatFrag<(ops node:$in), + (extract_subvector (v8i16 node:$in), (iPTR 4))>; +def Neon_top4S : PatFrag<(ops node:$in), + (extract_subvector (v4i32 node:$in), (iPTR 2))>; + +class N2VShiftLong opcode, string asmop, string DestT, + string SrcT, ValueType DestTy, ValueType SrcTy, + Operand ImmTy, SDPatternOperator ExtOp> + : NeonI_2VShiftImm; + +class N2VShiftLongHigh opcode, string asmop, string DestT, + string SrcT, ValueType DestTy, ValueType SrcTy, + int StartIndex, Operand ImmTy, + SDPatternOperator ExtOp, PatFrag getTop> + : NeonI_2VShiftImm; + +multiclass NeonI_N2VShLL opcode, string asmop, + SDNode ExtOp> { + // 64-bit vector types. + def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, + uimm3, ExtOp>{ + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, + uimm4, ExtOp>{ + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, + uimm5, ExtOp>{ + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + // 128-bit vector types + def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", + v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B>{ + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", + v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H>{ + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", + v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S>{ + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + // Use other patterns to match when the immediate is 0. 
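+  // A shift left by zero is the extension itself, so a bare sext/zext can
+  // reuse the shift-long instructions with a zero immediate, e.g.
+  // (v8i16 (sext (v8i8 $Rn))) maps onto SSHLLvvi_8B $Rn, 0.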
+ def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))), + (!cast(prefix # "_8B") VPR64:$Rn, 0)>; + + def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))), + (!cast(prefix # "_4H") VPR64:$Rn, 0)>; + + def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))), + (!cast(prefix # "_2S") VPR64:$Rn, 0)>; + + def : Pat<(v8i16 (ExtOp (v8i8 (Neon_top16B VPR128:$Rn)))), + (!cast(prefix # "_16B") VPR128:$Rn, 0)>; + + def : Pat<(v4i32 (ExtOp (v4i16 (Neon_top8H VPR128:$Rn)))), + (!cast(prefix # "_8H") VPR128:$Rn, 0)>; + + def : Pat<(v2i64 (ExtOp (v2i32 (Neon_top4S VPR128:$Rn)))), + (!cast(prefix # "_4S") VPR128:$Rn, 0)>; +} + +// Shift left immediate +defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; + +// Shift left long immediate +defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; +defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; + // Scalar Arithmetic class NeonI_Scalar3Same_D_size opcode, string asmop> -- cgit v1.1 From d36e1efa4b674b6b224995657e04a1c6145f70db Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 15 Aug 2013 08:38:25 +0000 Subject: Revert r188449 as it turns out we're just missing the instructions that need the v16i32/v16f32 matching. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188454 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f5cb021..3144038 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4102,26 +4102,41 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { return (FstHalf | (SndHalf << 4)); } -// Symmetric in-lane mask. Each lane has 4 elements (for imm8) +// Symetric in-lane mask. 
Each lane has 4 elements (for imm8)
 static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
-  unsigned NumElts = VT.getVectorNumElements();
-  if (!(VT.is256BitVector() && NumElts == 4) &&
-      !(VT.is512BitVector() && NumElts == 8))
+  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+  if (EltSize < 32)
     return false;
 
+  unsigned NumElts = VT.getVectorNumElements();
   Imm8 = 0;
+  if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (Mask[i] < 0)
+        continue;
+      Imm8 |= Mask[i] << (i*2);
+    }
+    return true;
+  }
+
+  unsigned LaneSize = 4;
+  SmallVector<int, 4> MaskVal(LaneSize, -1);
+
+  for (unsigned l = 0; l != NumElts; l += LaneSize) {
     for (unsigned i = 0; i != LaneSize; ++i) {
       if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
         return false;
-      if (Mask[i] >= 0 && !isUndefOrEqual(Mask[i+l], Mask[i]+l))
+      if (Mask[i+l] < 0)
+        continue;
+      if (MaskVal[i] < 0) {
+        MaskVal[i] = Mask[i+l] - l;
+        Imm8 |= MaskVal[i] << (i*2);
+        continue;
+      }
+      if (Mask[i+l] != (signed)(MaskVal[i]+l))
         return false;
-      if (Mask[i+l] >= 0)
-        Imm8 |= (Mask[i+l] - l) << (i*2);
     }
   }
-
   return true;
 }
 
@@ -4150,7 +4165,9 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
       if (NumElts != 8 || l == 0)
         continue;
       // VPERMILPS handling
-      if (Mask[i] >= 0 && !isUndefOrEqual(Mask[i+l], Mask[i]+l))
+      if (Mask[i] < 0)
+        continue;
+      if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
         return false;
     }
   }
-- cgit v1.1

From e2a9376b1bd2204ea6f56a35b762e28e0ef4e35a Mon Sep 17 00:00:00 2001
From: Jack Carter
Date: Thu, 15 Aug 2013 12:24:57 +0000
Subject: [Mips][msa] Added the simple builtins (add_a to dpsub[su], ilvev to ldi)

Includes:
add_a, adds_[asu], addv, addvi, andi.b, asub_[su].[bhwd], aver?_[su]_[bhwd],
bclr, bclri, bins[lr], bins[lr]i, bmnzi, bmzi, bneg, bnegi, bseli, bset,
bseti, c(eq|ne), c(eq|ne)i, cl[et]_[su], cl[et]i_[su], copy_[su].[bhw],
div_[su], dotp_[su], dpadd_[su], dpsub_[su], ilvev, ilvl, ilvod, ilvr, insv,
insve, ldi

Patch by Daniel Sanders

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188457 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsInstrInfo.td       |    4 +
 lib/Target/Mips/MipsMSAInstrFormats.td |   77 +++
 lib/Target/Mips/MipsMSAInstrInfo.td    | 1137 +++++++++++++++++++++++++++++++-
 lib/Target/Mips/MipsRegisterInfo.td    |    5 +-
 lib/Target/Mips/MipsSEISelLowering.cpp |   33 +-
 lib/Target/Mips/MipsSEISelLowering.h   |    2 +
 6 files changed, 1238 insertions(+), 20 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 81dbd42..1753207 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -264,6 +264,10 @@ def simm16_64 : Operand<i64>;
 def shamt : Operand<i32>;
 
 // Unsigned Operand
+def uimm5 : Operand<i32> {
+  let PrintMethod = "printUnsignedImm";
+}
+
 def uimm16 : Operand<i32> {
   let PrintMethod = "printUnsignedImm";
 }
diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
index 7cc1a88..6f1e58f 100644
--- a/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -21,14 +21,91 @@ class PseudoMSA<dag outs, dag ins, list<dag> pattern,
   let Predicates = [HasMSA];
 }
 
+class MSA_BIT_B_FMT<bits<3> major, bits<6> minor>: MSAInst {
+  let Inst{25-23} = major;
+  let Inst{22-19} = 0b1110;
+  let Inst{5-0} = minor;
+}
+
+class MSA_BIT_H_FMT<bits<3> major, bits<6> minor>: MSAInst {
+  let Inst{25-23} = major;
+  let Inst{22-20} = 0b110;
+  let Inst{5-0} = minor;
+}
+
+class MSA_BIT_W_FMT<bits<3> major, bits<6> minor>: MSAInst {
+  let Inst{25-23} = major;
+  let Inst{22-21} = 0b10;
+  let Inst{5-0} = minor;
+}
+
+class MSA_BIT_D_FMT<bits<3> major, bits<6> minor>: MSAInst {
+  let Inst{25-23} = major;
+  let Inst{22} = 0b0;
+  let Inst{5-0} = minor;
+}
+
+class MSA_2R_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
+  let Inst{25-18} = major;
+  let Inst{17-16} = df;
+  let Inst{5-0} = minor;
+}
+
+class MSA_2RF_FMT<bits<9> major, bits<1> df, bits<6> minor>: MSAInst {
+  let Inst{25-17} = major;
+  let Inst{16} = df;
+  let Inst{5-0} = minor;
+}
+
 class MSA_3R_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
   let Inst{25-23} = major;
   let Inst{22-21} = df;
   let Inst{5-0} = minor;
 }
 
+class MSA_3RF_FMT<bits<4> major, bits<1> df, bits<6> minor>: MSAInst {
+  let Inst{25-22} = major;
+  let Inst{21} = df;
+  let Inst{5-0} = minor;
+}
+
+class MSA_ELM_B_FMT<bits<4> major, bits<6> minor>: MSAInst {
+  let Inst{25-22} = major;
+  let Inst{21-20} = 0b00;
+  let Inst{5-0} = minor;
+}
+
+class MSA_ELM_H_FMT<bits<4> major, bits<6> minor>: MSAInst {
+  let Inst{25-22} = major;
+  let Inst{21-19} = 0b100;
+  let Inst{5-0} = minor;
+}
+
+class MSA_ELM_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
+  let Inst{25-22} = major;
+  let Inst{21-18} = 0b1100;
+  let Inst{5-0} = minor;
+}
+
+class MSA_ELM_D_FMT<bits<4> major, bits<6> minor>: MSAInst {
+  let Inst{25-22} = major;
+  let Inst{21-17} = 0b11100;
+  let Inst{5-0} = minor;
+}
+
 class MSA_I5_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
   let Inst{25-23} = major;
   let Inst{22-21} = df;
   let Inst{5-0} = minor;
 }
+
+class MSA_I8_FMT<bits<2> major, bits<6> minor>: MSAInst {
+  let Inst{25-24} = major;
+  let Inst{5-0} = minor;
+}
+
+class MSA_I10_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+  let Inst{25-23} = major;
+  let Inst{22-21} = df;
+  let Inst{5-0} = minor;
+}
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 24482c0..ea8938b 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -11,17 +11,887 @@
 //
 //===----------------------------------------------------------------------===//
 
+def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>;
+def immSExt10: ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
+
+def uimm3 : Operand<i32> {
+  let PrintMethod = "printUnsignedImm";
+}
+
+def uimm4 : Operand<i32> {
+  let PrintMethod = "printUnsignedImm";
+}
+
+def uimm6 : Operand<i32> {
+  let PrintMethod = "printUnsignedImm";
+}
+
+def uimm8 : Operand<i32> {
+  let PrintMethod = "printUnsignedImm";
+}
+
+def simm5 : Operand<i32>;
+
+def simm10 : Operand<i32>;
+
 // Instruction encoding.
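+// Each *_ENC class below pins the fixed opcode fields of one instruction and
+// data format through the MSA_*_FMT classes above; register operands and
+// selection patterns are supplied by the matching *_DESC classes further down.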
+class ADD_A_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010000>; +class ADD_A_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010000>; +class ADD_A_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010000>; +class ADD_A_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010000>; + +class ADDS_A_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010000>; +class ADDS_A_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010000>; +class ADDS_A_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010000>; +class ADDS_A_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010000>; + +class ADDS_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010000>; +class ADDS_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010000>; +class ADDS_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010000>; +class ADDS_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010000>; + +class ADDS_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010000>; +class ADDS_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010000>; +class ADDS_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010000>; +class ADDS_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010000>; + +class ADDV_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001110>; +class ADDV_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001110>; +class ADDV_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001110>; +class ADDV_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001110>; + +class ADDVI_B_ENC : MSA_I5_FMT<0b000, 0b00, 0b000110>; +class ADDVI_H_ENC : MSA_I5_FMT<0b000, 0b01, 0b000110>; +class ADDVI_W_ENC : MSA_I5_FMT<0b000, 0b10, 0b000110>; +class ADDVI_D_ENC : MSA_I5_FMT<0b000, 0b11, 0b000110>; + +class ANDI_B_ENC : MSA_I8_FMT<0b00, 0b000000>; + +class ASUB_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010001>; +class ASUB_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010001>; +class ASUB_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010001>; +class ASUB_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010001>; + +class ASUB_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010001>; +class ASUB_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010001>; +class ASUB_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010001>; +class ASUB_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010001>; + +class AVE_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010000>; +class AVE_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010000>; +class AVE_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010000>; +class AVE_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010000>; + +class AVE_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010000>; +class AVE_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010000>; +class AVE_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010000>; +class AVE_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010000>; + +class AVER_S_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010000>; +class AVER_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010000>; +class AVER_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010000>; +class AVER_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010000>; + +class AVER_U_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010000>; +class AVER_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010000>; +class AVER_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010000>; +class AVER_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010000>; + +class BCLR_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001101>; +class BCLR_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001101>; +class BCLR_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001101>; +class BCLR_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001101>; + +class BCLRI_B_ENC : MSA_BIT_B_FMT<0b011, 0b001001>; +class BCLRI_H_ENC : MSA_BIT_H_FMT<0b011, 0b001001>; +class BCLRI_W_ENC : MSA_BIT_W_FMT<0b011, 0b001001>; +class BCLRI_D_ENC : MSA_BIT_D_FMT<0b011, 0b001001>; + +class BINSL_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b001101>; +class BINSL_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b001101>; +class BINSL_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b001101>; +class BINSL_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b001101>; + +class BINSLI_B_ENC : MSA_BIT_B_FMT<0b110, 0b001001>; +class BINSLI_H_ENC : MSA_BIT_H_FMT<0b110, 
0b001001>; +class BINSLI_W_ENC : MSA_BIT_W_FMT<0b110, 0b001001>; +class BINSLI_D_ENC : MSA_BIT_D_FMT<0b110, 0b001001>; + +class BINSR_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b001101>; +class BINSR_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b001101>; +class BINSR_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b001101>; +class BINSR_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b001101>; + +class BINSRI_B_ENC : MSA_BIT_B_FMT<0b111, 0b001001>; +class BINSRI_H_ENC : MSA_BIT_H_FMT<0b111, 0b001001>; +class BINSRI_W_ENC : MSA_BIT_W_FMT<0b111, 0b001001>; +class BINSRI_D_ENC : MSA_BIT_D_FMT<0b111, 0b001001>; + +class BMNZI_B_ENC : MSA_I8_FMT<0b00, 0b000001>; + +class BMZI_B_ENC : MSA_I8_FMT<0b01, 0b000001>; + +class BNEG_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001101>; +class BNEG_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001101>; +class BNEG_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001101>; +class BNEG_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001101>; + +class BNEGI_B_ENC : MSA_BIT_B_FMT<0b101, 0b001001>; +class BNEGI_H_ENC : MSA_BIT_H_FMT<0b101, 0b001001>; +class BNEGI_W_ENC : MSA_BIT_W_FMT<0b101, 0b001001>; +class BNEGI_D_ENC : MSA_BIT_D_FMT<0b101, 0b001001>; + +class BSELI_B_ENC : MSA_I8_FMT<0b10, 0b000001>; + +class BSET_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001101>; +class BSET_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001101>; +class BSET_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001101>; +class BSET_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001101>; + +class BSETI_B_ENC : MSA_BIT_B_FMT<0b100, 0b001001>; +class BSETI_H_ENC : MSA_BIT_H_FMT<0b100, 0b001001>; +class BSETI_W_ENC : MSA_BIT_W_FMT<0b100, 0b001001>; +class BSETI_D_ENC : MSA_BIT_D_FMT<0b100, 0b001001>; + +class CEQ_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001111>; +class CEQ_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001111>; +class CEQ_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001111>; +class CEQ_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001111>; + +class CEQI_B_ENC : MSA_I5_FMT<0b000, 0b00, 0b000111>; +class CEQI_H_ENC : MSA_I5_FMT<0b000, 0b01, 0b000111>; +class CEQI_W_ENC : MSA_I5_FMT<0b000, 0b10, 0b000111>; +class CEQI_D_ENC : MSA_I5_FMT<0b000, 0b11, 0b000111>; + +class CLE_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001111>; +class CLE_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001111>; +class CLE_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001111>; +class CLE_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001111>; + +class CLE_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001111>; +class CLE_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001111>; +class CLE_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001111>; +class CLE_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001111>; + +class CLEI_S_B_ENC : MSA_I5_FMT<0b100, 0b00, 0b000111>; +class CLEI_S_H_ENC : MSA_I5_FMT<0b100, 0b01, 0b000111>; +class CLEI_S_W_ENC : MSA_I5_FMT<0b100, 0b10, 0b000111>; +class CLEI_S_D_ENC : MSA_I5_FMT<0b100, 0b11, 0b000111>; + +class CLEI_U_B_ENC : MSA_I5_FMT<0b101, 0b00, 0b000111>; +class CLEI_U_H_ENC : MSA_I5_FMT<0b101, 0b01, 0b000111>; +class CLEI_U_W_ENC : MSA_I5_FMT<0b101, 0b10, 0b000111>; +class CLEI_U_D_ENC : MSA_I5_FMT<0b101, 0b11, 0b000111>; + +class CLT_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001111>; +class CLT_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001111>; +class CLT_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001111>; +class CLT_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001111>; + +class CLT_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001111>; +class CLT_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001111>; +class CLT_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001111>; +class CLT_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001111>; + +class CLTI_S_B_ENC : MSA_I5_FMT<0b010, 0b00, 0b000111>; +class CLTI_S_H_ENC : MSA_I5_FMT<0b010, 0b01, 0b000111>; +class CLTI_S_W_ENC : MSA_I5_FMT<0b010, 0b10, 
0b000111>; +class CLTI_S_D_ENC : MSA_I5_FMT<0b010, 0b11, 0b000111>; + +class CLTI_U_B_ENC : MSA_I5_FMT<0b011, 0b00, 0b000111>; +class CLTI_U_H_ENC : MSA_I5_FMT<0b011, 0b01, 0b000111>; +class CLTI_U_W_ENC : MSA_I5_FMT<0b011, 0b10, 0b000111>; +class CLTI_U_D_ENC : MSA_I5_FMT<0b011, 0b11, 0b000111>; + +class COPY_S_B_ENC : MSA_ELM_B_FMT<0b0010, 0b011001>; +class COPY_S_H_ENC : MSA_ELM_H_FMT<0b0010, 0b011001>; +class COPY_S_W_ENC : MSA_ELM_W_FMT<0b0010, 0b011001>; + +class COPY_U_B_ENC : MSA_ELM_B_FMT<0b0011, 0b011001>; +class COPY_U_H_ENC : MSA_ELM_H_FMT<0b0011, 0b011001>; +class COPY_U_W_ENC : MSA_ELM_W_FMT<0b0011, 0b011001>; + +class DIV_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010010>; +class DIV_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010010>; +class DIV_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010010>; +class DIV_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010010>; + +class DIV_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010010>; +class DIV_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010010>; +class DIV_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010010>; +class DIV_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010010>; + +class DOTP_S_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010011>; +class DOTP_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010011>; +class DOTP_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010011>; +class DOTP_S_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010011>; + +class DOTP_U_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010011>; +class DOTP_U_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010011>; +class DOTP_U_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010011>; +class DOTP_U_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010011>; + +class DPADD_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010011>; +class DPADD_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010011>; +class DPADD_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010011>; + +class DPADD_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010011>; +class DPADD_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010011>; +class DPADD_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010011>; + +class DPSUB_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010011>; +class DPSUB_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010011>; +class DPSUB_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010011>; + +class DPSUB_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010011>; +class DPSUB_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010011>; +class DPSUB_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010011>; + +class ILVEV_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010100>; +class ILVEV_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010100>; +class ILVEV_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010100>; +class ILVEV_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010100>; + +class ILVL_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010100>; +class ILVL_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010100>; +class ILVL_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010100>; +class ILVL_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010100>; + +class ILVOD_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010100>; +class ILVOD_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010100>; +class ILVOD_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010100>; +class ILVOD_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010100>; + +class ILVR_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010100>; +class ILVR_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010100>; +class ILVR_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010100>; +class ILVR_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010100>; + +class INSERT_B_ENC : MSA_ELM_B_FMT<0b0100, 0b011001>; +class INSERT_H_ENC : MSA_ELM_H_FMT<0b0100, 0b011001>; +class INSERT_W_ENC : MSA_ELM_W_FMT<0b0100, 0b011001>; + class LD_B_ENC : MSA_I5_FMT<0b110, 0b00, 0b000111>; class LD_H_ENC : MSA_I5_FMT<0b110, 0b01, 0b000111>; class LD_W_ENC : MSA_I5_FMT<0b110, 0b10, 0b000111>; class LD_D_ENC : MSA_I5_FMT<0b110, 0b11, 0b000111>; + +class LDI_B_ENC : 
MSA_I10_FMT<0b010, 0b00, 0b001100>;
+class LDI_H_ENC : MSA_I10_FMT<0b010, 0b01, 0b001100>;
+class LDI_W_ENC : MSA_I10_FMT<0b010, 0b10, 0b001100>;
+class LDI_D_ENC : MSA_I10_FMT<0b010, 0b11, 0b001100>;
+
 class ST_B_ENC : MSA_I5_FMT<0b111, 0b00, 0b000111>;
 class ST_H_ENC : MSA_I5_FMT<0b111, 0b01, 0b000111>;
 class ST_W_ENC : MSA_I5_FMT<0b111, 0b10, 0b000111>;
 class ST_D_ENC : MSA_I5_FMT<0b111, 0b11, 0b000111>;
 
 // Instruction desc.
+class MSA_BIT_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin, RegisterClass RCWD,
+                          RegisterClass RCWS> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, uimm6:$u6);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u6");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt6:$u6))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin, RegisterClass RCWD,
+                          RegisterClass RCWS> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, uimm5:$u5);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt5:$u5))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_H_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin, RegisterClass RCWD,
+                          RegisterClass RCWS> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, uimm4:$u4);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u4");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt4:$u4))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_B_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin, RegisterClass RCWD,
+                          RegisterClass RCWS> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, uimm3:$u3);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u3");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt3:$u3))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_COPY_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                         InstrItinClass itin, RegisterClass RCD,
+                         RegisterClass RCWS> {
+  dag OutOperandList = (outs RCD:$rd);
+  dag InOperandList = (ins RCWS:$ws, uimm6:$n);
+  string AsmString = !strconcat(instr_asm, "\t$rd, $ws[$n]");
+  list<dag> Pattern = [(set RCD:$rd, (OpNode RCWS:$ws, immZExt6:$n))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_I5_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                       InstrItinClass itin, RegisterClass RCWD,
+                       RegisterClass RCWS> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, uimm5:$u5);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt5:$u5))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_SI5_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                        InstrItinClass itin, RegisterClass RCWD,
+                        RegisterClass RCWS> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, simm5:$s5);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $s5");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immSExt5:$s5))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_I8_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                       InstrItinClass itin, RegisterClass RCWD,
+                       RegisterClass RCWS> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, uimm8:$u8);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt8:$u8))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_I10_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                        InstrItinClass itin, RegisterClass RCWD> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins simm10:$i10);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $i10");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode immSExt10:$i10))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                       InstrItinClass itin, RegisterClass RCWD,
+                       RegisterClass RCWS> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_2RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                        InstrItinClass itin, RegisterClass RCWD,
+                        RegisterClass RCWS> :
+  MSA_2R_DESC_BASE<instr_asm, OpNode, itin, RCWD, RCWS>;
+
+
+class MSA_3R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                       InstrItinClass itin, RegisterClass RCWD,
+                       RegisterClass RCWS = RCWD,
+                       RegisterClass RCWT = RCWD> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, RCWT:$wt);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+  list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, RCWT:$wt))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_4R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin, RegisterClass RCWD,
+                          RegisterClass RCWS = RCWD,
+                          RegisterClass RCWT = RCWD> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWD:$wd_in, RCWS:$ws, RCWT:$wt);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+  list<dag> Pattern = [(set RCWD:$wd,
+                       (OpNode RCWD:$wd_in, RCWS:$ws, RCWT:$wt))];
+  InstrItinClass Itinerary = itin;
+  string Constraints = "$wd = $wd_in";
+}
+
+class MSA_3RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                        InstrItinClass itin, RegisterClass RCWD,
+                        RegisterClass RCWS = RCWD,
+                        RegisterClass RCWT = RCWD> :
+  MSA_3R_DESC_BASE<instr_asm, OpNode, itin, RCWD, RCWS, RCWT>;
+
+class MSA_3RF_4RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                            InstrItinClass itin, RegisterClass RCWD,
+                            RegisterClass RCWS = RCWD,
+                            RegisterClass RCWT = RCWD> :
+  MSA_3R_4R_DESC_BASE<instr_asm, OpNode, itin, RCWD, RCWS, RCWT>;
+
+class MSA_INSERT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                           InstrItinClass itin, RegisterClass RCD,
+                           RegisterClass RCWS> {
+  dag OutOperandList = (outs RCD:$wd);
+  dag InOperandList = (ins RCD:$wd_in, uimm6:$n, RCWS:$rs);
+  string AsmString = !strconcat(instr_asm, "\t$wd[$n], $rs");
+  list<dag> Pattern = [(set RCD:$wd, (OpNode RCD:$wd_in,
+                                             immZExt6:$n,
+                                             RCWS:$rs))];
+  InstrItinClass Itinerary = itin;
+  string Constraints = "$wd = $wd_in";
+}
+
+class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, NoItinerary,
+                                      MSA128, MSA128>, IsCommutable;
+class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, NoItinerary,
+                                      MSA128, MSA128>, IsCommutable;
+class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, NoItinerary,
+                                      MSA128, MSA128>, IsCommutable;
+class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, NoItinerary,
+                                      MSA128, MSA128>, IsCommutable;
+
+class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+
+class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_S_D_DESC : MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+
+class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b", int_mips_adds_u_b,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d,
+                                       NoItinerary, MSA128, MSA128>,
+                      IsCommutable;
+
+class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", int_mips_addv_b, NoItinerary,
+                                     MSA128, MSA128>, IsCommutable;
+class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", int_mips_addv_h, NoItinerary,
+                                     MSA128, MSA128>, IsCommutable;
+class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, NoItinerary,
+                                     MSA128, MSA128>, IsCommutable;
+class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, NoItinerary,
+                                     MSA128, MSA128>, IsCommutable;
+
+class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, NoItinerary,
+                                      MSA128, MSA128>;
+class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, NoItinerary,
+                                      MSA128, MSA128>;
+class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, NoItinerary,
+                                      MSA128, MSA128>;
+class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d",
NoItinerary, + MSA128, MSA128>; + +class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, NoItinerary, + MSA128, MSA128>; + +class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, + NoItinerary, MSA128, MSA128>; +class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, + NoItinerary, MSA128, MSA128>; +class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w, + NoItinerary, MSA128, MSA128>; +class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d, + NoItinerary, MSA128, MSA128>; + +class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b, + NoItinerary, MSA128, MSA128>; +class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h, + NoItinerary, MSA128, MSA128>; +class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w, + NoItinerary, MSA128, MSA128>; +class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d, + NoItinerary, MSA128, MSA128>; + +class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, NoItinerary, + MSA128, MSA128>, IsCommutable; +class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, NoItinerary, + MSA128, MSA128>, IsCommutable; +class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, NoItinerary, + MSA128, MSA128>, IsCommutable; +class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, NoItinerary, + MSA128, MSA128>, IsCommutable; + +class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, NoItinerary, + MSA128, MSA128>, IsCommutable; +class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, NoItinerary, + MSA128, MSA128>, IsCommutable; +class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, NoItinerary, + MSA128, MSA128>, IsCommutable; +class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, NoItinerary, + MSA128, MSA128>, IsCommutable; + +class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, NoItinerary, + MSA128, MSA128>; +class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, NoItinerary, + MSA128, MSA128>; +class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, NoItinerary, + MSA128, MSA128>; +class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, NoItinerary, + MSA128, MSA128>; + +class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", int_mips_bclri_b, + NoItinerary, MSA128, MSA128>; +class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", int_mips_bclri_h, + NoItinerary, MSA128, MSA128>; +class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", int_mips_bclri_w, + NoItinerary, MSA128, MSA128>; +class BCLRI_D_DESC : 
MSA_BIT_D_DESC_BASE<"bclri.d", int_mips_bclri_d, + NoItinerary, MSA128, MSA128>; + +class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, NoItinerary, + MSA128, MSA128>; +class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, NoItinerary, + MSA128, MSA128>; +class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, NoItinerary, + MSA128, MSA128>; +class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, NoItinerary, + MSA128, MSA128>; + +class BINSLI_B_DESC : MSA_BIT_B_DESC_BASE<"binsli.b", int_mips_binsli_b, + NoItinerary, MSA128, MSA128>; +class BINSLI_H_DESC : MSA_BIT_H_DESC_BASE<"binsli.h", int_mips_binsli_h, + NoItinerary, MSA128, MSA128>; +class BINSLI_W_DESC : MSA_BIT_W_DESC_BASE<"binsli.w", int_mips_binsli_w, + NoItinerary, MSA128, MSA128>; +class BINSLI_D_DESC : MSA_BIT_D_DESC_BASE<"binsli.d", int_mips_binsli_d, + NoItinerary, MSA128, MSA128>; + +class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, NoItinerary, + MSA128, MSA128>; +class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, NoItinerary, + MSA128, MSA128>; +class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, NoItinerary, + MSA128, MSA128>; +class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, NoItinerary, + MSA128, MSA128>; + +class BINSRI_B_DESC : MSA_BIT_B_DESC_BASE<"binsri.b", int_mips_binsri_b, + NoItinerary, MSA128, MSA128>; +class BINSRI_H_DESC : MSA_BIT_H_DESC_BASE<"binsri.h", int_mips_binsri_h, + NoItinerary, MSA128, MSA128>; +class BINSRI_W_DESC : MSA_BIT_W_DESC_BASE<"binsri.w", int_mips_binsri_w, + NoItinerary, MSA128, MSA128>; +class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, + NoItinerary, MSA128, MSA128>; + +class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, NoItinerary, + MSA128, MSA128>; + +class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, NoItinerary, + MSA128, MSA128>; + +class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, NoItinerary, + MSA128, MSA128>; +class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, NoItinerary, + MSA128, MSA128>; +class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, NoItinerary, + MSA128, MSA128>; +class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, NoItinerary, + MSA128, MSA128>; + +class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", int_mips_bnegi_b, + NoItinerary, MSA128, MSA128>; +class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", int_mips_bnegi_h, + NoItinerary, MSA128, MSA128>; +class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", int_mips_bnegi_w, + NoItinerary, MSA128, MSA128>; +class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d, + NoItinerary, MSA128, MSA128>; + +class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, NoItinerary, + MSA128, MSA128>; + +class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, NoItinerary, + MSA128, MSA128>; +class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, NoItinerary, + MSA128, MSA128>; +class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, NoItinerary, + MSA128, MSA128>; +class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, NoItinerary, + MSA128, MSA128>; + +class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", int_mips_bseti_b, + NoItinerary, MSA128, MSA128>; +class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", int_mips_bseti_h, + NoItinerary, MSA128, MSA128>; +class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", int_mips_bseti_w, + NoItinerary, MSA128, MSA128>; +class 
BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", int_mips_bseti_d, + NoItinerary, MSA128, MSA128>; + +class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, NoItinerary, + MSA128, MSA128>, IsCommutable; +class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, NoItinerary, + MSA128, MSA128>, IsCommutable; +class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, NoItinerary, + MSA128, MSA128>, IsCommutable; +class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, NoItinerary, + MSA128, MSA128>, IsCommutable; + +class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, NoItinerary, + MSA128, MSA128>; +class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", int_mips_ceqi_h, NoItinerary, + MSA128, MSA128>; +class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", int_mips_ceqi_w, NoItinerary, + MSA128, MSA128>; +class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", int_mips_ceqi_d, NoItinerary, + MSA128, MSA128>; + +class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", int_mips_cle_s_b, NoItinerary, + MSA128, MSA128>; +class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", int_mips_cle_s_h, NoItinerary, + MSA128, MSA128>; +class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", int_mips_cle_s_w, NoItinerary, + MSA128, MSA128>; +class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", int_mips_cle_s_d, NoItinerary, + MSA128, MSA128>; + +class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", int_mips_cle_u_b, NoItinerary, + MSA128, MSA128>; +class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", int_mips_cle_u_h, NoItinerary, + MSA128, MSA128>; +class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", int_mips_cle_u_w, NoItinerary, + MSA128, MSA128>; +class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", int_mips_cle_u_d, NoItinerary, + MSA128, MSA128>; + +class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", int_mips_clei_s_b, + NoItinerary, MSA128, MSA128>; +class CLEI_S_H_DESC : MSA_SI5_DESC_BASE<"clei_s.h", int_mips_clei_s_h, + NoItinerary, MSA128, MSA128>; +class CLEI_S_W_DESC : MSA_SI5_DESC_BASE<"clei_s.w", int_mips_clei_s_w, + NoItinerary, MSA128, MSA128>; +class CLEI_S_D_DESC : MSA_SI5_DESC_BASE<"clei_s.d", int_mips_clei_s_d, + NoItinerary, MSA128, MSA128>; + +class CLEI_U_B_DESC : MSA_SI5_DESC_BASE<"clei_u.b", int_mips_clei_u_b, + NoItinerary, MSA128, MSA128>; +class CLEI_U_H_DESC : MSA_SI5_DESC_BASE<"clei_u.h", int_mips_clei_u_h, + NoItinerary, MSA128, MSA128>; +class CLEI_U_W_DESC : MSA_SI5_DESC_BASE<"clei_u.w", int_mips_clei_u_w, + NoItinerary, MSA128, MSA128>; +class CLEI_U_D_DESC : MSA_SI5_DESC_BASE<"clei_u.d", int_mips_clei_u_d, + NoItinerary, MSA128, MSA128>; + +class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", int_mips_clt_s_b, NoItinerary, + MSA128, MSA128>; +class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", int_mips_clt_s_h, NoItinerary, + MSA128, MSA128>; +class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", int_mips_clt_s_w, NoItinerary, + MSA128, MSA128>; +class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", int_mips_clt_s_d, NoItinerary, + MSA128, MSA128>; + +class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", int_mips_clt_u_b, NoItinerary, + MSA128, MSA128>; +class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", int_mips_clt_u_h, NoItinerary, + MSA128, MSA128>; +class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", int_mips_clt_u_w, NoItinerary, + MSA128, MSA128>; +class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", int_mips_clt_u_d, NoItinerary, + MSA128, MSA128>; + +class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", int_mips_clti_s_b, + NoItinerary, MSA128, MSA128>; +class CLTI_S_H_DESC : MSA_SI5_DESC_BASE<"clti_s.h", int_mips_clti_s_h, + 
NoItinerary, MSA128, MSA128>; +class CLTI_S_W_DESC : MSA_SI5_DESC_BASE<"clti_s.w", int_mips_clti_s_w, + NoItinerary, MSA128, MSA128>; +class CLTI_S_D_DESC : MSA_SI5_DESC_BASE<"clti_s.d", int_mips_clti_s_d, + NoItinerary, MSA128, MSA128>; + +class CLTI_U_B_DESC : MSA_SI5_DESC_BASE<"clti_u.b", int_mips_clti_u_b, + NoItinerary, MSA128, MSA128>; +class CLTI_U_H_DESC : MSA_SI5_DESC_BASE<"clti_u.h", int_mips_clti_u_h, + NoItinerary, MSA128, MSA128>; +class CLTI_U_W_DESC : MSA_SI5_DESC_BASE<"clti_u.w", int_mips_clti_u_w, + NoItinerary, MSA128, MSA128>; +class CLTI_U_D_DESC : MSA_SI5_DESC_BASE<"clti_u.d", int_mips_clti_u_d, + NoItinerary, MSA128, MSA128>; + +class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", int_mips_copy_s_b, + NoItinerary, GPR32, MSA128>; +class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", int_mips_copy_s_h, + NoItinerary, GPR32, MSA128>; +class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", int_mips_copy_s_w, + NoItinerary, GPR32, MSA128>; + +class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", int_mips_copy_u_b, + NoItinerary, GPR32, MSA128>; +class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", int_mips_copy_u_h, + NoItinerary, GPR32, MSA128>; +class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", int_mips_copy_u_w, + NoItinerary, GPR32, MSA128>; + +class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", int_mips_div_s_b, NoItinerary, + MSA128, MSA128>; +class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", int_mips_div_s_h, NoItinerary, + MSA128, MSA128>; +class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", int_mips_div_s_w, NoItinerary, + MSA128, MSA128>; +class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", int_mips_div_s_d, NoItinerary, + MSA128, MSA128>; + +class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", int_mips_div_u_b, NoItinerary, + MSA128, MSA128>; +class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, NoItinerary, + MSA128, MSA128>; +class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, NoItinerary, + MSA128, MSA128>; +class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, NoItinerary, + MSA128, MSA128>; + +class DOTP_S_B_DESC : MSA_3R_DESC_BASE<"dotp_s.b", int_mips_dotp_s_b, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class DOTP_U_B_DESC : MSA_3R_DESC_BASE<"dotp_u.b", int_mips_dotp_u_b, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DPADD_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.w", int_mips_dpadd_s_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DPADD_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.d", int_mips_dpadd_s_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class DPADD_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.h", int_mips_dpadd_u_h, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class 
DPADD_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.w", int_mips_dpadd_u_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class DPADD_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.d", int_mips_dpadd_u_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class DPSUB_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.h", int_mips_dpsub_s_h, + NoItinerary, MSA128, MSA128>; +class DPSUB_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.w", int_mips_dpsub_s_w, + NoItinerary, MSA128, MSA128>; +class DPSUB_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.d", int_mips_dpsub_s_d, + NoItinerary, MSA128, MSA128>; + +class DPSUB_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.h", int_mips_dpsub_u_h, + NoItinerary, MSA128, MSA128>; +class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w, + NoItinerary, MSA128, MSA128>; +class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, + NoItinerary, MSA128, MSA128>; + +class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, NoItinerary, + MSA128, MSA128>; +class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, NoItinerary, + MSA128, MSA128>; +class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", int_mips_ilvev_w, NoItinerary, + MSA128, MSA128>; +class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", int_mips_ilvev_d, NoItinerary, + MSA128, MSA128>; + +class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", int_mips_ilvl_b, NoItinerary, + MSA128, MSA128>; +class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", int_mips_ilvl_h, NoItinerary, + MSA128, MSA128>; +class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", int_mips_ilvl_w, NoItinerary, + MSA128, MSA128>; +class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", int_mips_ilvl_d, NoItinerary, + MSA128, MSA128>; + +class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", int_mips_ilvod_b, NoItinerary, + MSA128, MSA128>; +class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", int_mips_ilvod_h, NoItinerary, + MSA128, MSA128>; +class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", int_mips_ilvod_w, NoItinerary, + MSA128, MSA128>; +class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", int_mips_ilvod_d, NoItinerary, + MSA128, MSA128>; + +class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", int_mips_ilvr_b, NoItinerary, + MSA128, MSA128>; +class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, NoItinerary, + MSA128, MSA128>; +class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, NoItinerary, + MSA128, MSA128>; +class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, NoItinerary, + MSA128, MSA128>; + +class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", int_mips_insert_b, + NoItinerary, MSA128, GPR32>; +class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", int_mips_insert_h, + NoItinerary, MSA128, GPR32>; +class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", int_mips_insert_w, + NoItinerary, MSA128, GPR32>; + class LD_DESC_BASE { @@ -32,6 +902,20 @@ class LD_DESC_BASE; +class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, NoItinerary, MSA128>; +class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, NoItinerary, MSA128>; +class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, NoItinerary, MSA128>; + +class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", int_mips_ldi_b, + NoItinerary, MSA128>; +class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", int_mips_ldi_h, + NoItinerary, MSA128>; +class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w, + NoItinerary, MSA128>; +class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d, + NoItinerary, MSA128>; + class ST_DESC_BASE { @@ -43,21 +927,257 @@ class ST_DESC_BASE; -class LD_H_DESC : LD_DESC_BASE<"ld.h", load, 
v8i16, NoItinerary, MSA128>; -class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, NoItinerary, MSA128>; -class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, NoItinerary, MSA128>; class ST_B_DESC : ST_DESC_BASE<"st.b", store, v16i8, NoItinerary, MSA128>; class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, NoItinerary, MSA128>; class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, NoItinerary, MSA128>; class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, NoItinerary, MSA128>; // Instruction defs. +def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC, Requires<[HasMSA]>; +def ADD_A_H : ADD_A_H_ENC, ADD_A_H_DESC, Requires<[HasMSA]>; +def ADD_A_W : ADD_A_W_ENC, ADD_A_W_DESC, Requires<[HasMSA]>; +def ADD_A_D : ADD_A_D_ENC, ADD_A_D_DESC, Requires<[HasMSA]>; + +def ADDS_A_B : ADDS_A_B_ENC, ADDS_A_B_DESC, Requires<[HasMSA]>; +def ADDS_A_H : ADDS_A_H_ENC, ADDS_A_H_DESC, Requires<[HasMSA]>; +def ADDS_A_W : ADDS_A_W_ENC, ADDS_A_W_DESC, Requires<[HasMSA]>; +def ADDS_A_D : ADDS_A_D_ENC, ADDS_A_D_DESC, Requires<[HasMSA]>; + +def ADDS_S_B : ADDS_S_B_ENC, ADDS_S_B_DESC, Requires<[HasMSA]>; +def ADDS_S_H : ADDS_S_H_ENC, ADDS_S_H_DESC, Requires<[HasMSA]>; +def ADDS_S_W : ADDS_S_W_ENC, ADDS_S_W_DESC, Requires<[HasMSA]>; +def ADDS_S_D : ADDS_S_D_ENC, ADDS_S_D_DESC, Requires<[HasMSA]>; + +def ADDS_U_B : ADDS_U_B_ENC, ADDS_U_B_DESC, Requires<[HasMSA]>; +def ADDS_U_H : ADDS_U_H_ENC, ADDS_U_H_DESC, Requires<[HasMSA]>; +def ADDS_U_W : ADDS_U_W_ENC, ADDS_U_W_DESC, Requires<[HasMSA]>; +def ADDS_U_D : ADDS_U_D_ENC, ADDS_U_D_DESC, Requires<[HasMSA]>; + +def ADDV_B : ADDV_B_ENC, ADDV_B_DESC, Requires<[HasMSA]>; +def ADDV_H : ADDV_H_ENC, ADDV_H_DESC, Requires<[HasMSA]>; +def ADDV_W : ADDV_W_ENC, ADDV_W_DESC, Requires<[HasMSA]>; +def ADDV_D : ADDV_D_ENC, ADDV_D_DESC, Requires<[HasMSA]>; + +def ADDVI_B : ADDVI_B_ENC, ADDVI_B_DESC, Requires<[HasMSA]>; +def ADDVI_H : ADDVI_H_ENC, ADDVI_H_DESC, Requires<[HasMSA]>; +def ADDVI_W : ADDVI_W_ENC, ADDVI_W_DESC, Requires<[HasMSA]>; +def ADDVI_D : ADDVI_D_ENC, ADDVI_D_DESC, Requires<[HasMSA]>; + +def ANDI_B : ANDI_B_ENC, ANDI_B_DESC, Requires<[HasMSA]>; + +def ASUB_S_B : ASUB_S_B_ENC, ASUB_S_B_DESC, Requires<[HasMSA]>; +def ASUB_S_H : ASUB_S_H_ENC, ASUB_S_H_DESC, Requires<[HasMSA]>; +def ASUB_S_W : ASUB_S_W_ENC, ASUB_S_W_DESC, Requires<[HasMSA]>; +def ASUB_S_D : ASUB_S_D_ENC, ASUB_S_D_DESC, Requires<[HasMSA]>; + +def ASUB_U_B : ASUB_U_B_ENC, ASUB_U_B_DESC, Requires<[HasMSA]>; +def ASUB_U_H : ASUB_U_H_ENC, ASUB_U_H_DESC, Requires<[HasMSA]>; +def ASUB_U_W : ASUB_U_W_ENC, ASUB_U_W_DESC, Requires<[HasMSA]>; +def ASUB_U_D : ASUB_U_D_ENC, ASUB_U_D_DESC, Requires<[HasMSA]>; + +def AVE_S_B : AVE_S_B_ENC, AVE_S_B_DESC, Requires<[HasMSA]>; +def AVE_S_H : AVE_S_H_ENC, AVE_S_H_DESC, Requires<[HasMSA]>; +def AVE_S_W : AVE_S_W_ENC, AVE_S_W_DESC, Requires<[HasMSA]>; +def AVE_S_D : AVE_S_D_ENC, AVE_S_D_DESC, Requires<[HasMSA]>; + +def AVE_U_B : AVE_U_B_ENC, AVE_U_B_DESC, Requires<[HasMSA]>; +def AVE_U_H : AVE_U_H_ENC, AVE_U_H_DESC, Requires<[HasMSA]>; +def AVE_U_W : AVE_U_W_ENC, AVE_U_W_DESC, Requires<[HasMSA]>; +def AVE_U_D : AVE_U_D_ENC, AVE_U_D_DESC, Requires<[HasMSA]>; + +def AVER_S_B : AVER_S_B_ENC, AVER_S_B_DESC, Requires<[HasMSA]>; +def AVER_S_H : AVER_S_H_ENC, AVER_S_H_DESC, Requires<[HasMSA]>; +def AVER_S_W : AVER_S_W_ENC, AVER_S_W_DESC, Requires<[HasMSA]>; +def AVER_S_D : AVER_S_D_ENC, AVER_S_D_DESC, Requires<[HasMSA]>; + +def AVER_U_B : AVER_U_B_ENC, AVER_U_B_DESC, Requires<[HasMSA]>; +def AVER_U_H : AVER_U_H_ENC, AVER_U_H_DESC, Requires<[HasMSA]>; +def AVER_U_W : AVER_U_W_ENC, AVER_U_W_DESC, 
Requires<[HasMSA]>; +def AVER_U_D : AVER_U_D_ENC, AVER_U_D_DESC, Requires<[HasMSA]>; + +def BCLR_B : BCLR_B_ENC, BCLR_B_DESC, Requires<[HasMSA]>; +def BCLR_H : BCLR_H_ENC, BCLR_H_DESC, Requires<[HasMSA]>; +def BCLR_W : BCLR_W_ENC, BCLR_W_DESC, Requires<[HasMSA]>; +def BCLR_D : BCLR_D_ENC, BCLR_D_DESC, Requires<[HasMSA]>; + +def BCLRI_B : BCLRI_B_ENC, BCLRI_B_DESC, Requires<[HasMSA]>; +def BCLRI_H : BCLRI_H_ENC, BCLRI_H_DESC, Requires<[HasMSA]>; +def BCLRI_W : BCLRI_W_ENC, BCLRI_W_DESC, Requires<[HasMSA]>; +def BCLRI_D : BCLRI_D_ENC, BCLRI_D_DESC, Requires<[HasMSA]>; + +def BINSL_B : BINSL_B_ENC, BINSL_B_DESC, Requires<[HasMSA]>; +def BINSL_H : BINSL_H_ENC, BINSL_H_DESC, Requires<[HasMSA]>; +def BINSL_W : BINSL_W_ENC, BINSL_W_DESC, Requires<[HasMSA]>; +def BINSL_D : BINSL_D_ENC, BINSL_D_DESC, Requires<[HasMSA]>; + +def BINSLI_B : BINSLI_B_ENC, BINSLI_B_DESC, Requires<[HasMSA]>; +def BINSLI_H : BINSLI_H_ENC, BINSLI_H_DESC, Requires<[HasMSA]>; +def BINSLI_W : BINSLI_W_ENC, BINSLI_W_DESC, Requires<[HasMSA]>; +def BINSLI_D : BINSLI_D_ENC, BINSLI_D_DESC, Requires<[HasMSA]>; + +def BINSR_B : BINSR_B_ENC, BINSR_B_DESC, Requires<[HasMSA]>; +def BINSR_H : BINSR_H_ENC, BINSR_H_DESC, Requires<[HasMSA]>; +def BINSR_W : BINSR_W_ENC, BINSR_W_DESC, Requires<[HasMSA]>; +def BINSR_D : BINSR_D_ENC, BINSR_D_DESC, Requires<[HasMSA]>; + +def BINSRI_B : BINSRI_B_ENC, BINSRI_B_DESC, Requires<[HasMSA]>; +def BINSRI_H : BINSRI_H_ENC, BINSRI_H_DESC, Requires<[HasMSA]>; +def BINSRI_W : BINSRI_W_ENC, BINSRI_W_DESC, Requires<[HasMSA]>; +def BINSRI_D : BINSRI_D_ENC, BINSRI_D_DESC, Requires<[HasMSA]>; + +def BMNZI_B : BMNZI_B_ENC, BMNZI_B_DESC, Requires<[HasMSA]>; + +def BMZI_B : BMZI_B_ENC, BMZI_B_DESC, Requires<[HasMSA]>; + +def BNEG_B : BNEG_B_ENC, BNEG_B_DESC, Requires<[HasMSA]>; +def BNEG_H : BNEG_H_ENC, BNEG_H_DESC, Requires<[HasMSA]>; +def BNEG_W : BNEG_W_ENC, BNEG_W_DESC, Requires<[HasMSA]>; +def BNEG_D : BNEG_D_ENC, BNEG_D_DESC, Requires<[HasMSA]>; + +def BNEGI_B : BNEGI_B_ENC, BNEGI_B_DESC, Requires<[HasMSA]>; +def BNEGI_H : BNEGI_H_ENC, BNEGI_H_DESC, Requires<[HasMSA]>; +def BNEGI_W : BNEGI_W_ENC, BNEGI_W_DESC, Requires<[HasMSA]>; +def BNEGI_D : BNEGI_D_ENC, BNEGI_D_DESC, Requires<[HasMSA]>; + +def BSELI_B : BSELI_B_ENC, BSELI_B_DESC, Requires<[HasMSA]>; + +def BSET_B : BSET_B_ENC, BSET_B_DESC, Requires<[HasMSA]>; +def BSET_H : BSET_H_ENC, BSET_H_DESC, Requires<[HasMSA]>; +def BSET_W : BSET_W_ENC, BSET_W_DESC, Requires<[HasMSA]>; +def BSET_D : BSET_D_ENC, BSET_D_DESC, Requires<[HasMSA]>; + +def BSETI_B : BSETI_B_ENC, BSETI_B_DESC, Requires<[HasMSA]>; +def BSETI_H : BSETI_H_ENC, BSETI_H_DESC, Requires<[HasMSA]>; +def BSETI_W : BSETI_W_ENC, BSETI_W_DESC, Requires<[HasMSA]>; +def BSETI_D : BSETI_D_ENC, BSETI_D_DESC, Requires<[HasMSA]>; + +def CEQ_B : CEQ_B_ENC, CEQ_B_DESC, Requires<[HasMSA]>; +def CEQ_H : CEQ_H_ENC, CEQ_H_DESC, Requires<[HasMSA]>; +def CEQ_W : CEQ_W_ENC, CEQ_W_DESC, Requires<[HasMSA]>; +def CEQ_D : CEQ_D_ENC, CEQ_D_DESC, Requires<[HasMSA]>; + +def CEQI_B : CEQI_B_ENC, CEQI_B_DESC, Requires<[HasMSA]>; +def CEQI_H : CEQI_H_ENC, CEQI_H_DESC, Requires<[HasMSA]>; +def CEQI_W : CEQI_W_ENC, CEQI_W_DESC, Requires<[HasMSA]>; +def CEQI_D : CEQI_D_ENC, CEQI_D_DESC, Requires<[HasMSA]>; + +def CLE_S_B : CLE_S_B_ENC, CLE_S_B_DESC, Requires<[HasMSA]>; +def CLE_S_H : CLE_S_H_ENC, CLE_S_H_DESC, Requires<[HasMSA]>; +def CLE_S_W : CLE_S_W_ENC, CLE_S_W_DESC, Requires<[HasMSA]>; +def CLE_S_D : CLE_S_D_ENC, CLE_S_D_DESC, Requires<[HasMSA]>; + +def CLE_U_B : CLE_U_B_ENC, CLE_U_B_DESC, Requires<[HasMSA]>; +def CLE_U_H : 
CLE_U_H_ENC, CLE_U_H_DESC, Requires<[HasMSA]>; +def CLE_U_W : CLE_U_W_ENC, CLE_U_W_DESC, Requires<[HasMSA]>; +def CLE_U_D : CLE_U_D_ENC, CLE_U_D_DESC, Requires<[HasMSA]>; + +def CLEI_S_B : CLEI_S_B_ENC, CLEI_S_B_DESC, Requires<[HasMSA]>; +def CLEI_S_H : CLEI_S_H_ENC, CLEI_S_H_DESC, Requires<[HasMSA]>; +def CLEI_S_W : CLEI_S_W_ENC, CLEI_S_W_DESC, Requires<[HasMSA]>; +def CLEI_S_D : CLEI_S_D_ENC, CLEI_S_D_DESC, Requires<[HasMSA]>; + +def CLEI_U_B : CLEI_U_B_ENC, CLEI_U_B_DESC, Requires<[HasMSA]>; +def CLEI_U_H : CLEI_U_H_ENC, CLEI_U_H_DESC, Requires<[HasMSA]>; +def CLEI_U_W : CLEI_U_W_ENC, CLEI_U_W_DESC, Requires<[HasMSA]>; +def CLEI_U_D : CLEI_U_D_ENC, CLEI_U_D_DESC, Requires<[HasMSA]>; + +def CLT_S_B : CLT_S_B_ENC, CLT_S_B_DESC, Requires<[HasMSA]>; +def CLT_S_H : CLT_S_H_ENC, CLT_S_H_DESC, Requires<[HasMSA]>; +def CLT_S_W : CLT_S_W_ENC, CLT_S_W_DESC, Requires<[HasMSA]>; +def CLT_S_D : CLT_S_D_ENC, CLT_S_D_DESC, Requires<[HasMSA]>; + +def CLT_U_B : CLT_U_B_ENC, CLT_U_B_DESC, Requires<[HasMSA]>; +def CLT_U_H : CLT_U_H_ENC, CLT_U_H_DESC, Requires<[HasMSA]>; +def CLT_U_W : CLT_U_W_ENC, CLT_U_W_DESC, Requires<[HasMSA]>; +def CLT_U_D : CLT_U_D_ENC, CLT_U_D_DESC, Requires<[HasMSA]>; + +def CLTI_S_B : CLTI_S_B_ENC, CLTI_S_B_DESC, Requires<[HasMSA]>; +def CLTI_S_H : CLTI_S_H_ENC, CLTI_S_H_DESC, Requires<[HasMSA]>; +def CLTI_S_W : CLTI_S_W_ENC, CLTI_S_W_DESC, Requires<[HasMSA]>; +def CLTI_S_D : CLTI_S_D_ENC, CLTI_S_D_DESC, Requires<[HasMSA]>; + +def CLTI_U_B : CLTI_U_B_ENC, CLTI_U_B_DESC, Requires<[HasMSA]>; +def CLTI_U_H : CLTI_U_H_ENC, CLTI_U_H_DESC, Requires<[HasMSA]>; +def CLTI_U_W : CLTI_U_W_ENC, CLTI_U_W_DESC, Requires<[HasMSA]>; +def CLTI_U_D : CLTI_U_D_ENC, CLTI_U_D_DESC, Requires<[HasMSA]>; + +def COPY_S_B : COPY_S_B_ENC, COPY_S_B_DESC, Requires<[HasMSA]>; +def COPY_S_H : COPY_S_H_ENC, COPY_S_H_DESC, Requires<[HasMSA]>; +def COPY_S_W : COPY_S_W_ENC, COPY_S_W_DESC, Requires<[HasMSA]>; + +def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC, Requires<[HasMSA]>; +def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC, Requires<[HasMSA]>; +def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC, Requires<[HasMSA]>; + +def DIV_S_B : DIV_S_B_ENC, DIV_S_B_DESC, Requires<[HasMSA]>; +def DIV_S_H : DIV_S_H_ENC, DIV_S_H_DESC, Requires<[HasMSA]>; +def DIV_S_W : DIV_S_W_ENC, DIV_S_W_DESC, Requires<[HasMSA]>; +def DIV_S_D : DIV_S_D_ENC, DIV_S_D_DESC, Requires<[HasMSA]>; + +def DIV_U_B : DIV_U_B_ENC, DIV_U_B_DESC, Requires<[HasMSA]>; +def DIV_U_H : DIV_U_H_ENC, DIV_U_H_DESC, Requires<[HasMSA]>; +def DIV_U_W : DIV_U_W_ENC, DIV_U_W_DESC, Requires<[HasMSA]>; +def DIV_U_D : DIV_U_D_ENC, DIV_U_D_DESC, Requires<[HasMSA]>; + +def DOTP_S_B : DOTP_S_B_ENC, DOTP_S_B_DESC, Requires<[HasMSA]>; +def DOTP_S_H : DOTP_S_H_ENC, DOTP_S_H_DESC, Requires<[HasMSA]>; +def DOTP_S_W : DOTP_S_W_ENC, DOTP_S_W_DESC, Requires<[HasMSA]>; +def DOTP_S_D : DOTP_S_D_ENC, DOTP_S_D_DESC, Requires<[HasMSA]>; + +def DOTP_U_B : DOTP_U_B_ENC, DOTP_U_B_DESC, Requires<[HasMSA]>; +def DOTP_U_H : DOTP_U_H_ENC, DOTP_U_H_DESC, Requires<[HasMSA]>; +def DOTP_U_W : DOTP_U_W_ENC, DOTP_U_W_DESC, Requires<[HasMSA]>; +def DOTP_U_D : DOTP_U_D_ENC, DOTP_U_D_DESC, Requires<[HasMSA]>; + +def DPADD_S_H : DPADD_S_H_ENC, DPADD_S_H_DESC, Requires<[HasMSA]>; +def DPADD_S_W : DPADD_S_W_ENC, DPADD_S_W_DESC, Requires<[HasMSA]>; +def DPADD_S_D : DPADD_S_D_ENC, DPADD_S_D_DESC, Requires<[HasMSA]>; + +def DPADD_U_H : DPADD_U_H_ENC, DPADD_U_H_DESC, Requires<[HasMSA]>; +def DPADD_U_W : DPADD_U_W_ENC, DPADD_U_W_DESC, Requires<[HasMSA]>; +def DPADD_U_D : DPADD_U_D_ENC, DPADD_U_D_DESC, Requires<[HasMSA]>; + 
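Taken together, the dotp/dpadd/dpsub definitions above give the widening dot-product family its instruction-selection plumbing; from user code these are reached through the corresponding GCC/Clang MSA builtins. A minimal sketch in C, assuming the msa.h vector typedefs and __builtin_msa_dpadd_s_h are available in the toolchain (build with -mmsa):

    #include <msa.h>

    /* Each of the eight v8i16 lanes of acc accumulates the sum of two
       adjacent signed v16i8 products, the operation int_mips_dpadd_s_h
       selects above. */
    v8i16 dotp_accumulate(v8i16 acc, v16i8 a, v16i8 b) {
      return __builtin_msa_dpadd_s_h(acc, a, b);
    }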
+def DPSUB_S_H : DPSUB_S_H_ENC, DPSUB_S_H_DESC, Requires<[HasMSA]>; +def DPSUB_S_W : DPSUB_S_W_ENC, DPSUB_S_W_DESC, Requires<[HasMSA]>; +def DPSUB_S_D : DPSUB_S_D_ENC, DPSUB_S_D_DESC, Requires<[HasMSA]>; + +def DPSUB_U_H : DPSUB_U_H_ENC, DPSUB_U_H_DESC, Requires<[HasMSA]>; +def DPSUB_U_W : DPSUB_U_W_ENC, DPSUB_U_W_DESC, Requires<[HasMSA]>; +def DPSUB_U_D : DPSUB_U_D_ENC, DPSUB_U_D_DESC, Requires<[HasMSA]>; + +def ILVEV_B : ILVEV_B_ENC, ILVEV_B_DESC, Requires<[HasMSA]>; +def ILVEV_H : ILVEV_H_ENC, ILVEV_H_DESC, Requires<[HasMSA]>; +def ILVEV_W : ILVEV_W_ENC, ILVEV_W_DESC, Requires<[HasMSA]>; +def ILVEV_D : ILVEV_D_ENC, ILVEV_D_DESC, Requires<[HasMSA]>; + +def ILVL_B : ILVL_B_ENC, ILVL_B_DESC, Requires<[HasMSA]>; +def ILVL_H : ILVL_H_ENC, ILVL_H_DESC, Requires<[HasMSA]>; +def ILVL_W : ILVL_W_ENC, ILVL_W_DESC, Requires<[HasMSA]>; +def ILVL_D : ILVL_D_ENC, ILVL_D_DESC, Requires<[HasMSA]>; + +def ILVOD_B : ILVOD_B_ENC, ILVOD_B_DESC, Requires<[HasMSA]>; +def ILVOD_H : ILVOD_H_ENC, ILVOD_H_DESC, Requires<[HasMSA]>; +def ILVOD_W : ILVOD_W_ENC, ILVOD_W_DESC, Requires<[HasMSA]>; +def ILVOD_D : ILVOD_D_ENC, ILVOD_D_DESC, Requires<[HasMSA]>; + +def ILVR_B : ILVR_B_ENC, ILVR_B_DESC, Requires<[HasMSA]>; +def ILVR_H : ILVR_H_ENC, ILVR_H_DESC, Requires<[HasMSA]>; +def ILVR_W : ILVR_W_ENC, ILVR_W_DESC, Requires<[HasMSA]>; +def ILVR_D : ILVR_D_ENC, ILVR_D_DESC, Requires<[HasMSA]>; + +def INSERT_B : INSERT_B_ENC, INSERT_B_DESC, Requires<[HasMSA]>; +def INSERT_H : INSERT_H_ENC, INSERT_H_DESC, Requires<[HasMSA]>; +def INSERT_W : INSERT_W_ENC, INSERT_W_DESC, Requires<[HasMSA]>; + def LD_B: LD_B_ENC, LD_B_DESC, Requires<[HasMSA]>; def LD_H: LD_H_ENC, LD_H_DESC, Requires<[HasMSA]>; def LD_W: LD_W_ENC, LD_W_DESC, Requires<[HasMSA]>; def LD_D: LD_D_ENC, LD_D_DESC, Requires<[HasMSA]>; +def LDI_B : LDI_B_ENC, LDI_B_DESC, Requires<[HasMSA]>; +def LDI_H : LDI_H_ENC, LDI_H_DESC, Requires<[HasMSA]>; +def LDI_W : LDI_W_ENC, LDI_W_DESC, Requires<[HasMSA]>; + def ST_B: ST_B_ENC, ST_B_DESC, Requires<[HasMSA]>; def ST_H: ST_H_ENC, ST_H_DESC, Requires<[HasMSA]>; def ST_W: ST_W_ENC, ST_W_DESC, Requires<[HasMSA]>; @@ -67,3 +1187,12 @@ def ST_D: ST_D_ENC, ST_D_DESC, Requires<[HasMSA]>; class MSAPat : Pat, Requires<[pred]>; +def LD_FW : MSAPat<(v4f32 (load addr:$addr)), + (LD_W addr:$addr)>; +def LD_FD : MSAPat<(v2f64 (load addr:$addr)), + (LD_D addr:$addr)>; + +def ST_FW : MSAPat<(store (v4f32 MSA128:$ws), addr:$addr), + (ST_W MSA128:$ws, addr:$addr)>; +def ST_FD : MSAPat<(store (v2f64 MSA128:$ws), addr:$addr), + (ST_D MSA128:$ws, addr:$addr)>; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 176eb64..b7c005f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -343,8 +343,9 @@ def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>, def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>, Unallocatable; -def MSA128: RegisterClass<"Mips", [v16i8, v8i16, v4i32, v2i64], 128, - (sequence "W%u", 0, 31)>; +def MSA128: RegisterClass<"Mips", + [v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], + 128, (sequence "W%u", 0, 31)>; // Hi/Lo Registers def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index c54c55c..750ec0e 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -78,20 +78,13 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MUL, 
MVT::v2i16, Legal);

   if (Subtarget->hasMSA()) {
-    MVT::SimpleValueType VecTys[4] = {MVT::v16i8, MVT::v8i16,
-                                      MVT::v4i32, MVT::v2i64};
-
-    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
-      addRegisterClass(VecTys[i], &Mips::MSA128RegClass);
-
-      // Expand all builtin opcodes.
-      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
-        setOperationAction(Opc, VecTys[i], Expand);
-
-      setOperationAction(ISD::LOAD, VecTys[i], Legal);
-      setOperationAction(ISD::STORE, VecTys[i], Legal);
-      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
-    }
+    addMSAType(MVT::v16i8);
+    addMSAType(MVT::v8i16);
+    addMSAType(MVT::v4i32);
+    addMSAType(MVT::v2i64);
+    addMSAType(MVT::v8f16);
+    addMSAType(MVT::v4f32);
+    addMSAType(MVT::v2f64);
   }

   if (!TM.Options.UseSoftFloat) {
@@ -140,6 +133,18 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
   return new MipsSETargetLowering(TM);
 }

+void
+MipsSETargetLowering::addMSAType(MVT::SimpleValueType Ty) {
+  addRegisterClass(Ty, &Mips::MSA128RegClass);
+
+  // Expand all builtin opcodes.
+  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+    setOperationAction(Opc, Ty, Expand);
+
+  setOperationAction(ISD::LOAD, Ty, Legal);
+  setOperationAction(ISD::STORE, Ty, Legal);
+  setOperationAction(ISD::BITCAST, Ty, Legal);
+}

 bool
 MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index da1321b..b56036d 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -22,6 +22,8 @@ namespace llvm {
   public:
     explicit MipsSETargetLowering(MipsTargetMachine &TM);

+    void addMSAType(MVT::SimpleValueType Ty);
+
     virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;

     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
--
cgit v1.1


From d0f99639c16ddad697db30e75643ae4cc52c3e80 Mon Sep 17 00:00:00 2001
From: Jack Carter
Date: Thu, 15 Aug 2013 13:45:36 +0000
Subject: [Mips][msa] Added the simple builtins (fadd to ftq)

Includes:
fadd, fceq, fcg[et], fclass, fcl[et], fcne, fcun, fdiv, fexdo, fexp2,
fexup[lr], ffint_[su], ffql, ffqr, fill, flog2, fmadd, fmax, fmax_a,
fmin, fmin_a, fmsub, fmul, frint, frcp, frsqrt, fseq, fsge, fsgt, fsle,
fslt, fsne, fsqrt, fsub, ftint_[su], ftq

Patch by Daniel Sanders

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188458 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsMSAInstrInfo.td | 467 ++++++++++++++++++++++++++++++++++++
 1 file changed, 467 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index ea8938b..82db568 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -247,6 +247,130 @@ class DPSUB_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010011>;
 class DPSUB_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010011>;
 class DPSUB_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010011>;

+class FADD_W_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011011>;
+class FADD_D_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011011>;
+
+class FCEQ_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011010>;
+class FCEQ_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011010>;
+
+class FCGE_W_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011010>;
+class FCGE_D_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011010>;
+
+class FCGT_W_ENC : MSA_3RF_FMT<0b0111, 0b0, 0b011010>;
+class FCGT_D_ENC : MSA_3RF_FMT<0b0111, 0b1, 0b011010>;
+
+class FCLASS_W_ENC : MSA_2RF_FMT<0b110010000, 0b0, 0b011110>;
+class FCLASS_D_ENC : MSA_2RF_FMT<0b110010000, 0b1, 0b011110>;
+
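These encodings become user-visible once the matching *_DESC classes bind them to int_mips_* intrinsics below; from C the same operations surface as GCC/Clang MSA builtins. A minimal sketch for fadd.w, assuming msa.h and __builtin_msa_fadd_w are available in the toolchain (build with -mmsa):

    #include <msa.h>

    /* Lane-wise single-precision addition over four floats, the
       operation int_mips_fadd_w / fadd.w selects. */
    v4f32 vec_add(v4f32 a, v4f32 b) {
      return __builtin_msa_fadd_w(a, b);
    }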
+class FCLE_W_ENC : MSA_3RF_FMT<0b0110, 0b0, 0b011010>; +class FCLE_D_ENC : MSA_3RF_FMT<0b0110, 0b1, 0b011010>; + +class FCLT_W_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011010>; +class FCLT_D_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011010>; + +class FCNE_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011010>; +class FCNE_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011010>; + +class FCUN_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011010>; +class FCUN_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011010>; + +class FDIV_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011011>; +class FDIV_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011011>; + +class FEXDO_H_ENC : MSA_3RF_FMT<0b1000, 0b0, 0b011011>; +class FEXDO_W_ENC : MSA_3RF_FMT<0b1000, 0b1, 0b011011>; + +class FEXP2_W_ENC : MSA_3RF_FMT<0b0111, 0b0, 0b011011>; +class FEXP2_D_ENC : MSA_3RF_FMT<0b0111, 0b1, 0b011011>; + +class FEXUPL_W_ENC : MSA_2RF_FMT<0b110011000, 0b0, 0b011110>; +class FEXUPL_D_ENC : MSA_2RF_FMT<0b110011000, 0b1, 0b011110>; + +class FEXUPR_W_ENC : MSA_2RF_FMT<0b110011001, 0b0, 0b011110>; +class FEXUPR_D_ENC : MSA_2RF_FMT<0b110011001, 0b1, 0b011110>; + +class FFINT_S_W_ENC : MSA_2RF_FMT<0b110011110, 0b0, 0b011110>; +class FFINT_S_D_ENC : MSA_2RF_FMT<0b110011110, 0b1, 0b011110>; + +class FFINT_U_W_ENC : MSA_2RF_FMT<0b110011111, 0b0, 0b011110>; +class FFINT_U_D_ENC : MSA_2RF_FMT<0b110011111, 0b1, 0b011110>; + +class FFQL_W_ENC : MSA_2RF_FMT<0b110011010, 0b0, 0b011110>; +class FFQL_D_ENC : MSA_2RF_FMT<0b110011010, 0b1, 0b011110>; + +class FFQR_W_ENC : MSA_2RF_FMT<0b110011011, 0b0, 0b011110>; +class FFQR_D_ENC : MSA_2RF_FMT<0b110011011, 0b1, 0b011110>; + +class FILL_B_ENC : MSA_2R_FMT<0b11000000, 0b00, 0b011110>; +class FILL_H_ENC : MSA_2R_FMT<0b11000000, 0b01, 0b011110>; +class FILL_W_ENC : MSA_2R_FMT<0b11000000, 0b10, 0b011110>; + +class FLOG2_W_ENC : MSA_2RF_FMT<0b110010111, 0b0, 0b011110>; +class FLOG2_D_ENC : MSA_2RF_FMT<0b110010111, 0b1, 0b011110>; + +class FMADD_W_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011011>; +class FMADD_D_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011011>; + +class FMAX_W_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011011>; +class FMAX_D_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011011>; + +class FMAX_A_W_ENC : MSA_3RF_FMT<0b1111, 0b0, 0b011011>; +class FMAX_A_D_ENC : MSA_3RF_FMT<0b1111, 0b1, 0b011011>; + +class FMIN_W_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011011>; +class FMIN_D_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011011>; + +class FMIN_A_W_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011011>; +class FMIN_A_D_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011011>; + +class FMSUB_W_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011011>; +class FMSUB_D_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011011>; + +class FMUL_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011011>; +class FMUL_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011011>; + +class FRINT_W_ENC : MSA_2RF_FMT<0b110010110, 0b0, 0b011110>; +class FRINT_D_ENC : MSA_2RF_FMT<0b110010110, 0b1, 0b011110>; + +class FRCP_W_ENC : MSA_2RF_FMT<0b110010101, 0b0, 0b011110>; +class FRCP_D_ENC : MSA_2RF_FMT<0b110010101, 0b1, 0b011110>; + +class FRSQRT_W_ENC : MSA_2RF_FMT<0b110010100, 0b0, 0b011110>; +class FRSQRT_D_ENC : MSA_2RF_FMT<0b110010100, 0b1, 0b011110>; + +class FSEQ_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011010>; +class FSEQ_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011010>; + +class FSGE_W_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011010>; +class FSGE_D_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011010>; + +class FSGT_W_ENC : MSA_3RF_FMT<0b1111, 0b0, 0b011010>; +class FSGT_D_ENC : MSA_3RF_FMT<0b1111, 0b1, 0b011010>; + +class FSLE_W_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011010>; +class FSLE_D_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011010>; + +class FSLT_W_ENC : 
MSA_3RF_FMT<0b1100, 0b0, 0b011010>; +class FSLT_D_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011010>; + +class FSNE_W_ENC : MSA_3RF_FMT<0b1011, 0b0, 0b011010>; +class FSNE_D_ENC : MSA_3RF_FMT<0b1011, 0b1, 0b011010>; + +class FSQRT_W_ENC : MSA_2RF_FMT<0b110010011, 0b0, 0b011110>; +class FSQRT_D_ENC : MSA_2RF_FMT<0b110010011, 0b1, 0b011110>; + +class FSUB_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011011>; +class FSUB_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011011>; + +class FTINT_S_W_ENC : MSA_2RF_FMT<0b110011100, 0b0, 0b011110>; +class FTINT_S_D_ENC : MSA_2RF_FMT<0b110011100, 0b1, 0b011110>; + +class FTINT_U_W_ENC : MSA_2RF_FMT<0b110011101, 0b0, 0b011110>; +class FTINT_U_D_ENC : MSA_2RF_FMT<0b110011101, 0b1, 0b011110>; + +class FTQ_H_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011011>; +class FTQ_W_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011011>; + class ILVEV_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010100>; class ILVEV_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010100>; class ILVEV_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010100>; @@ -849,6 +973,221 @@ class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w, class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, NoItinerary, MSA128, MSA128>; +class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", int_mips_fadd_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", int_mips_fadd_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", int_mips_fceq_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class FCGE_W_DESC : MSA_3RF_DESC_BASE<"fcge.w", int_mips_fcge_w, + NoItinerary, MSA128, MSA128>; +class FCGE_D_DESC : MSA_3RF_DESC_BASE<"fcge.d", int_mips_fcge_d, + NoItinerary, MSA128, MSA128>; + +class FCGT_W_DESC : MSA_3RF_DESC_BASE<"fcgt.w", int_mips_fcgt_w, + NoItinerary, MSA128, MSA128>; +class FCGT_D_DESC : MSA_3RF_DESC_BASE<"fcgt.d", int_mips_fcgt_d, + NoItinerary, MSA128, MSA128>; + +class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, + NoItinerary, MSA128, MSA128>; +class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, + NoItinerary, MSA128, MSA128>; + +class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", int_mips_fcle_w, + NoItinerary, MSA128, MSA128>; +class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", int_mips_fcle_d, + NoItinerary, MSA128, MSA128>; + +class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", int_mips_fclt_w, + NoItinerary, MSA128, MSA128>; +class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", int_mips_fclt_d, + NoItinerary, MSA128, MSA128>; + +class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", int_mips_fcne_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", int_mips_fcne_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", int_mips_fcun_w, + NoItinerary, MSA128, MSA128>, + IsCommutable; +class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", int_mips_fcun_d, + NoItinerary, MSA128, MSA128>, + IsCommutable; + +class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", int_mips_fdiv_w, + NoItinerary, MSA128, MSA128>; +class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", int_mips_fdiv_d, + NoItinerary, MSA128, MSA128>; + +class FEXDO_H_DESC : MSA_3RF_DESC_BASE<"fexdo.h", int_mips_fexdo_h, + NoItinerary, MSA128, MSA128>; +class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w, + NoItinerary, MSA128, MSA128>; + +class FEXP2_W_DESC : 
MSA_3RF_DESC_BASE<"fexp2.w", int_mips_fexp2_w, + NoItinerary, MSA128, MSA128>; +class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", int_mips_fexp2_d, + NoItinerary, MSA128, MSA128>; + +class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w, + NoItinerary, MSA128, MSA128>; +class FEXUPL_D_DESC : MSA_2RF_DESC_BASE<"fexupl.d", int_mips_fexupl_d, + NoItinerary, MSA128, MSA128>; + +class FEXUPR_W_DESC : MSA_2RF_DESC_BASE<"fexupr.w", int_mips_fexupr_w, + NoItinerary, MSA128, MSA128>; +class FEXUPR_D_DESC : MSA_2RF_DESC_BASE<"fexupr.d", int_mips_fexupr_d, + NoItinerary, MSA128, MSA128>; + +class FFINT_S_W_DESC : MSA_2RF_DESC_BASE<"ffint_s.w", int_mips_ffint_s_w, + NoItinerary, MSA128, MSA128>; +class FFINT_S_D_DESC : MSA_2RF_DESC_BASE<"ffint_s.d", int_mips_ffint_s_d, + NoItinerary, MSA128, MSA128>; + +class FFINT_U_W_DESC : MSA_2RF_DESC_BASE<"ffint_u.w", int_mips_ffint_u_w, + NoItinerary, MSA128, MSA128>; +class FFINT_U_D_DESC : MSA_2RF_DESC_BASE<"ffint_u.d", int_mips_ffint_u_d, + NoItinerary, MSA128, MSA128>; + +class FFQL_W_DESC : MSA_2RF_DESC_BASE<"ffql.w", int_mips_ffql_w, + NoItinerary, MSA128, MSA128>; +class FFQL_D_DESC : MSA_2RF_DESC_BASE<"ffql.d", int_mips_ffql_d, + NoItinerary, MSA128, MSA128>; + +class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w, + NoItinerary, MSA128, MSA128>; +class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d, + NoItinerary, MSA128, MSA128>; + +class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", int_mips_fill_b, + NoItinerary, MSA128, GPR32>; +class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", int_mips_fill_h, + NoItinerary, MSA128, GPR32>; +class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", int_mips_fill_w, + NoItinerary, MSA128, GPR32>; + +class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", int_mips_flog2_w, + NoItinerary, MSA128, MSA128>; +class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", int_mips_flog2_d, + NoItinerary, MSA128, MSA128>; + +class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, + NoItinerary, MSA128, MSA128>; +class FMADD_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.d", int_mips_fmadd_d, + NoItinerary, MSA128, MSA128>; + +class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, + NoItinerary, MSA128, MSA128>; +class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, + NoItinerary, MSA128, MSA128>; + +class FMAX_A_W_DESC : MSA_3RF_DESC_BASE<"fmax_a.w", int_mips_fmax_a_w, + NoItinerary, MSA128, MSA128>; +class FMAX_A_D_DESC : MSA_3RF_DESC_BASE<"fmax_a.d", int_mips_fmax_a_d, + NoItinerary, MSA128, MSA128>; + +class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, + NoItinerary, MSA128, MSA128>; +class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, + NoItinerary, MSA128, MSA128>; + +class FMIN_A_W_DESC : MSA_3RF_DESC_BASE<"fmin_a.w", int_mips_fmin_a_w, + NoItinerary, MSA128, MSA128>; +class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d, + NoItinerary, MSA128, MSA128>; + +class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", int_mips_fmsub_w, + NoItinerary, MSA128, MSA128>; +class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", int_mips_fmsub_d, + NoItinerary, MSA128, MSA128>; + +class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", int_mips_fmul_w, + NoItinerary, MSA128, MSA128>; +class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", int_mips_fmul_d, + NoItinerary, MSA128, MSA128>; + +class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", int_mips_frint_w, + NoItinerary, MSA128, MSA128>; +class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", int_mips_frint_d, + NoItinerary, MSA128, 
MSA128>; + +class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, + NoItinerary, MSA128, MSA128>; +class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, + NoItinerary, MSA128, MSA128>; + +class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w, + NoItinerary, MSA128, MSA128>; +class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d, + NoItinerary, MSA128, MSA128>; + +class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, + NoItinerary, MSA128, MSA128>; +class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, + NoItinerary, MSA128, MSA128>; + +class FSGE_W_DESC : MSA_3RF_DESC_BASE<"fsge.w", int_mips_fsge_w, + NoItinerary, MSA128, MSA128>; +class FSGE_D_DESC : MSA_3RF_DESC_BASE<"fsge.d", int_mips_fsge_d, + NoItinerary, MSA128, MSA128>; + +class FSGT_W_DESC : MSA_3RF_DESC_BASE<"fsgt.w", int_mips_fsgt_w, + NoItinerary, MSA128, MSA128>; +class FSGT_D_DESC : MSA_3RF_DESC_BASE<"fsgt.d", int_mips_fsgt_d, + NoItinerary, MSA128, MSA128>; + +class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, + NoItinerary, MSA128, MSA128>; +class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, + NoItinerary, MSA128, MSA128>; + +class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, + NoItinerary, MSA128, MSA128>; +class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, + NoItinerary, MSA128, MSA128>; + +class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, + NoItinerary, MSA128, MSA128>; +class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, + NoItinerary, MSA128, MSA128>; + +class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", int_mips_fsqrt_w, + NoItinerary, MSA128, MSA128>; +class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", int_mips_fsqrt_d, + NoItinerary, MSA128, MSA128>; + +class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", int_mips_fsub_w, + NoItinerary, MSA128, MSA128>; +class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", int_mips_fsub_d, + NoItinerary, MSA128, MSA128>; + +class FTINT_S_W_DESC : MSA_2RF_DESC_BASE<"ftint_s.w", int_mips_ftint_s_w, + NoItinerary, MSA128, MSA128>; +class FTINT_S_D_DESC : MSA_2RF_DESC_BASE<"ftint_s.d", int_mips_ftint_s_d, + NoItinerary, MSA128, MSA128>; + +class FTINT_U_W_DESC : MSA_2RF_DESC_BASE<"ftint_u.w", int_mips_ftint_u_w, + NoItinerary, MSA128, MSA128>; +class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d, + NoItinerary, MSA128, MSA128>; + +class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, + NoItinerary, MSA128, MSA128>; +class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w, + NoItinerary, MSA128, MSA128>; + class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, NoItinerary, MSA128, MSA128>; class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, NoItinerary, @@ -1145,6 +1484,130 @@ def DPSUB_U_H : DPSUB_U_H_ENC, DPSUB_U_H_DESC, Requires<[HasMSA]>; def DPSUB_U_W : DPSUB_U_W_ENC, DPSUB_U_W_DESC, Requires<[HasMSA]>; def DPSUB_U_D : DPSUB_U_D_ENC, DPSUB_U_D_DESC, Requires<[HasMSA]>; +def FADD_W : FADD_W_ENC, FADD_W_DESC, Requires<[HasMSA]>; +def FADD_D : FADD_D_ENC, FADD_D_DESC, Requires<[HasMSA]>; + +def FCEQ_W : FCEQ_W_ENC, FCEQ_W_DESC, Requires<[HasMSA]>; +def FCEQ_D : FCEQ_D_ENC, FCEQ_D_DESC, Requires<[HasMSA]>; + +def FCLE_W : FCLE_W_ENC, FCLE_W_DESC, Requires<[HasMSA]>; +def FCLE_D : FCLE_D_ENC, FCLE_D_DESC, Requires<[HasMSA]>; + +def FCLT_W : FCLT_W_ENC, FCLT_W_DESC, Requires<[HasMSA]>; +def FCLT_D : FCLT_D_ENC, FCLT_D_DESC, Requires<[HasMSA]>; + +def FCLASS_W : FCLASS_W_ENC, 
FCLASS_W_DESC, Requires<[HasMSA]>; +def FCLASS_D : FCLASS_D_ENC, FCLASS_D_DESC, Requires<[HasMSA]>; + +def FCGE_W : FCGE_W_ENC, FCGE_W_DESC, Requires<[HasMSA]>; +def FCGE_D : FCGE_D_ENC, FCGE_D_DESC, Requires<[HasMSA]>; + +def FCGT_W : FCGT_W_ENC, FCGT_W_DESC, Requires<[HasMSA]>; +def FCGT_D : FCGT_D_ENC, FCGT_D_DESC, Requires<[HasMSA]>; + +def FCNE_W : FCNE_W_ENC, FCNE_W_DESC, Requires<[HasMSA]>; +def FCNE_D : FCNE_D_ENC, FCNE_D_DESC, Requires<[HasMSA]>; + +def FCUN_W : FCUN_W_ENC, FCUN_W_DESC, Requires<[HasMSA]>; +def FCUN_D : FCUN_D_ENC, FCUN_D_DESC, Requires<[HasMSA]>; + +def FDIV_W : FDIV_W_ENC, FDIV_W_DESC, Requires<[HasMSA]>; +def FDIV_D : FDIV_D_ENC, FDIV_D_DESC, Requires<[HasMSA]>; + +def FEXDO_H : FEXDO_H_ENC, FEXDO_H_DESC, Requires<[HasMSA]>; +def FEXDO_W : FEXDO_W_ENC, FEXDO_W_DESC, Requires<[HasMSA]>; + +def FEXP2_W : FEXP2_W_ENC, FEXP2_W_DESC, Requires<[HasMSA]>; +def FEXP2_D : FEXP2_D_ENC, FEXP2_D_DESC, Requires<[HasMSA]>; + +def FEXUPL_W : FEXUPL_W_ENC, FEXUPL_W_DESC, Requires<[HasMSA]>; +def FEXUPL_D : FEXUPL_D_ENC, FEXUPL_D_DESC, Requires<[HasMSA]>; + +def FEXUPR_W : FEXUPR_W_ENC, FEXUPR_W_DESC, Requires<[HasMSA]>; +def FEXUPR_D : FEXUPR_D_ENC, FEXUPR_D_DESC, Requires<[HasMSA]>; + +def FFINT_S_W : FFINT_S_W_ENC, FFINT_S_W_DESC, Requires<[HasMSA]>; +def FFINT_S_D : FFINT_S_D_ENC, FFINT_S_D_DESC, Requires<[HasMSA]>; + +def FFINT_U_W : FFINT_U_W_ENC, FFINT_U_W_DESC, Requires<[HasMSA]>; +def FFINT_U_D : FFINT_U_D_ENC, FFINT_U_D_DESC, Requires<[HasMSA]>; + +def FFQL_W : FFQL_W_ENC, FFQL_W_DESC, Requires<[HasMSA]>; +def FFQL_D : FFQL_D_ENC, FFQL_D_DESC, Requires<[HasMSA]>; + +def FFQR_W : FFQR_W_ENC, FFQR_W_DESC, Requires<[HasMSA]>; +def FFQR_D : FFQR_D_ENC, FFQR_D_DESC, Requires<[HasMSA]>; + +def FILL_B : FILL_B_ENC, FILL_B_DESC, Requires<[HasMSA]>; +def FILL_H : FILL_H_ENC, FILL_H_DESC, Requires<[HasMSA]>; +def FILL_W : FILL_W_ENC, FILL_W_DESC, Requires<[HasMSA]>; + +def FLOG2_W : FLOG2_W_ENC, FLOG2_W_DESC, Requires<[HasMSA]>; +def FLOG2_D : FLOG2_D_ENC, FLOG2_D_DESC, Requires<[HasMSA]>; + +def FMADD_W : FMADD_W_ENC, FMADD_W_DESC, Requires<[HasMSA]>; +def FMADD_D : FMADD_D_ENC, FMADD_D_DESC, Requires<[HasMSA]>; + +def FMAX_W : FMAX_W_ENC, FMAX_W_DESC, Requires<[HasMSA]>; +def FMAX_D : FMAX_D_ENC, FMAX_D_DESC, Requires<[HasMSA]>; + +def FMAX_A_W : FMAX_A_W_ENC, FMAX_A_W_DESC, Requires<[HasMSA]>; +def FMAX_A_D : FMAX_A_D_ENC, FMAX_A_D_DESC, Requires<[HasMSA]>; + +def FMIN_W : FMIN_W_ENC, FMIN_W_DESC, Requires<[HasMSA]>; +def FMIN_D : FMIN_D_ENC, FMIN_D_DESC, Requires<[HasMSA]>; + +def FMIN_A_W : FMIN_A_W_ENC, FMIN_A_W_DESC, Requires<[HasMSA]>; +def FMIN_A_D : FMIN_A_D_ENC, FMIN_A_D_DESC, Requires<[HasMSA]>; + +def FMSUB_W : FMSUB_W_ENC, FMSUB_W_DESC, Requires<[HasMSA]>; +def FMSUB_D : FMSUB_D_ENC, FMSUB_D_DESC, Requires<[HasMSA]>; + +def FMUL_W : FMUL_W_ENC, FMUL_W_DESC, Requires<[HasMSA]>; +def FMUL_D : FMUL_D_ENC, FMUL_D_DESC, Requires<[HasMSA]>; + +def FRINT_W : FRINT_W_ENC, FRINT_W_DESC, Requires<[HasMSA]>; +def FRINT_D : FRINT_D_ENC, FRINT_D_DESC, Requires<[HasMSA]>; + +def FRCP_W : FRCP_W_ENC, FRCP_W_DESC, Requires<[HasMSA]>; +def FRCP_D : FRCP_D_ENC, FRCP_D_DESC, Requires<[HasMSA]>; + +def FRSQRT_W : FRSQRT_W_ENC, FRSQRT_W_DESC, Requires<[HasMSA]>; +def FRSQRT_D : FRSQRT_D_ENC, FRSQRT_D_DESC, Requires<[HasMSA]>; + +def FSEQ_W : FSEQ_W_ENC, FSEQ_W_DESC, Requires<[HasMSA]>; +def FSEQ_D : FSEQ_D_ENC, FSEQ_D_DESC, Requires<[HasMSA]>; + +def FSLE_W : FSLE_W_ENC, FSLE_W_DESC, Requires<[HasMSA]>; +def FSLE_D : FSLE_D_ENC, FSLE_D_DESC, Requires<[HasMSA]>; + +def FSLT_W : 
FSLT_W_ENC, FSLT_W_DESC, Requires<[HasMSA]>;
+def FSLT_D : FSLT_D_ENC, FSLT_D_DESC, Requires<[HasMSA]>;
+
+def FSGE_W : FSGE_W_ENC, FSGE_W_DESC, Requires<[HasMSA]>;
+def FSGE_D : FSGE_D_ENC, FSGE_D_DESC, Requires<[HasMSA]>;
+
+def FSGT_W : FSGT_W_ENC, FSGT_W_DESC, Requires<[HasMSA]>;
+def FSGT_D : FSGT_D_ENC, FSGT_D_DESC, Requires<[HasMSA]>;
+
+def FSNE_W : FSNE_W_ENC, FSNE_W_DESC, Requires<[HasMSA]>;
+def FSNE_D : FSNE_D_ENC, FSNE_D_DESC, Requires<[HasMSA]>;
+
+def FSQRT_W : FSQRT_W_ENC, FSQRT_W_DESC, Requires<[HasMSA]>;
+def FSQRT_D : FSQRT_D_ENC, FSQRT_D_DESC, Requires<[HasMSA]>;
+
+def FSUB_W : FSUB_W_ENC, FSUB_W_DESC, Requires<[HasMSA]>;
+def FSUB_D : FSUB_D_ENC, FSUB_D_DESC, Requires<[HasMSA]>;
+
+def FTINT_S_W : FTINT_S_W_ENC, FTINT_S_W_DESC, Requires<[HasMSA]>;
+def FTINT_S_D : FTINT_S_D_ENC, FTINT_S_D_DESC, Requires<[HasMSA]>;
+
+def FTINT_U_W : FTINT_U_W_ENC, FTINT_U_W_DESC, Requires<[HasMSA]>;
+def FTINT_U_D : FTINT_U_D_ENC, FTINT_U_D_DESC, Requires<[HasMSA]>;
+
+def FTQ_H : FTQ_H_ENC, FTQ_H_DESC, Requires<[HasMSA]>;
+def FTQ_W : FTQ_W_ENC, FTQ_W_DESC, Requires<[HasMSA]>;
+
 def ILVEV_B : ILVEV_B_ENC, ILVEV_B_DESC, Requires<[HasMSA]>;
 def ILVEV_H : ILVEV_H_ENC, ILVEV_H_DESC, Requires<[HasMSA]>;
 def ILVEV_W : ILVEV_W_ENC, ILVEV_W_DESC, Requires<[HasMSA]>;
@@ -1187,11 +1650,15 @@ def ST_D: ST_D_ENC, ST_D_DESC, Requires<[HasMSA]>;
 class MSAPat : Pat, Requires<[pred]>;

+def LD_FH : MSAPat<(v8f16 (load addr:$addr)),
+                   (LD_H addr:$addr)>;
 def LD_FW : MSAPat<(v4f32 (load addr:$addr)),
                    (LD_W addr:$addr)>;
 def LD_FD : MSAPat<(v2f64 (load addr:$addr)),
                    (LD_D addr:$addr)>;

+def ST_FH : MSAPat<(store (v8f16 MSA128:$ws), addr:$addr),
+                   (ST_H MSA128:$ws, addr:$addr)>;
 def ST_FW : MSAPat<(store (v4f32 MSA128:$ws), addr:$addr),
                    (ST_W MSA128:$ws, addr:$addr)>;
 def ST_FD : MSAPat<(store (v2f64 MSA128:$ws), addr:$addr),
--
cgit v1.1


From bd71eea899d579deb1fcee02944f955a4708091a Mon Sep 17 00:00:00 2001
From: Jack Carter
Date: Thu, 15 Aug 2013 14:22:07 +0000
Subject: [Mips][msa] Added the simple builtins (madd_q to xori)

Includes:
madd_q, maddr_q, maddv, max_[asu], maxi_[su], min_[asu], mini_[su],
mod_[su], msub_q, msubr_q, msubv, mul_q, mulr_q, mulv, nloc, nlzc,
nori, ori, pckev, pckod, pcnt, sat_[su], shf, sld, sldi, sll, slli,
splat, splati, sr[al], sr[al]i, subs_[su], subsus_u, subsuu_s, subv,
subvi, vshf, xori

Patch by Daniel Sanders

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188460 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsMSAInstrInfo.td | 842 ++++++++++++++++++++++++++++++++++++
 1 file changed, 842 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 82db568..c03924f 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -405,11 +405,234 @@ class LDI_H_ENC : MSA_I10_FMT<0b010, 0b01, 0b001100>;
 class LDI_W_ENC : MSA_I10_FMT<0b010, 0b10, 0b001100>;
 class LDI_D_ENC : MSA_I10_FMT<0b010, 0b11, 0b001100>;

+class MADD_Q_H_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011100>;
+class MADD_Q_W_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011100>;
+
+class MADDR_Q_H_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011100>;
+class MADDR_Q_W_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011100>;
+
+class MADDV_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010010>;
+class MADDV_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010010>;
+class MADDV_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010010>;
+class MADDV_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010010>;
+
+class MAX_A_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b001110>;
+class MAX_A_H_ENC :
MSA_3R_FMT<0b110, 0b01, 0b001110>; +class MAX_A_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b001110>; +class MAX_A_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b001110>; + +class MAX_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001110>; +class MAX_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001110>; +class MAX_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001110>; +class MAX_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001110>; + +class MAX_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001110>; +class MAX_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001110>; +class MAX_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001110>; +class MAX_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001110>; + +class MAXI_S_B_ENC : MSA_I5_FMT<0b010, 0b00, 0b000110>; +class MAXI_S_H_ENC : MSA_I5_FMT<0b010, 0b01, 0b000110>; +class MAXI_S_W_ENC : MSA_I5_FMT<0b010, 0b10, 0b000110>; +class MAXI_S_D_ENC : MSA_I5_FMT<0b010, 0b11, 0b000110>; + +class MAXI_U_B_ENC : MSA_I5_FMT<0b011, 0b00, 0b000110>; +class MAXI_U_H_ENC : MSA_I5_FMT<0b011, 0b01, 0b000110>; +class MAXI_U_W_ENC : MSA_I5_FMT<0b011, 0b10, 0b000110>; +class MAXI_U_D_ENC : MSA_I5_FMT<0b011, 0b11, 0b000110>; + +class MIN_A_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b001110>; +class MIN_A_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b001110>; +class MIN_A_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b001110>; +class MIN_A_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b001110>; + +class MIN_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001110>; +class MIN_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001110>; +class MIN_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001110>; +class MIN_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001110>; + +class MIN_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001110>; +class MIN_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001110>; +class MIN_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001110>; +class MIN_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001110>; + +class MINI_S_B_ENC : MSA_I5_FMT<0b100, 0b00, 0b000110>; +class MINI_S_H_ENC : MSA_I5_FMT<0b100, 0b01, 0b000110>; +class MINI_S_W_ENC : MSA_I5_FMT<0b100, 0b10, 0b000110>; +class MINI_S_D_ENC : MSA_I5_FMT<0b100, 0b11, 0b000110>; + +class MINI_U_B_ENC : MSA_I5_FMT<0b101, 0b00, 0b000110>; +class MINI_U_H_ENC : MSA_I5_FMT<0b101, 0b01, 0b000110>; +class MINI_U_W_ENC : MSA_I5_FMT<0b101, 0b10, 0b000110>; +class MINI_U_D_ENC : MSA_I5_FMT<0b101, 0b11, 0b000110>; + +class MOD_S_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010010>; +class MOD_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010010>; +class MOD_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010010>; +class MOD_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010010>; + +class MOD_U_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010010>; +class MOD_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010010>; +class MOD_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010010>; +class MOD_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010010>; + +class MSUB_Q_H_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011100>; +class MSUB_Q_W_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011100>; + +class MSUBR_Q_H_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011100>; +class MSUBR_Q_W_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011100>; + +class MSUBV_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010010>; +class MSUBV_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010010>; +class MSUBV_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010010>; +class MSUBV_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010010>; + +class MUL_Q_H_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011100>; +class MUL_Q_W_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011100>; + +class MULR_Q_H_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011100>; +class MULR_Q_W_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011100>; + +class MULV_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010010>; +class MULV_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010010>; +class MULV_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010010>; +class MULV_D_ENC : MSA_3R_FMT<0b000, 
0b11, 0b010010>; + +class NLOC_B_ENC : MSA_2R_FMT<0b11000010, 0b00, 0b011110>; +class NLOC_H_ENC : MSA_2R_FMT<0b11000010, 0b01, 0b011110>; +class NLOC_W_ENC : MSA_2R_FMT<0b11000010, 0b10, 0b011110>; +class NLOC_D_ENC : MSA_2R_FMT<0b11000010, 0b11, 0b011110>; + +class NLZC_B_ENC : MSA_2R_FMT<0b11000011, 0b00, 0b011110>; +class NLZC_H_ENC : MSA_2R_FMT<0b11000011, 0b01, 0b011110>; +class NLZC_W_ENC : MSA_2R_FMT<0b11000011, 0b10, 0b011110>; +class NLZC_D_ENC : MSA_2R_FMT<0b11000011, 0b11, 0b011110>; + +class NORI_B_ENC : MSA_I8_FMT<0b10, 0b000000>; + +class ORI_B_ENC : MSA_I8_FMT<0b01, 0b000000>; + +class PCKEV_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010100>; +class PCKEV_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010100>; +class PCKEV_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010100>; +class PCKEV_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010100>; + +class PCKOD_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010100>; +class PCKOD_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010100>; +class PCKOD_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010100>; +class PCKOD_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010100>; + +class PCNT_B_ENC : MSA_2R_FMT<0b11000001, 0b00, 0b011110>; +class PCNT_H_ENC : MSA_2R_FMT<0b11000001, 0b01, 0b011110>; +class PCNT_W_ENC : MSA_2R_FMT<0b11000001, 0b10, 0b011110>; +class PCNT_D_ENC : MSA_2R_FMT<0b11000001, 0b11, 0b011110>; + +class SAT_S_B_ENC : MSA_BIT_B_FMT<0b000, 0b001010>; +class SAT_S_H_ENC : MSA_BIT_H_FMT<0b000, 0b001010>; +class SAT_S_W_ENC : MSA_BIT_W_FMT<0b000, 0b001010>; +class SAT_S_D_ENC : MSA_BIT_D_FMT<0b000, 0b001010>; + +class SAT_U_B_ENC : MSA_BIT_B_FMT<0b001, 0b001010>; +class SAT_U_H_ENC : MSA_BIT_H_FMT<0b001, 0b001010>; +class SAT_U_W_ENC : MSA_BIT_W_FMT<0b001, 0b001010>; +class SAT_U_D_ENC : MSA_BIT_D_FMT<0b001, 0b001010>; + +class SHF_B_ENC : MSA_I8_FMT<0b00, 0b000010>; +class SHF_H_ENC : MSA_I8_FMT<0b01, 0b000010>; +class SHF_W_ENC : MSA_I8_FMT<0b10, 0b000010>; + +class SLD_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010100>; +class SLD_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010100>; +class SLD_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010100>; +class SLD_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010100>; + +class SLDI_B_ENC : MSA_ELM_B_FMT<0b0000, 0b011001>; +class SLDI_H_ENC : MSA_ELM_H_FMT<0b0000, 0b011001>; +class SLDI_W_ENC : MSA_ELM_W_FMT<0b0000, 0b011001>; +class SLDI_D_ENC : MSA_ELM_D_FMT<0b0000, 0b011001>; + +class SLL_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001101>; +class SLL_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001101>; +class SLL_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001101>; +class SLL_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001101>; + +class SLLI_B_ENC : MSA_BIT_B_FMT<0b000, 0b001001>; +class SLLI_H_ENC : MSA_BIT_H_FMT<0b000, 0b001001>; +class SLLI_W_ENC : MSA_BIT_W_FMT<0b000, 0b001001>; +class SLLI_D_ENC : MSA_BIT_D_FMT<0b000, 0b001001>; + +class SPLAT_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010100>; +class SPLAT_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010100>; +class SPLAT_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010100>; +class SPLAT_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010100>; + +class SPLATI_B_ENC : MSA_ELM_B_FMT<0b0001, 0b011001>; +class SPLATI_H_ENC : MSA_ELM_H_FMT<0b0001, 0b011001>; +class SPLATI_W_ENC : MSA_ELM_W_FMT<0b0001, 0b011001>; +class SPLATI_D_ENC : MSA_ELM_D_FMT<0b0001, 0b011001>; + +class SRA_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b001101>; +class SRA_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b001101>; +class SRA_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b001101>; +class SRA_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b001101>; + +class SRAI_B_ENC : MSA_BIT_B_FMT<0b001, 0b001001>; +class SRAI_H_ENC : MSA_BIT_H_FMT<0b001, 0b001001>; +class SRAI_W_ENC : MSA_BIT_W_FMT<0b001, 0b001001>; 
+class SRAI_D_ENC : MSA_BIT_D_FMT<0b001, 0b001001>; + +class SRL_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001101>; +class SRL_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001101>; +class SRL_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001101>; +class SRL_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001101>; + +class SRLI_B_ENC : MSA_BIT_B_FMT<0b010, 0b001001>; +class SRLI_H_ENC : MSA_BIT_H_FMT<0b010, 0b001001>; +class SRLI_W_ENC : MSA_BIT_W_FMT<0b010, 0b001001>; +class SRLI_D_ENC : MSA_BIT_D_FMT<0b010, 0b001001>; + class ST_B_ENC : MSA_I5_FMT<0b111, 0b00, 0b000111>; class ST_H_ENC : MSA_I5_FMT<0b111, 0b01, 0b000111>; class ST_W_ENC : MSA_I5_FMT<0b111, 0b10, 0b000111>; class ST_D_ENC : MSA_I5_FMT<0b111, 0b11, 0b000111>; +class SUBS_S_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010001>; +class SUBS_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010001>; +class SUBS_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010001>; +class SUBS_S_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010001>; + +class SUBS_U_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010001>; +class SUBS_U_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010001>; +class SUBS_U_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010001>; +class SUBS_U_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010001>; + +class SUBSUS_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010001>; +class SUBSUS_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010001>; +class SUBSUS_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010001>; +class SUBSUS_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010001>; + +class SUBSUU_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010001>; +class SUBSUU_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010001>; +class SUBSUU_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010001>; +class SUBSUU_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010001>; + +class SUBV_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b001110>; +class SUBV_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b001110>; +class SUBV_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b001110>; +class SUBV_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b001110>; + +class SUBVI_B_ENC : MSA_I5_FMT<0b001, 0b00, 0b000110>; +class SUBVI_H_ENC : MSA_I5_FMT<0b001, 0b01, 0b000110>; +class SUBVI_W_ENC : MSA_I5_FMT<0b001, 0b10, 0b000110>; +class SUBVI_D_ENC : MSA_I5_FMT<0b001, 0b11, 0b000110>; + +class VSHF_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010101>; +class VSHF_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010101>; +class VSHF_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010101>; +class VSHF_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010101>; + +class XORI_B_ENC : MSA_I8_FMT<0b11, 0b000000>; + // Instruction desc. 
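// (In the *_DESC classes that follow, each definition pairs an instruction's
// assembly string and operand register classes with the int_mips_* intrinsic
// it selects; the *_ENC classes above carry only the binary encoding.)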
class MSA_BIT_D_DESC_BASE; +class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h, + NoItinerary, MSA128, MSA128>; +class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w, + NoItinerary, MSA128, MSA128>; + +class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h, + NoItinerary, MSA128, MSA128>; +class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w, + NoItinerary, MSA128, MSA128>; + +class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b, + NoItinerary, MSA128, MSA128>; +class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", int_mips_maddv_h, + NoItinerary, MSA128, MSA128>; +class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", int_mips_maddv_w, + NoItinerary, MSA128, MSA128>; +class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", int_mips_maddv_d, + NoItinerary, MSA128, MSA128>; + +class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, NoItinerary, + MSA128, MSA128>; +class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, NoItinerary, + MSA128, MSA128>; +class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, NoItinerary, + MSA128, MSA128>; +class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, NoItinerary, + MSA128, MSA128>; + +class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", int_mips_max_s_b, NoItinerary, + MSA128, MSA128>; +class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", int_mips_max_s_h, NoItinerary, + MSA128, MSA128>; +class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", int_mips_max_s_w, NoItinerary, + MSA128, MSA128>; +class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", int_mips_max_s_d, NoItinerary, + MSA128, MSA128>; + +class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", int_mips_max_u_b, NoItinerary, + MSA128, MSA128>; +class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, NoItinerary, + MSA128, MSA128>; +class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, NoItinerary, + MSA128, MSA128>; +class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, NoItinerary, + MSA128, MSA128>; + +class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, + NoItinerary, MSA128, MSA128>; +class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, + NoItinerary, MSA128, MSA128>; +class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, + NoItinerary, MSA128, MSA128>; +class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, + NoItinerary, MSA128, MSA128>; + +class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, + NoItinerary, MSA128, MSA128>; +class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, + NoItinerary, MSA128, MSA128>; +class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, + NoItinerary, MSA128, MSA128>; +class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, + NoItinerary, MSA128, MSA128>; + +class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, NoItinerary, + MSA128, MSA128>; +class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, NoItinerary, + MSA128, MSA128>; +class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, NoItinerary, + MSA128, MSA128>; +class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, NoItinerary, + MSA128, MSA128>; + +class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", int_mips_min_s_b, NoItinerary, + MSA128, MSA128>; +class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", int_mips_min_s_h, NoItinerary, + MSA128, MSA128>; +class MIN_S_W_DESC : 
MSA_3R_DESC_BASE<"min_s.w", int_mips_min_s_w, NoItinerary, + MSA128, MSA128>; +class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", int_mips_min_s_d, NoItinerary, + MSA128, MSA128>; + +class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", int_mips_min_u_b, NoItinerary, + MSA128, MSA128>; +class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, NoItinerary, + MSA128, MSA128>; +class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, NoItinerary, + MSA128, MSA128>; +class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, NoItinerary, + MSA128, MSA128>; + +class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", int_mips_mini_s_b, + NoItinerary, MSA128, MSA128>; +class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", int_mips_mini_s_h, + NoItinerary, MSA128, MSA128>; +class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", int_mips_mini_s_w, + NoItinerary, MSA128, MSA128>; +class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", int_mips_mini_s_d, + NoItinerary, MSA128, MSA128>; + +class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", int_mips_mini_u_b, + NoItinerary, MSA128, MSA128>; +class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", int_mips_mini_u_h, + NoItinerary, MSA128, MSA128>; +class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", int_mips_mini_u_w, + NoItinerary, MSA128, MSA128>; +class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", int_mips_mini_u_d, + NoItinerary, MSA128, MSA128>; + +class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, NoItinerary, + MSA128, MSA128>; +class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, NoItinerary, + MSA128, MSA128>; +class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", int_mips_mod_s_w, NoItinerary, + MSA128, MSA128>; +class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", int_mips_mod_s_d, NoItinerary, + MSA128, MSA128>; + +class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", int_mips_mod_u_b, NoItinerary, + MSA128, MSA128>; +class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", int_mips_mod_u_h, NoItinerary, + MSA128, MSA128>; +class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, NoItinerary, + MSA128, MSA128>; +class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, NoItinerary, + MSA128, MSA128>; + +class MSUB_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.h", int_mips_msub_q_h, + NoItinerary, MSA128, MSA128>; +class MSUB_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.w", int_mips_msub_q_w, + NoItinerary, MSA128, MSA128>; + +class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h, + NoItinerary, MSA128, MSA128>; +class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w, + NoItinerary, MSA128, MSA128>; + +class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, + NoItinerary, MSA128, MSA128>; +class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, + NoItinerary, MSA128, MSA128>; +class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, + NoItinerary, MSA128, MSA128>; +class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, + NoItinerary, MSA128, MSA128>; + +class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, + NoItinerary, MSA128, MSA128>; +class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w, + NoItinerary, MSA128, MSA128>; + +class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h, + NoItinerary, MSA128, MSA128>; +class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w, + NoItinerary, MSA128, MSA128>; + +class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", int_mips_mulv_b, + 
NoItinerary, MSA128, MSA128>; +class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", int_mips_mulv_h, + NoItinerary, MSA128, MSA128>; +class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", int_mips_mulv_w, + NoItinerary, MSA128, MSA128>; +class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", int_mips_mulv_d, + NoItinerary, MSA128, MSA128>; + +class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, + NoItinerary, MSA128, MSA128>; +class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, + NoItinerary, MSA128, MSA128>; +class NLOC_W_DESC : MSA_2R_DESC_BASE<"nloc.w", int_mips_nloc_w, + NoItinerary, MSA128, MSA128>; +class NLOC_D_DESC : MSA_2R_DESC_BASE<"nloc.d", int_mips_nloc_d, + NoItinerary, MSA128, MSA128>; + +class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", int_mips_nlzc_b, + NoItinerary, MSA128, MSA128>; +class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", int_mips_nlzc_h, + NoItinerary, MSA128, MSA128>; +class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", int_mips_nlzc_w, + NoItinerary, MSA128, MSA128>; +class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", int_mips_nlzc_d, + NoItinerary, MSA128, MSA128>; + +class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, NoItinerary, + MSA128, MSA128>; + +class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, NoItinerary, + MSA128, MSA128>; + +class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, NoItinerary, + MSA128, MSA128>; +class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, NoItinerary, + MSA128, MSA128>; +class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", int_mips_pckev_w, NoItinerary, + MSA128, MSA128>; +class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", int_mips_pckev_d, NoItinerary, + MSA128, MSA128>; + +class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", int_mips_pckod_b, NoItinerary, + MSA128, MSA128>; +class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, NoItinerary, + MSA128, MSA128>; +class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, NoItinerary, + MSA128, MSA128>; +class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, NoItinerary, + MSA128, MSA128>; + +class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", int_mips_pcnt_b, + NoItinerary, MSA128, MSA128>; +class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", int_mips_pcnt_h, + NoItinerary, MSA128, MSA128>; +class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", int_mips_pcnt_w, + NoItinerary, MSA128, MSA128>; +class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", int_mips_pcnt_d, + NoItinerary, MSA128, MSA128>; + +class SAT_S_B_DESC : MSA_BIT_B_DESC_BASE<"sat_s.b", int_mips_sat_s_b, + NoItinerary, MSA128, MSA128>; +class SAT_S_H_DESC : MSA_BIT_H_DESC_BASE<"sat_s.h", int_mips_sat_s_h, + NoItinerary, MSA128, MSA128>; +class SAT_S_W_DESC : MSA_BIT_W_DESC_BASE<"sat_s.w", int_mips_sat_s_w, + NoItinerary, MSA128, MSA128>; +class SAT_S_D_DESC : MSA_BIT_D_DESC_BASE<"sat_s.d", int_mips_sat_s_d, + NoItinerary, MSA128, MSA128>; + +class SAT_U_B_DESC : MSA_BIT_B_DESC_BASE<"sat_u.b", int_mips_sat_u_b, + NoItinerary, MSA128, MSA128>; +class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, + NoItinerary, MSA128, MSA128>; +class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, + NoItinerary, MSA128, MSA128>; +class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, + NoItinerary, MSA128, MSA128>; + +class SHF_B_DESC : MSA_I8_DESC_BASE<"shf.b", int_mips_shf_b, NoItinerary, + MSA128, MSA128>; +class SHF_H_DESC : MSA_I8_DESC_BASE<"shf.h", int_mips_shf_h, NoItinerary, + MSA128, MSA128>; +class SHF_W_DESC : MSA_I8_DESC_BASE<"shf.w", 
int_mips_shf_w, NoItinerary, + MSA128, MSA128>; + +class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, NoItinerary, + MSA128, MSA128>; +class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, NoItinerary, + MSA128, MSA128>; +class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, NoItinerary, + MSA128, MSA128>; +class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, NoItinerary, + MSA128, MSA128>; + +class SLDI_B_DESC : MSA_BIT_B_DESC_BASE<"sldi.b", int_mips_sldi_b, + NoItinerary, MSA128, MSA128>; +class SLDI_H_DESC : MSA_BIT_H_DESC_BASE<"sldi.h", int_mips_sldi_h, + NoItinerary, MSA128, MSA128>; +class SLDI_W_DESC : MSA_BIT_W_DESC_BASE<"sldi.w", int_mips_sldi_w, + NoItinerary, MSA128, MSA128>; +class SLDI_D_DESC : MSA_BIT_D_DESC_BASE<"sldi.d", int_mips_sldi_d, + NoItinerary, MSA128, MSA128>; + +class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", int_mips_sll_b, NoItinerary, + MSA128, MSA128>; +class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", int_mips_sll_h, NoItinerary, + MSA128, MSA128>; +class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", int_mips_sll_w, NoItinerary, + MSA128, MSA128>; +class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", int_mips_sll_d, NoItinerary, + MSA128, MSA128>; + +class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, + NoItinerary, MSA128, MSA128>; +class SLLI_H_DESC : MSA_BIT_H_DESC_BASE<"slli.h", int_mips_slli_h, + NoItinerary, MSA128, MSA128>; +class SLLI_W_DESC : MSA_BIT_W_DESC_BASE<"slli.w", int_mips_slli_w, + NoItinerary, MSA128, MSA128>; +class SLLI_D_DESC : MSA_BIT_D_DESC_BASE<"slli.d", int_mips_slli_d, + NoItinerary, MSA128, MSA128>; + +class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, NoItinerary, + MSA128, MSA128, GPR32>; +class SPLAT_H_DESC : MSA_3R_DESC_BASE<"splat.h", int_mips_splat_h, NoItinerary, + MSA128, MSA128, GPR32>; +class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, NoItinerary, + MSA128, MSA128, GPR32>; +class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, NoItinerary, + MSA128, MSA128, GPR32>; + +class SPLATI_B_DESC : MSA_BIT_B_DESC_BASE<"splati.b", int_mips_splati_b, + NoItinerary, MSA128, MSA128>; +class SPLATI_H_DESC : MSA_BIT_H_DESC_BASE<"splati.h", int_mips_splati_h, + NoItinerary, MSA128, MSA128>; +class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w, + NoItinerary, MSA128, MSA128>; +class SPLATI_D_DESC : MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d, + NoItinerary, MSA128, MSA128>; + +class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", int_mips_sra_b, NoItinerary, + MSA128, MSA128>; +class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", int_mips_sra_h, NoItinerary, + MSA128, MSA128>; +class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", int_mips_sra_w, NoItinerary, + MSA128, MSA128>; +class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", int_mips_sra_d, NoItinerary, + MSA128, MSA128>; + +class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, + NoItinerary, MSA128, MSA128>; +class SRAI_H_DESC : MSA_BIT_H_DESC_BASE<"srai.h", int_mips_srai_h, + NoItinerary, MSA128, MSA128>; +class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, + NoItinerary, MSA128, MSA128>; +class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, + NoItinerary, MSA128, MSA128>; + +class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, NoItinerary, + MSA128, MSA128>; +class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, NoItinerary, + MSA128, MSA128>; +class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", int_mips_srl_w, NoItinerary, + MSA128, MSA128>; +class SRL_D_DESC : 
MSA_3R_DESC_BASE<"srl.d", int_mips_srl_d, NoItinerary, + MSA128, MSA128>; + +class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, + NoItinerary, MSA128, MSA128>; +class SRLI_H_DESC : MSA_BIT_H_DESC_BASE<"srli.h", int_mips_srli_h, + NoItinerary, MSA128, MSA128>; +class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, + NoItinerary, MSA128, MSA128>; +class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, + NoItinerary, MSA128, MSA128>; + class ST_DESC_BASE { @@ -1271,6 +1825,71 @@ class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, NoItinerary, MSA128>; class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, NoItinerary, MSA128>; class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, NoItinerary, MSA128>; +class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b, + NoItinerary, MSA128, MSA128>; +class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h, + NoItinerary, MSA128, MSA128>; +class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w, + NoItinerary, MSA128, MSA128>; +class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d, + NoItinerary, MSA128, MSA128>; + +class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b, + NoItinerary, MSA128, MSA128>; +class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h, + NoItinerary, MSA128, MSA128>; +class SUBS_U_W_DESC : MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w, + NoItinerary, MSA128, MSA128>; +class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d, + NoItinerary, MSA128, MSA128>; + +class SUBSUS_U_B_DESC : MSA_3R_DESC_BASE<"subsus_u.b", int_mips_subsus_u_b, + NoItinerary, MSA128, MSA128>; +class SUBSUS_U_H_DESC : MSA_3R_DESC_BASE<"subsus_u.h", int_mips_subsus_u_h, + NoItinerary, MSA128, MSA128>; +class SUBSUS_U_W_DESC : MSA_3R_DESC_BASE<"subsus_u.w", int_mips_subsus_u_w, + NoItinerary, MSA128, MSA128>; +class SUBSUS_U_D_DESC : MSA_3R_DESC_BASE<"subsus_u.d", int_mips_subsus_u_d, + NoItinerary, MSA128, MSA128>; + +class SUBSUU_S_B_DESC : MSA_3R_DESC_BASE<"subsuu_s.b", int_mips_subsuu_s_b, + NoItinerary, MSA128, MSA128>; +class SUBSUU_S_H_DESC : MSA_3R_DESC_BASE<"subsuu_s.h", int_mips_subsuu_s_h, + NoItinerary, MSA128, MSA128>; +class SUBSUU_S_W_DESC : MSA_3R_DESC_BASE<"subsuu_s.w", int_mips_subsuu_s_w, + NoItinerary, MSA128, MSA128>; +class SUBSUU_S_D_DESC : MSA_3R_DESC_BASE<"subsuu_s.d", int_mips_subsuu_s_d, + NoItinerary, MSA128, MSA128>; + +class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", int_mips_subv_b, + NoItinerary, MSA128, MSA128>; +class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", int_mips_subv_h, + NoItinerary, MSA128, MSA128>; +class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", int_mips_subv_w, + NoItinerary, MSA128, MSA128>; +class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", int_mips_subv_d, + NoItinerary, MSA128, MSA128>; + +class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, NoItinerary, + MSA128, MSA128>; +class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", int_mips_subvi_h, NoItinerary, + MSA128, MSA128>; +class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", int_mips_subvi_w, NoItinerary, + MSA128, MSA128>; +class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", int_mips_subvi_d, NoItinerary, + MSA128, MSA128>; + +class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, + NoItinerary, MSA128, MSA128>; +class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, + NoItinerary, MSA128, MSA128>; +class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, + NoItinerary, MSA128, MSA128>; +class VSHF_D_DESC : 
MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, + NoItinerary, MSA128, MSA128>; + +class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, NoItinerary, + MSA128, MSA128>; // Instruction defs. def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC, Requires<[HasMSA]>; def ADD_A_H : ADD_A_H_ENC, ADD_A_H_DESC, Requires<[HasMSA]>; @@ -1641,11 +2260,234 @@ def LDI_B : LDI_B_ENC, LDI_B_DESC, Requires<[HasMSA]>; def LDI_H : LDI_H_ENC, LDI_H_DESC, Requires<[HasMSA]>; def LDI_W : LDI_W_ENC, LDI_W_DESC, Requires<[HasMSA]>; +def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC, Requires<[HasMSA]>; +def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC, Requires<[HasMSA]>; + +def MADDR_Q_H : MADDR_Q_H_ENC, MADDR_Q_H_DESC, Requires<[HasMSA]>; +def MADDR_Q_W : MADDR_Q_W_ENC, MADDR_Q_W_DESC, Requires<[HasMSA]>; + +def MADDV_B : MADDV_B_ENC, MADDV_B_DESC, Requires<[HasMSA]>; +def MADDV_H : MADDV_H_ENC, MADDV_H_DESC, Requires<[HasMSA]>; +def MADDV_W : MADDV_W_ENC, MADDV_W_DESC, Requires<[HasMSA]>; +def MADDV_D : MADDV_D_ENC, MADDV_D_DESC, Requires<[HasMSA]>; + +def MAX_A_B : MAX_A_B_ENC, MAX_A_B_DESC, Requires<[HasMSA]>; +def MAX_A_H : MAX_A_H_ENC, MAX_A_H_DESC, Requires<[HasMSA]>; +def MAX_A_W : MAX_A_W_ENC, MAX_A_W_DESC, Requires<[HasMSA]>; +def MAX_A_D : MAX_A_D_ENC, MAX_A_D_DESC, Requires<[HasMSA]>; + +def MAX_S_B : MAX_S_B_ENC, MAX_S_B_DESC, Requires<[HasMSA]>; +def MAX_S_H : MAX_S_H_ENC, MAX_S_H_DESC, Requires<[HasMSA]>; +def MAX_S_W : MAX_S_W_ENC, MAX_S_W_DESC, Requires<[HasMSA]>; +def MAX_S_D : MAX_S_D_ENC, MAX_S_D_DESC, Requires<[HasMSA]>; + +def MAX_U_B : MAX_U_B_ENC, MAX_U_B_DESC, Requires<[HasMSA]>; +def MAX_U_H : MAX_U_H_ENC, MAX_U_H_DESC, Requires<[HasMSA]>; +def MAX_U_W : MAX_U_W_ENC, MAX_U_W_DESC, Requires<[HasMSA]>; +def MAX_U_D : MAX_U_D_ENC, MAX_U_D_DESC, Requires<[HasMSA]>; + +def MAXI_S_B : MAXI_S_B_ENC, MAXI_S_B_DESC, Requires<[HasMSA]>; +def MAXI_S_H : MAXI_S_H_ENC, MAXI_S_H_DESC, Requires<[HasMSA]>; +def MAXI_S_W : MAXI_S_W_ENC, MAXI_S_W_DESC, Requires<[HasMSA]>; +def MAXI_S_D : MAXI_S_D_ENC, MAXI_S_D_DESC, Requires<[HasMSA]>; + +def MAXI_U_B : MAXI_U_B_ENC, MAXI_U_B_DESC, Requires<[HasMSA]>; +def MAXI_U_H : MAXI_U_H_ENC, MAXI_U_H_DESC, Requires<[HasMSA]>; +def MAXI_U_W : MAXI_U_W_ENC, MAXI_U_W_DESC, Requires<[HasMSA]>; +def MAXI_U_D : MAXI_U_D_ENC, MAXI_U_D_DESC, Requires<[HasMSA]>; + +def MIN_A_B : MIN_A_B_ENC, MIN_A_B_DESC, Requires<[HasMSA]>; +def MIN_A_H : MIN_A_H_ENC, MIN_A_H_DESC, Requires<[HasMSA]>; +def MIN_A_W : MIN_A_W_ENC, MIN_A_W_DESC, Requires<[HasMSA]>; +def MIN_A_D : MIN_A_D_ENC, MIN_A_D_DESC, Requires<[HasMSA]>; + +def MIN_S_B : MIN_S_B_ENC, MIN_S_B_DESC, Requires<[HasMSA]>; +def MIN_S_H : MIN_S_H_ENC, MIN_S_H_DESC, Requires<[HasMSA]>; +def MIN_S_W : MIN_S_W_ENC, MIN_S_W_DESC, Requires<[HasMSA]>; +def MIN_S_D : MIN_S_D_ENC, MIN_S_D_DESC, Requires<[HasMSA]>; + +def MIN_U_B : MIN_U_B_ENC, MIN_U_B_DESC, Requires<[HasMSA]>; +def MIN_U_H : MIN_U_H_ENC, MIN_U_H_DESC, Requires<[HasMSA]>; +def MIN_U_W : MIN_U_W_ENC, MIN_U_W_DESC, Requires<[HasMSA]>; +def MIN_U_D : MIN_U_D_ENC, MIN_U_D_DESC, Requires<[HasMSA]>; + +def MINI_S_B : MINI_S_B_ENC, MINI_S_B_DESC, Requires<[HasMSA]>; +def MINI_S_H : MINI_S_H_ENC, MINI_S_H_DESC, Requires<[HasMSA]>; +def MINI_S_W : MINI_S_W_ENC, MINI_S_W_DESC, Requires<[HasMSA]>; +def MINI_S_D : MINI_S_D_ENC, MINI_S_D_DESC, Requires<[HasMSA]>; + +def MINI_U_B : MINI_U_B_ENC, MINI_U_B_DESC, Requires<[HasMSA]>; +def MINI_U_H : MINI_U_H_ENC, MINI_U_H_DESC, Requires<[HasMSA]>; +def MINI_U_W : MINI_U_W_ENC, MINI_U_W_DESC, Requires<[HasMSA]>; +def MINI_U_D : MINI_U_D_ENC, MINI_U_D_DESC, 
Requires<[HasMSA]>; + +def MOD_S_B : MOD_S_B_ENC, MOD_S_B_DESC, Requires<[HasMSA]>; +def MOD_S_H : MOD_S_H_ENC, MOD_S_H_DESC, Requires<[HasMSA]>; +def MOD_S_W : MOD_S_W_ENC, MOD_S_W_DESC, Requires<[HasMSA]>; +def MOD_S_D : MOD_S_D_ENC, MOD_S_D_DESC, Requires<[HasMSA]>; + +def MOD_U_B : MOD_U_B_ENC, MOD_U_B_DESC, Requires<[HasMSA]>; +def MOD_U_H : MOD_U_H_ENC, MOD_U_H_DESC, Requires<[HasMSA]>; +def MOD_U_W : MOD_U_W_ENC, MOD_U_W_DESC, Requires<[HasMSA]>; +def MOD_U_D : MOD_U_D_ENC, MOD_U_D_DESC, Requires<[HasMSA]>; + +def MSUB_Q_H : MSUB_Q_H_ENC, MSUB_Q_H_DESC, Requires<[HasMSA]>; +def MSUB_Q_W : MSUB_Q_W_ENC, MSUB_Q_W_DESC, Requires<[HasMSA]>; + +def MSUBR_Q_H : MSUBR_Q_H_ENC, MSUBR_Q_H_DESC, Requires<[HasMSA]>; +def MSUBR_Q_W : MSUBR_Q_W_ENC, MSUBR_Q_W_DESC, Requires<[HasMSA]>; + +def MSUBV_B : MSUBV_B_ENC, MSUBV_B_DESC, Requires<[HasMSA]>; +def MSUBV_H : MSUBV_H_ENC, MSUBV_H_DESC, Requires<[HasMSA]>; +def MSUBV_W : MSUBV_W_ENC, MSUBV_W_DESC, Requires<[HasMSA]>; +def MSUBV_D : MSUBV_D_ENC, MSUBV_D_DESC, Requires<[HasMSA]>; + +def MUL_Q_H : MUL_Q_H_ENC, MUL_Q_H_DESC, Requires<[HasMSA]>; +def MUL_Q_W : MUL_Q_W_ENC, MUL_Q_W_DESC, Requires<[HasMSA]>; + +def MULR_Q_H : MULR_Q_H_ENC, MULR_Q_H_DESC, Requires<[HasMSA]>; +def MULR_Q_W : MULR_Q_W_ENC, MULR_Q_W_DESC, Requires<[HasMSA]>; + +def MULV_B : MULV_B_ENC, MULV_B_DESC, Requires<[HasMSA]>; +def MULV_H : MULV_H_ENC, MULV_H_DESC, Requires<[HasMSA]>; +def MULV_W : MULV_W_ENC, MULV_W_DESC, Requires<[HasMSA]>; +def MULV_D : MULV_D_ENC, MULV_D_DESC, Requires<[HasMSA]>; + +def NLOC_B : NLOC_B_ENC, NLOC_B_DESC, Requires<[HasMSA]>; +def NLOC_H : NLOC_H_ENC, NLOC_H_DESC, Requires<[HasMSA]>; +def NLOC_W : NLOC_W_ENC, NLOC_W_DESC, Requires<[HasMSA]>; +def NLOC_D : NLOC_D_ENC, NLOC_D_DESC, Requires<[HasMSA]>; + +def NLZC_B : NLZC_B_ENC, NLZC_B_DESC, Requires<[HasMSA]>; +def NLZC_H : NLZC_H_ENC, NLZC_H_DESC, Requires<[HasMSA]>; +def NLZC_W : NLZC_W_ENC, NLZC_W_DESC, Requires<[HasMSA]>; +def NLZC_D : NLZC_D_ENC, NLZC_D_DESC, Requires<[HasMSA]>; + +def NORI_B : NORI_B_ENC, NORI_B_DESC, Requires<[HasMSA]>; + +def ORI_B : ORI_B_ENC, ORI_B_DESC, Requires<[HasMSA]>; + +def PCKEV_B : PCKEV_B_ENC, PCKEV_B_DESC, Requires<[HasMSA]>; +def PCKEV_H : PCKEV_H_ENC, PCKEV_H_DESC, Requires<[HasMSA]>; +def PCKEV_W : PCKEV_W_ENC, PCKEV_W_DESC, Requires<[HasMSA]>; +def PCKEV_D : PCKEV_D_ENC, PCKEV_D_DESC, Requires<[HasMSA]>; + +def PCKOD_B : PCKOD_B_ENC, PCKOD_B_DESC, Requires<[HasMSA]>; +def PCKOD_H : PCKOD_H_ENC, PCKOD_H_DESC, Requires<[HasMSA]>; +def PCKOD_W : PCKOD_W_ENC, PCKOD_W_DESC, Requires<[HasMSA]>; +def PCKOD_D : PCKOD_D_ENC, PCKOD_D_DESC, Requires<[HasMSA]>; + +def PCNT_B : PCNT_B_ENC, PCNT_B_DESC, Requires<[HasMSA]>; +def PCNT_H : PCNT_H_ENC, PCNT_H_DESC, Requires<[HasMSA]>; +def PCNT_W : PCNT_W_ENC, PCNT_W_DESC, Requires<[HasMSA]>; +def PCNT_D : PCNT_D_ENC, PCNT_D_DESC, Requires<[HasMSA]>; + +def SAT_S_B : SAT_S_B_ENC, SAT_S_B_DESC, Requires<[HasMSA]>; +def SAT_S_H : SAT_S_H_ENC, SAT_S_H_DESC, Requires<[HasMSA]>; +def SAT_S_W : SAT_S_W_ENC, SAT_S_W_DESC, Requires<[HasMSA]>; +def SAT_S_D : SAT_S_D_ENC, SAT_S_D_DESC, Requires<[HasMSA]>; + +def SAT_U_B : SAT_U_B_ENC, SAT_U_B_DESC, Requires<[HasMSA]>; +def SAT_U_H : SAT_U_H_ENC, SAT_U_H_DESC, Requires<[HasMSA]>; +def SAT_U_W : SAT_U_W_ENC, SAT_U_W_DESC, Requires<[HasMSA]>; +def SAT_U_D : SAT_U_D_ENC, SAT_U_D_DESC, Requires<[HasMSA]>; + +def SHF_B : SHF_B_ENC, SHF_B_DESC, Requires<[HasMSA]>; +def SHF_H : SHF_H_ENC, SHF_H_DESC, Requires<[HasMSA]>; +def SHF_W : SHF_W_ENC, SHF_W_DESC, Requires<[HasMSA]>; + +def SLD_B : 
SLD_B_ENC, SLD_B_DESC, Requires<[HasMSA]>; +def SLD_H : SLD_H_ENC, SLD_H_DESC, Requires<[HasMSA]>; +def SLD_W : SLD_W_ENC, SLD_W_DESC, Requires<[HasMSA]>; +def SLD_D : SLD_D_ENC, SLD_D_DESC, Requires<[HasMSA]>; + +def SLDI_B : SLDI_B_ENC, SLDI_B_DESC, Requires<[HasMSA]>; +def SLDI_H : SLDI_H_ENC, SLDI_H_DESC, Requires<[HasMSA]>; +def SLDI_W : SLDI_W_ENC, SLDI_W_DESC, Requires<[HasMSA]>; +def SLDI_D : SLDI_D_ENC, SLDI_D_DESC, Requires<[HasMSA]>; + +def SLL_B : SLL_B_ENC, SLL_B_DESC, Requires<[HasMSA]>; +def SLL_H : SLL_H_ENC, SLL_H_DESC, Requires<[HasMSA]>; +def SLL_W : SLL_W_ENC, SLL_W_DESC, Requires<[HasMSA]>; +def SLL_D : SLL_D_ENC, SLL_D_DESC, Requires<[HasMSA]>; + +def SLLI_B : SLLI_B_ENC, SLLI_B_DESC, Requires<[HasMSA]>; +def SLLI_H : SLLI_H_ENC, SLLI_H_DESC, Requires<[HasMSA]>; +def SLLI_W : SLLI_W_ENC, SLLI_W_DESC, Requires<[HasMSA]>; +def SLLI_D : SLLI_D_ENC, SLLI_D_DESC, Requires<[HasMSA]>; + +def SPLAT_B : SPLAT_B_ENC, SPLAT_B_DESC, Requires<[HasMSA]>; +def SPLAT_H : SPLAT_H_ENC, SPLAT_H_DESC, Requires<[HasMSA]>; +def SPLAT_W : SPLAT_W_ENC, SPLAT_W_DESC, Requires<[HasMSA]>; +def SPLAT_D : SPLAT_D_ENC, SPLAT_D_DESC, Requires<[HasMSA]>; + +def SPLATI_B : SPLATI_B_ENC, SPLATI_B_DESC, Requires<[HasMSA]>; +def SPLATI_H : SPLATI_H_ENC, SPLATI_H_DESC, Requires<[HasMSA]>; +def SPLATI_W : SPLATI_W_ENC, SPLATI_W_DESC, Requires<[HasMSA]>; +def SPLATI_D : SPLATI_D_ENC, SPLATI_D_DESC, Requires<[HasMSA]>; + +def SRA_B : SRA_B_ENC, SRA_B_DESC, Requires<[HasMSA]>; +def SRA_H : SRA_H_ENC, SRA_H_DESC, Requires<[HasMSA]>; +def SRA_W : SRA_W_ENC, SRA_W_DESC, Requires<[HasMSA]>; +def SRA_D : SRA_D_ENC, SRA_D_DESC, Requires<[HasMSA]>; + +def SRAI_B : SRAI_B_ENC, SRAI_B_DESC, Requires<[HasMSA]>; +def SRAI_H : SRAI_H_ENC, SRAI_H_DESC, Requires<[HasMSA]>; +def SRAI_W : SRAI_W_ENC, SRAI_W_DESC, Requires<[HasMSA]>; +def SRAI_D : SRAI_D_ENC, SRAI_D_DESC, Requires<[HasMSA]>; + +def SRL_B : SRL_B_ENC, SRL_B_DESC, Requires<[HasMSA]>; +def SRL_H : SRL_H_ENC, SRL_H_DESC, Requires<[HasMSA]>; +def SRL_W : SRL_W_ENC, SRL_W_DESC, Requires<[HasMSA]>; +def SRL_D : SRL_D_ENC, SRL_D_DESC, Requires<[HasMSA]>; + +def SRLI_B : SRLI_B_ENC, SRLI_B_DESC, Requires<[HasMSA]>; +def SRLI_H : SRLI_H_ENC, SRLI_H_DESC, Requires<[HasMSA]>; +def SRLI_W : SRLI_W_ENC, SRLI_W_DESC, Requires<[HasMSA]>; +def SRLI_D : SRLI_D_ENC, SRLI_D_DESC, Requires<[HasMSA]>; + def ST_B: ST_B_ENC, ST_B_DESC, Requires<[HasMSA]>; def ST_H: ST_H_ENC, ST_H_DESC, Requires<[HasMSA]>; def ST_W: ST_W_ENC, ST_W_DESC, Requires<[HasMSA]>; def ST_D: ST_D_ENC, ST_D_DESC, Requires<[HasMSA]>; +def SUBS_S_B : SUBS_S_B_ENC, SUBS_S_B_DESC, Requires<[HasMSA]>; +def SUBS_S_H : SUBS_S_H_ENC, SUBS_S_H_DESC, Requires<[HasMSA]>; +def SUBS_S_W : SUBS_S_W_ENC, SUBS_S_W_DESC, Requires<[HasMSA]>; +def SUBS_S_D : SUBS_S_D_ENC, SUBS_S_D_DESC, Requires<[HasMSA]>; + +def SUBS_U_B : SUBS_U_B_ENC, SUBS_U_B_DESC, Requires<[HasMSA]>; +def SUBS_U_H : SUBS_U_H_ENC, SUBS_U_H_DESC, Requires<[HasMSA]>; +def SUBS_U_W : SUBS_U_W_ENC, SUBS_U_W_DESC, Requires<[HasMSA]>; +def SUBS_U_D : SUBS_U_D_ENC, SUBS_U_D_DESC, Requires<[HasMSA]>; + +def SUBSUS_U_B : SUBSUS_U_B_ENC, SUBSUS_U_B_DESC, Requires<[HasMSA]>; +def SUBSUS_U_H : SUBSUS_U_H_ENC, SUBSUS_U_H_DESC, Requires<[HasMSA]>; +def SUBSUS_U_W : SUBSUS_U_W_ENC, SUBSUS_U_W_DESC, Requires<[HasMSA]>; +def SUBSUS_U_D : SUBSUS_U_D_ENC, SUBSUS_U_D_DESC, Requires<[HasMSA]>; + +def SUBSUU_S_B : SUBSUU_S_B_ENC, SUBSUU_S_B_DESC, Requires<[HasMSA]>; +def SUBSUU_S_H : SUBSUU_S_H_ENC, SUBSUU_S_H_DESC, Requires<[HasMSA]>; +def SUBSUU_S_W : SUBSUU_S_W_ENC, 
SUBSUU_S_W_DESC, Requires<[HasMSA]>; +def SUBSUU_S_D : SUBSUU_S_D_ENC, SUBSUU_S_D_DESC, Requires<[HasMSA]>; + +def SUBV_B : SUBV_B_ENC, SUBV_B_DESC, Requires<[HasMSA]>; +def SUBV_H : SUBV_H_ENC, SUBV_H_DESC, Requires<[HasMSA]>; +def SUBV_W : SUBV_W_ENC, SUBV_W_DESC, Requires<[HasMSA]>; +def SUBV_D : SUBV_D_ENC, SUBV_D_DESC, Requires<[HasMSA]>; + +def SUBVI_B : SUBVI_B_ENC, SUBVI_B_DESC, Requires<[HasMSA]>; +def SUBVI_H : SUBVI_H_ENC, SUBVI_H_DESC, Requires<[HasMSA]>; +def SUBVI_W : SUBVI_W_ENC, SUBVI_W_DESC, Requires<[HasMSA]>; +def SUBVI_D : SUBVI_D_ENC, SUBVI_D_DESC, Requires<[HasMSA]>; + +def VSHF_B : VSHF_B_ENC, VSHF_B_DESC, Requires<[HasMSA]>; +def VSHF_H : VSHF_H_ENC, VSHF_H_DESC, Requires<[HasMSA]>; +def VSHF_W : VSHF_W_ENC, VSHF_W_DESC, Requires<[HasMSA]>; +def VSHF_D : VSHF_D_ENC, VSHF_D_DESC, Requires<[HasMSA]>; + +def XORI_B : XORI_B_ENC, XORI_B_DESC, Requires<[HasMSA]>; + // Patterns. class MSAPat : Pat, Requires<[pred]>; -- cgit v1.1 From 428715d4e120e6ef6fc898665607a92f3dd02709 Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Thu, 15 Aug 2013 15:43:06 +0000 Subject: This fixes three issues related to Thumb literal loads: 1. The offset range for Thumb1 PC relative loads is [0..1020] and not [-1024..1020] 2. Thumb2 PC relative loads may define the PC, so the restriction placed on target register is removed 3. Removes unneeded alias between "ldr.n" and t1LDRpci. ".n" is actually stripped by both tablegen and the ASM parser, so this alias rule really does nothing git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188466 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb.td | 3 --- lib/Target/ARM/ARMInstrThumb2.td | 6 +++--- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index d5b6563..7e383d2 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -662,9 +662,6 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } -def : tInstAlias<"ldr${p}.n $Rt, $addr", - (tLDRpci tGPR:$Rt, t_addrmode_pc:$addr, pred:$p), 0>; - // A8.6.194 & A8.6.192 defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, t_addrmode_is4, AddrModeT1_4, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 5d04d21..77a3759 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4191,7 +4191,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr", (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; def : t2InstAlias<"ldr${p} $Rt, $addr", - (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>; + (t2LDRpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; def : t2InstAlias<"ldrb${p} $Rt, $addr", (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; def : t2InstAlias<"ldrh${p} $Rt, $addr", @@ -4404,7 +4404,7 @@ def : t2InstAlias<"adr${p} $Rd, $addr", // LDR(literal) w/ alternate [pc, #imm] syntax. def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr", - (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + (ins GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr", (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr", @@ -4415,7 +4415,7 @@ def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr", (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; // Version w/ the .w suffix. 
def : t2InstAlias<"ldr${p}.w $Rt, $addr", - (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>; + (t2LDRpcrel GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>; def : t2InstAlias<"ldrb${p}.w $Rt, $addr", (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; def : t2InstAlias<"ldrh${p}.w $Rt, $addr", diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 03d3a48..3d7baf5 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -647,7 +647,7 @@ public: Val = Memory.OffsetImm->getValue(); } else return false; - return ((Val % 4) == 0) && (Val >= -1020) && (Val <= 1020); + return ((Val % 4) == 0) && (Val >= 0) && (Val <= 1020); } bool isFPImm() const { if (!isImm()) return false; -- cgit v1.1 From e4272979978951bece0141cdd9f6a4481b34656f Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 15 Aug 2013 18:46:14 +0000 Subject: Constify the function parameters. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188469 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index b994e67..ecf7b73 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -627,8 +627,8 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { /// to use the stack, and if we don't adjust the stack we clobber the first /// frame index. /// See X86InstrInfo::copyPhysReg. -static bool usesTheStack(MachineFunction &MF) { - MachineRegisterInfo &MRI = MF.getRegInfo(); +static bool usesTheStack(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS), re = MRI.reg_end(); ri != re; ++ri) -- cgit v1.1 From 24adc8f60f0a39e45363eef5392fe1a7e27bd12f Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Thu, 15 Aug 2013 20:45:13 +0000 Subject: make arm-reserve-r9 available for all ARM r9 is defined as a platform-specific register in the ARM EABI. It can be reserved for a special purpose or be used as a general purpose register. Add support for reserving r9 for all ARM, while leaving the IOS usage unchanged. Patch by Jeroen Hofstee. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188485 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c592421..a94c942 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -169,9 +169,10 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (isAAPCS_ABI()) stackAlignment = 8; - if (!isTargetIOS()) + if (!isTargetIOS()) { UseMovt = hasV6T2Ops(); - else { + IsR9Reserved = ReserveR9; + } else { IsR9Reserved = ReserveR9 | !HasV6Ops; UseMovt = DarwinUseMOVT && hasV6T2Ops(); SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); -- cgit v1.1 From 14c41370e36068ae25c871d5bd8f99f92bbb7d45 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Thu, 15 Aug 2013 20:54:38 +0000 Subject: make arm-use-movt available for all ARM Before this patch, this flag was IOS-specific, but it is also useful for bare-metal projects like bootloaders / kernels, etc., since movw / movt prevent simple relocation.
Therefore make this flag more commonly available. Note: this patch depends on a similar rename in clang. Patch by Jeroen Hofstee. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188487 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index a94c942..3111f5e 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -32,7 +32,7 @@ ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); static cl::opt -DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden); +ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden); static cl::opt UseFusedMulOps("arm-use-mulops", @@ -169,12 +169,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (isAAPCS_ABI()) stackAlignment = 8; + UseMovt = hasV6T2Ops() && ArmUseMOVT; + if (!isTargetIOS()) { - UseMovt = hasV6T2Ops(); IsR9Reserved = ReserveR9; } else { IsR9Reserved = ReserveR9 | !HasV6Ops; - UseMovt = DarwinUseMOVT && hasV6T2Ops(); SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); } -- cgit v1.1 From c34540aa861105d515ed73eb2ae614d9453016b6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 15 Aug 2013 23:11:03 +0000 Subject: Fix spelling git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188506 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp index 3768ba0..b069522 100644 --- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp @@ -35,9 +35,9 @@ class R600TextureIntrinsicsReplacer : FunctionType *TexSign; FunctionType *TexQSign; - void getAdjustementFromTextureTarget(unsigned TextureType, bool hasLOD, - unsigned SrcSelect[4], unsigned CT[4], - bool &useShadowVariant) { + void getAdjustmentFromTextureTarget(unsigned TextureType, bool hasLOD, + unsigned SrcSelect[4], unsigned CT[4], + bool &useShadowVariant) { enum TextureTypes { TEXTURE_1D = 1, TEXTURE_2D, @@ -174,8 +174,8 @@ class R600TextureIntrinsicsReplacer : }; bool useShadowVariant; - getAdjustementFromTextureTarget(TextureType, hasLOD, SrcSelect, CT, - useShadowVariant); + getAdjustmentFromTextureTarget(TextureType, hasLOD, SrcSelect, CT, + useShadowVariant); ReplaceCallInst(I, FT, useShadowVariant?ShadowInt:VanillaInt, SrcSelect, Offset, ResourceId, SamplerId, CT, Coord); @@ -198,8 +198,8 @@ class R600TextureIntrinsicsReplacer : }; bool useShadowVariant; - getAdjustementFromTextureTarget(TextureType, false, SrcSelect, CT, - useShadowVariant); + getAdjustmentFromTextureTarget(TextureType, false, SrcSelect, CT, + useShadowVariant); ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect, Offset, ResourceId, SamplerId, CT, Coord); -- cgit v1.1 From e560d526a1aebf45e5333ab7b24689be930a8026 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:11:46 +0000 Subject: R600: Change the RAT instruction assembly names so they match the docs Tested-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188515 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ControlFlowFinalizer.cpp | 4 +- lib/Target/R600/R600Instructions.td | 63
+++++++++++++++------------- 2 files changed, 35 insertions(+), 32 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 715be37..ab71bc1 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -380,8 +380,8 @@ public: case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_64_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: - case AMDGPU::RAT_STORE_DWORD32_cm: - case AMDGPU::RAT_STORE_DWORD64_cm: + case AMDGPU::RAT_STORE_DWORD32: + case AMDGPU::RAT_STORE_DWORD64: DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 52205cc..a67276c 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -255,12 +255,12 @@ def TEX_ARRAY_MSAA : PatLeaf< }] >; -class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> mask, dag outs, - dag ins, string asm, list pattern> : +class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask, + dag outs, dag ins, string asm, list pattern> : InstR600ISA , CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF { - let rat_id = 0; + let rat_id = ratid; let rat_inst = ratinst; let rim = 0; // XXX: Have a separate instruction for non-indexed writes. @@ -1261,6 +1261,20 @@ let Predicates = [isR700] in { } //===----------------------------------------------------------------------===// +// Evergreen / Cayman store instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isEGorCayman] in { + +class CF_MEM_RAT_CACHELESS rat_inst, bits<4> rat_id, bits<4> mask, dag ins, + string name, list pattern> + : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins, + "MEM_RAT_CACHELESS "#name, pattern>; + +} // End Predicates = [isEGorCayman] + + +//===----------------------------------------------------------------------===// // Evergreen Only instructions //===----------------------------------------------------------------------===// @@ -1288,36 +1302,32 @@ def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// -let usesCustomInserter = 1 in { - -class RAT_WRITE_CACHELESS_eg mask, string name, - list pattern> - : EG_CF_RAT <0x57, 0x2, mask, (outs), ins, name, pattern> { -} -} // End usesCustomInserter = 1 +let usesCustomInserter = 1 in { // 32-bit store -def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < +def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1, (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop", + "STORE_RAW $rw_gpr, $index_gpr, $eop", [(global_store i32:$rw_gpr, i32:$index_gpr)] >; // 64-bit store -def RAT_WRITE_CACHELESS_64_eg : RAT_WRITE_CACHELESS_eg < +def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3, (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0x3, "RAT_WRITE_CACHELESS_64_eg $rw_gpr.XY, $index_gpr, $eop", + "STORE_RAW $rw_gpr.XY, $index_gpr, $eop", [(global_store v2i32:$rw_gpr, i32:$index_gpr)] >; //128-bit store -def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < +def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf, (ins 
R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop", + "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop", [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; +} // End usesCustomInserter = 1 + class VTX_READ_eg buffer_id, dag outs, list pattern> : VTX_WORD0_eg, VTX_READ { @@ -1785,23 +1795,16 @@ def : Pat < def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - -class RAT_STORE_DWORD_cm mask, dag ins, list pat> : EG_CF_RAT < - 0x57, 0x14, mask, (outs), ins, - "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", pat -> { +class RAT_STORE_DWORD mask> : + CF_MEM_RAT_CACHELESS <0x14, 0, mask, + (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr), + "STORE_DWORD $rw_gpr, $index_gpr", + [(global_store vt:$rw_gpr, i32:$index_gpr)]> { let eop = 0; // This bit is not used on Cayman. } -def RAT_STORE_DWORD32_cm : RAT_STORE_DWORD_cm <0x1, - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), - [(global_store i32:$rw_gpr, i32:$index_gpr)] ->; - -def RAT_STORE_DWORD64_cm : RAT_STORE_DWORD_cm <0x3, - (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr), - [(global_store v2i32:$rw_gpr, i32:$index_gpr)] ->; +def RAT_STORE_DWORD32 : RAT_STORE_DWORD ; +def RAT_STORE_DWORD64 : RAT_STORE_DWORD ; class VTX_READ_cm buffer_id, dag outs, list pattern> : VTX_WORD0_cm, VTX_READ { -- cgit v1.1 From e7ac2ed1c268891a856ab38db1e34372a79da86a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:11:51 +0000 Subject: R600: Add IsExport bit to TableGen instruction definitions Tested-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188516 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ControlFlowFinalizer.cpp | 13 ++++--------- lib/Target/R600/R600Defines.h | 3 ++- lib/Target/R600/R600InstrFormats.td | 2 ++ lib/Target/R600/R600InstrInfo.cpp | 4 ++++ lib/Target/R600/R600InstrInfo.h | 1 + lib/Target/R600/R600Instructions.td | 3 +++ 6 files changed, 16 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index ab71bc1..ac3d8f6 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -373,15 +373,6 @@ public: case AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); - case AMDGPU::EG_ExportBuf: - case AMDGPU::EG_ExportSwz: - case AMDGPU::R600_ExportBuf: - case AMDGPU::R600_ExportSwz: - case AMDGPU::RAT_WRITE_CACHELESS_32_eg: - case AMDGPU::RAT_WRITE_CACHELESS_64_eg: - case AMDGPU::RAT_WRITE_CACHELESS_128_eg: - case AMDGPU::RAT_STORE_DWORD32: - case AMDGPU::RAT_STORE_DWORD64: DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; @@ -491,6 +482,10 @@ public: EmitALUClause(I, AluClauses[i], CfCount); } default: + if (TII->isExport(MI->getOpcode())) { + DEBUG(dbgs() << CfCount << ":"; MI->dump();); + CfCount++; + } break; } } diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index 90fc29c..8dc9ebb 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -44,7 +44,8 @@ namespace R600_InstFlag { TEX_INST = (1 << 13), ALU_INST = (1 << 14), LDS_1A = (1 << 15), - LDS_1A1D = (1 << 16) + LDS_1A1D = (1 << 16), + IS_EXPORT = (1 << 17) }; } diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td index 2d72404..2ae3311 100644 --- a/lib/Target/R600/R600InstrFormats.td +++ b/lib/Target/R600/R600InstrFormats.td @@ -29,6 +29,7 @@ class InstR600 
pattern, bit VTXInst = 0; bit TEXInst = 0; bit ALUInst = 0; + bit IsExport = 0; let Namespace = "AMDGPU"; let OutOperandList = outs; @@ -53,6 +54,7 @@ class InstR600 pattern, let TSFlags{14} = ALUInst; let TSFlags{15} = LDS_1A; let TSFlags{16} = LDS_1A1D; + let TSFlags{17} = IsExport; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 4e7eff9..9548a34 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -160,6 +160,10 @@ bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { return isTransOnly(MI->getOpcode()); } +bool R600InstrInfo::isExport(unsigned Opcode) const { + return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT); +} + bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { return ST.hasVertexCache() && IS_VTX(get(Opcode)); } diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index cdaa2fb..e28d771 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -68,6 +68,7 @@ namespace llvm { bool isTransOnly(unsigned Opcode) const; bool isTransOnly(const MachineInstr *MI) const; + bool isExport(unsigned Opcode) const; bool usesVertexCache(unsigned Opcode) const; bool usesVertexCache(const MachineInstr *MI) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index a67276c..bacedfc 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -278,6 +278,7 @@ class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask, let Inst{31-0} = Word0; let Inst{63-32} = Word1; + let IsExport = 1; } @@ -551,6 +552,7 @@ class ExportSwzInst : InstR600ISA<( let elem_size = 3; let Inst{31-0} = Word0; let Inst{63-32} = Word1; + let IsExport = 1; } } // End usesCustomInserter = 1 @@ -564,6 +566,7 @@ class ExportBufInst : InstR600ISA<( let elem_size = 0; let Inst{31-0} = Word0; let Inst{63-32} = Word1; + let IsExport = 1; } //===----------------------------------------------------------------------===// -- cgit v1.1 From 6b88cdb34cc78f815946b8ebe6c2332d084526ad Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:11:55 +0000 Subject: R600: Enable folding of inline literals into REG_SEQUENCE instructions Tested-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188517 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 37 +++++++++++++------------ lib/Target/R600/R600OptimizeVectorRegisters.cpp | 3 ++ 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 77ca885..4f78f29 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -453,29 +453,32 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { continue; } } else { - if (!TII->isALUInstr(Use->getMachineOpcode()) || - (TII->get(Use->getMachineOpcode()).TSFlags & - R600_InstFlag::VECTOR)) { - continue; - } - - int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), - AMDGPU::OpName::literal); - if (ImmIdx == -1) { - continue; - } - - if (TII->getOperandIdx(Use->getMachineOpcode(), - AMDGPU::OpName::dst) != -1) { - // subtract one from ImmIdx, because the DST operand is usually index - // 0 for MachineInstrs, but we have no DST in the Ops vector.
- ImmIdx--; + switch(Use->getMachineOpcode()) { + case AMDGPU::REG_SEQUENCE: break; + default: + if (!TII->isALUInstr(Use->getMachineOpcode()) || + (TII->get(Use->getMachineOpcode()).TSFlags & + R600_InstFlag::VECTOR)) { + continue; + } } // Check that we aren't already using an immediate. // XXX: It's possible for an instruction to have more than one // immediate operand, but this is not supported yet. if (ImmReg == AMDGPU::ALU_LITERAL_X) { + int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), + AMDGPU::OpName::literal); + if (ImmIdx == -1) { + continue; + } + + if (TII->getOperandIdx(Use->getMachineOpcode(), + AMDGPU::OpName::dst) != -1) { + // subtract one from ImmIdx, because the DST operand is usually index + // 0 for MachineInstrs, but we have no DST in the Ops vector. + ImmIdx--; + } ConstantSDNode *C = dyn_cast(Use->getOperand(ImmIdx)); assert(C); diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp index acacffa..cf719c0 100644 --- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp @@ -50,6 +50,9 @@ isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { E = MRI.def_end(); It != E; ++It) { return (*It).isImplicitDef(); } + if (MRI.isReserved(Reg)) { + return false; + } llvm_unreachable("Reg without a def"); return false; } -- cgit v1.1 From 4781d314b7b0bd239dc3986b4157726e80a270ba Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:12:00 +0000 Subject: R600: Add support for v4i32 stores on Cayman Tested-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188518 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index bacedfc..06886ce 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1808,6 +1808,7 @@ class RAT_STORE_DWORD mask> : def RAT_STORE_DWORD32 : RAT_STORE_DWORD ; def RAT_STORE_DWORD64 : RAT_STORE_DWORD ; +def RAT_STORE_DWORD128 : RAT_STORE_DWORD ; class VTX_READ_cm buffer_id, dag outs, list pattern> : VTX_WORD0_cm, VTX_READ { -- cgit v1.1 From ec484277dd04399d7b2ea37508e39fc4998bc9a7 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:12:06 +0000 Subject: R600: Add support for i16 and i8 global stores Tested-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188519 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/AMDGPUInstrInfo.td | 4 +++ lib/Target/R600/AMDGPUInstructions.td | 15 +++++++++ lib/Target/R600/R600ISelLowering.cpp | 61 +++++++++++++++++++++++++++------- lib/Target/R600/R600Instructions.td | 13 ++++++++ lib/Target/R600/SIInstrInfo.td | 6 ++-- lib/Target/R600/SIInstructions.td | 32 +++++++++++------- 8 files changed, 106 insertions(+), 27 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1e79998..7ceab2d 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -558,5 +558,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SAMPLEB) NODE_NAME_CASE(SAMPLED) NODE_NAME_CASE(SAMPLEL) + NODE_NAME_CASE(STORE_MSKOR) } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
b/lib/Target/R600/AMDGPUISelLowering.h index 9adbb54..8788c20 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -150,6 +150,7 @@ enum { SAMPLED, SAMPLEL, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, + STORE_MSKOR, LOAD_CONSTANT, LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 48d89dd..c61993a 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -72,3 +72,7 @@ def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD", def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE", SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, [SDNPHasChain, SDNPMayStore]>; + +def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR", + SDTypeProfile<0, 2, []>, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index ddb655a..df0bade 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -146,6 +146,16 @@ def az_extloadi32_constant : PatFrag<(ops node:$ptr), return isConstantLoad(dyn_cast(N), -1); }]>; +def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; + +def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; + def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return isLocalLoad(dyn_cast(N)); }]>; @@ -155,6 +165,11 @@ def local_store : PatFrag<(ops node:$val, node:$ptr), return isLocalStore(dyn_cast(N)); }]>; +def mskor_global : PatFrag<(ops node:$val, node:$ptr), + (AMDGPUstore_mskor node:$val, node:$ptr), [{ + return dyn_cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +}]>; + class Constants { int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a89875c..b6b6560 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -84,6 +84,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); + setTruncStoreAction(MVT::i32, MVT::i8, Custom); + setTruncStoreAction(MVT::i32, MVT::i16, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); @@ -1009,19 +1011,54 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = Op.getOperand(1); SDValue Ptr = Op.getOperand(2); - if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && - Ptr->getOpcode() != AMDGPUISD::DWORDADDR) { - // Convert pointer from byte address to dword address. 
- Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), - DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), - Ptr, DAG.getConstant(2, MVT::i32))); - - if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { - assert(!"Truncated and indexed stores not supported yet"); - } else { - Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); + if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) { + if (StoreNode->isTruncatingStore()) { + EVT VT = Value.getValueType(); + assert(VT == MVT::i32); + EVT MemVT = StoreNode->getMemoryVT(); + SDValue MaskConstant; + if (MemVT == MVT::i8) { + MaskConstant = DAG.getConstant(0xFF, MVT::i32); + } else { + assert(MemVT == MVT::i16); + MaskConstant = DAG.getConstant(0xFFFF, MVT::i32); + } + SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr, + DAG.getConstant(2, MVT::i32)); + SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(0x00000003, VT)); + SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); + SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, + DAG.getConstant(3, VT)); + SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift); + SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift); + // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32 + // vector instead. + SDValue Src[4] = { + ShiftedValue, + DAG.getConstant(0, MVT::i32), + DAG.getConstant(0, MVT::i32), + Mask + }; + SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4); + SDValue Args[3] = { Chain, Input, DWordAddr }; + return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL, + Op->getVTList(), Args, 3, MemVT, + StoreNode->getMemOperand()); + } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && + Value.getValueType().bitsGE(MVT::i32)) { + // Convert pointer from byte address to dword address. 
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), + DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(2, MVT::i32))); + + if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { + assert(!"Truncated and indexed stores not supported yet"); + } else { + Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); + } + return Chain; } - return Chain; } EVT ValueVT = Value.getValueType(); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 06886ce..b059a81 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1274,6 +1274,19 @@ class CF_MEM_RAT_CACHELESS rat_inst, bits<4> rat_id, bits<4> mask, dag : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins, "MEM_RAT_CACHELESS "#name, pattern>; +class CF_MEM_RAT rat_inst, bits<4> rat_id, dag ins, string name, + list pattern> + : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins, + "MEM_RAT "#name, pattern>; + +def RAT_MSKOR : CF_MEM_RAT <0x11, 0, + (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), + "MSKOR $rw_gpr.XW, $index_gpr", + [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)] +> { + let eop = 0; +} + } // End Predicates = [isEGorCayman] diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 2639456..ecc4718 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -400,9 +400,9 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass> { } } -class MUBUF_Store_Helper op, string name, RegisterClass vdataClass, - ValueType VT> : - MUBUF op, string name, RegisterClass vdataClass> : + MUBUF { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e719cb3..4eb3566 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -409,19 +409,25 @@ defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", V defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; -//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; -//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; + +def BUFFER_STORE_BYTE : MUBUF_Store_Helper < + 0x00000018, "BUFFER_STORE_BYTE", VReg_32 +>; + +def BUFFER_STORE_SHORT : MUBUF_Store_Helper < + 0x0000001a, "BUFFER_STORE_SHORT", VReg_32 +>; def BUFFER_STORE_DWORD : MUBUF_Store_Helper < - 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32 + 0x0000001c, "BUFFER_STORE_DWORD", VReg_32 >; def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < - 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64 + 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64 >; def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < - 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32 + 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128 >; //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; @@ -1826,23 +1832,25 @@ defm : MUBUFLoad_Pattern ; -multiclass MUBUFStore_Pattern { +multiclass MUBUFStore_Pattern { def : Pat < - (global_store vt:$value, i64:$ptr), + (st vt:$value, i64:$ptr), (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0) >; def : Pat < - (global_store vt:$value, (add i64:$ptr, i64:$offset)), + (st vt:$value, (add i64:$ptr, i64:$offset)), (Instr 
$value, (SI_ADDR64_RSRC $ptr), $offset, 0) >; } -defm : MUBUFStore_Pattern ; -defm : MUBUFStore_Pattern ; -defm : MUBUFStore_Pattern ; -defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; /********** ====================== **********/ /********** Indirect adressing **********/ -- cgit v1.1 From 4c52d450dc3968267d1f089d36397fc785dcc7b4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:12:11 +0000 Subject: R600: Add support for global vector stores with elements less than 32-bits Tested-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188520 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 61 ++++++++++++++++++++++++++++++++++ lib/Target/R600/AMDGPUISelLowering.h | 4 +++ lib/Target/R600/R600ISelLowering.cpp | 8 ++++- 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 7ceab2d..78495ca 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -67,6 +67,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::f64, Promote); AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); + setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); + // XXX: This can be change to Custom, once ExpandVectorStores can + // handle 64-bit stores. + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); + setOperationAction(ISD::LOAD, MVT::f32, Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); @@ -187,6 +194,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::STORE: return LowerVectorStore(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); } return Op; @@ -487,6 +495,59 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, return DAG.getMergeValues(Ops, 2, DL); } +SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op, + SelectionDAG &DAG) const { + StoreSDNode *Store = dyn_cast(Op); + EVT MemVT = Store->getMemoryVT(); + unsigned MemBits = MemVT.getSizeInBits(); + + // Byte stores are really expensive, so if possible, try to pack + // 32-bit vector truncatating store into an i32 store. 
+ // XXX: We could also handle optimize other vector bitwidths + if (!MemVT.isVector() || MemBits > 32) { + return SDValue(); + } + + SDLoc DL(Op); + const SDValue &Value = Store->getValue(); + EVT VT = Value.getValueType(); + const SDValue &Ptr = Store->getBasePtr(); + EVT MemEltVT = MemVT.getVectorElementType(); + unsigned MemEltBits = MemEltVT.getSizeInBits(); + unsigned MemNumElements = MemVT.getVectorNumElements(); + EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); + SDValue Mask; + switch(MemEltBits) { + case 8: + Mask = DAG.getConstant(0xFF, PackedVT); + break; + case 16: + Mask = DAG.getConstant(0xFFFF, PackedVT); + break; + default: + llvm_unreachable("Cannot lower this vector store"); + } + SDValue PackedValue; + for (unsigned i = 0; i < MemNumElements; ++i) { + EVT ElemVT = VT.getVectorElementType(); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, + DAG.getConstant(i, MVT::i32)); + Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); + Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); + SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); + Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); + if (i == 0) { + PackedValue = Elt; + } else { + PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); + } + } + return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, + MachinePointerInfo(Store->getMemOperand()->getValue()), + Store->isVolatile(), Store->isNonTemporal(), + Store->getAlignment()); +} + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 8788c20..e3a0dcc 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -51,6 +51,10 @@ protected: void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl &Ins) const; + /// \brief Lower vector stores by merging the vector elements into an integer + /// of the same bitwidth. 
+ SDValue LowerVectorStore(const SDValue &Op, SelectionDAG &DAG) const; + public: AMDGPUTargetLowering(TargetMachine &TM); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index b6b6560..e10af2b 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1011,10 +1011,15 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = Op.getOperand(1); SDValue Ptr = Op.getOperand(2); + SDValue Result = AMDGPUTargetLowering::LowerVectorStore(Op, DAG); + if (Result.getNode()) { + return Result; + } + if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) { if (StoreNode->isTruncatingStore()) { EVT VT = Value.getValueType(); - assert(VT == MVT::i32); + assert(VT.bitsLE(MVT::i32)); EVT MemVT = StoreNode->getMemoryVT(); SDValue MaskConstant; if (MemVT == MVT::i8) { @@ -1571,6 +1576,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } } } + case AMDGPUISD::EXPORT: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) -- cgit v1.1 From 30d84d8dfa0433088d541c66b92af0da3855bc9c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:12:16 +0000 Subject: R600: Add support for global vector loads with element types less than 32-bits Tested-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188521 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 78495ca..746c479 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -91,6 +91,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand); + setOperationAction(ISD::FNEG, MVT::v2f32, Expand); setOperationAction(ISD::FNEG, MVT::v4f32, Expand); -- cgit v1.1 From a6a39ced095c2f453624ce62c4aead25db41a18f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:12:20 +0000 Subject: R600/SI: Fix incorrect encoding of DS_WRITE_B32 instructions The SIInsertWaits pass was overwriting the first operand (gds bit) of DS_WRITE_B32 with the second operand (value to write). This meant that any time the value to write was stored in an odd-numbered VGPR, the gds bit would be set, causing the instruction to write to GDS instead of LDS.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188522 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInsertWaits.cpp | 4 +--- lib/Target/R600/SIInstrInfo.td | 8 ++++---- lib/Target/R600/SIInstructions.td | 6 +++--- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index ba202e3..c477be5 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -134,9 +134,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // LGKM may uses larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { - MachineOperand &Op = MI.getOperand(0); - if (!Op.isReg()) - Op = MI.getOperand(1); + const MachineOperand &Op = MI.getOperand(0); assert(Op.isReg() && "First LGKM operand must be a register!"); unsigned Reg = Op.getReg(); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ecc4718..1965ba0 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -342,8 +342,8 @@ class VOP3_64 op, string opName, list pattern> : VOP3 < class DS_Load_Helper op, string asm, RegisterClass regClass> : DS < op, (outs regClass:$vdst), - (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, - i8imm:$offset0, i8imm:$offset1), + (ins VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, + i8imm:$offset0, i8imm:$offset1, i1imm:$gds), asm#" $vdst, $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]", []> { let mayLoad = 1; @@ -353,8 +353,8 @@ class DS_Load_Helper op, string asm, RegisterClass regClass> : DS < class DS_Store_Helper op, string asm, RegisterClass regClass> : DS < op, (outs), - (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, - i8imm:$offset0, i8imm:$offset1), + (ins VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, + i8imm:$offset0, i8imm:$offset1, i1imm:$gds), asm#" $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]", []> { let mayStore = 1; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 4eb3566..9856fa6 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1745,13 +1745,13 @@ def : Pat < def : Pat < (local_load i64:$src0), - (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0), - (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0)) + (i32 (DS_READ_B32 (EXTRACT_SUBREG $src0, sub0), + (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0, 0)) >; def : Pat < (local_store i32:$src1, i64:$src0), - (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0) + (DS_WRITE_B32 (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0, 0) >; /********** ================== **********/ -- cgit v1.1 From b07ec9606813cf16b137f0bb9ce09ca7ce57f026 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:18:43 +0000 Subject: Revert "R600/SI: Fix incorrect encoding of DS_WRITE_B32 instructions" This reverts commit a6a39ced095c2f453624ce62c4aead25db41a18f. This is the wrong version of this fix. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188523 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInsertWaits.cpp | 4 +++- lib/Target/R600/SIInstrInfo.td | 8 ++++---- lib/Target/R600/SIInstructions.td | 6 +++--- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index c477be5..ba202e3 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -134,7 +134,9 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // LGKM may uses larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { - const MachineOperand &Op = MI.getOperand(0); + MachineOperand &Op = MI.getOperand(0); + if (!Op.isReg()) + Op = MI.getOperand(1); assert(Op.isReg() && "First LGKM operand must be a register!"); unsigned Reg = Op.getReg(); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 1965ba0..ecc4718 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -342,8 +342,8 @@ class VOP3_64 op, string opName, list pattern> : VOP3 < class DS_Load_Helper op, string asm, RegisterClass regClass> : DS < op, (outs regClass:$vdst), - (ins VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, - i8imm:$offset0, i8imm:$offset1, i1imm:$gds), + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, + i8imm:$offset0, i8imm:$offset1), asm#" $vdst, $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]", []> { let mayLoad = 1; @@ -353,8 +353,8 @@ class DS_Load_Helper op, string asm, RegisterClass regClass> : DS < class DS_Store_Helper op, string asm, RegisterClass regClass> : DS < op, (outs), - (ins VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, - i8imm:$offset0, i8imm:$offset1, i1imm:$gds), + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, + i8imm:$offset0, i8imm:$offset1), asm#" $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]", []> { let mayStore = 1; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9856fa6..4eb3566 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1745,13 +1745,13 @@ def : Pat < def : Pat < (local_load i64:$src0), - (i32 (DS_READ_B32 (EXTRACT_SUBREG $src0, sub0), - (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0, 0)) + (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0), + (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0)) >; def : Pat < (local_store i32:$src1, i64:$src0), - (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0) + (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0) >; /********** ================== **********/ -- cgit v1.1
From 0163356ad1944dda162956993a95e547dc03251b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 16 Aug 2013 06:07:34 +0000 Subject: Don't use v16i32 for load pattern matching. All 512-bit loads are casted to v8i64.
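The rationale can be sanity-checked on the host: a 512-bit memory access yields the same bytes whether the register is then interpreted as v8i64 or v16i32, so one load fragment suffices and bitconverts recover the other view. A small illustrative sketch (assumes little-endian lane layout, as on x86; this is not LLVM API code):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint8_t mem[64];                      // one 512-bit load's worth of memory
      for (int i = 0; i < 64; ++i) mem[i] = (uint8_t)i;
      uint64_t v8i64[8];
      uint32_t v16i32[16];
      memcpy(v8i64, mem, 64);               // same bytes, two interpretations
      memcpy(v16i32, mem, 64);
      uint64_t lane0 = ((uint64_t)v16i32[1] << 32) | v16i32[0];
      printf("%d\n", v8i64[0] == lane0);    // prints 1: the two views agree
      return 0;
    }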
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188534 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 14 +++++++------- lib/Target/X86/X86InstrFragmentsSIMD.td | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index f4528a9..d100e88 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -555,7 +555,7 @@ let Constraints = "$src1 = $dst" in { (bitconvert (mem_frag addr:$src3)))))]>, EVEX_4V; } } -defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem, +defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv8i64, i512mem, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; @@ -1107,7 +1107,7 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), } multiclass avx512_mov_int opc, string asm, RegisterClass RC, - RegisterClass KRC, + RegisterClass KRC, PatFrag bc_frag, PatFrag ld_frag, X86MemOperand x86memop> { let neverHasSideEffects = 1 in def rr : AVX512XSI, + [(set RC:$dst, (bc_frag (ld_frag addr:$src)))]>, EVEX; let Constraints = "$src1 = $dst" in { def rrk : AVX512XSI, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, bc_v16i32, + memopv8i64, i512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, bc_v8i64, + memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; let AddedComplexity = 20 in { def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 3d6370f..fe35393 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -277,7 +277,6 @@ def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; // 512-bit load pattern fragments def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>; def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>; -def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>; def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>; // 128-/256-/512-bit extload pattern fragments @@ -351,8 +350,6 @@ def alignedloadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (alignedload512 node:$ptr))>; def alignedloadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (alignedload512 node:$ptr))>; -def alignedloadv16i32 : PatFrag<(ops node:$ptr), - (v16i32 (alignedload512 node:$ptr))>; def alignedloadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (alignedload512 node:$ptr))>; @@ -379,14 +376,12 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; // 256-bit memop pattern fragments // NOTE: all 256-bit integer vector loads are promoted to v4i64 def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; -def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>; def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; // 512-bit memop pattern fragments def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>; def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>; -def 
memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>; def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a @@ -438,6 +433,11 @@ def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>; def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; +// 512-bit bitconvert pattern fragments +def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>; +def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>; + + def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; -- cgit v1.1
From 6f297afb7ea6ab53be1feae4a335e7b1cb7a1f02 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Fri, 16 Aug 2013 10:17:03 +0000 Subject: This patch implements the wait instruction for MIPS. Examples are added in the test files. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrInfo.td | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 1753207..3d7ba00 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -673,6 +673,15 @@ class DEI_FT : InstSE<(outs RO:$rt), (ins), !strconcat(opstr, "\t$rt"), [], NoItinerary, FrmOther>; +// Wait +class WAIT_FT : + InstSE<(outs), (ins), opstr, [], NoItinerary, FrmOther> { + let Inst{31-26} = 0x10; + let Inst{25} = 1; + let Inst{24-6} = 0; + let Inst{5-0} = 0x20; +} + // Sync let hasSideEffects = 1 in class SYNC_FT : @@ -990,6 +999,8 @@ def DERET : ER_FT<"deret">, ER_FM<0x1f>; def EI : DEI_FT<"ei", GPR32Opnd>, EI_FM<1>; def DI : DEI_FT<"di", GPR32Opnd>, EI_FM<0>; +def WAIT : WAIT_FT<"wait">; + /// Load-linked, Store-conditional let Predicates = [NotN64, HasStdEnc] in { def LL : LLBase<"ll", GPR32Opnd, mem>, LW_FM<0x30>; -- cgit v1.1
From 6c51f89498dd813c8dd16e46069decf2897b31b2 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 16 Aug 2013 10:22:54 +0000 Subject: [SystemZ] Fix sign of integer memcmp result r188163 used CLC to implement memcmp. Code that compares the result directly against zero can test the CC value produced by CLC, but code that needs an integer result must use IPM. The sequence I'd used was:

    ipm <reg>
    sll <reg>, 2
    sra <reg>, 30

but I'd forgotten that this inverts the order, so that CC==1 ("less") becomes an integer greater than zero, and CC==2 ("greater") becomes an integer less than zero. This sequence should only be used if the CLC arguments are reversed to compensate. The problem then is that the branch condition must also be reversed when testing the CLC result directly. Rather than do that, I went for a different sequence that works with the natural CLC order:

    ipm <reg>
    srl <reg>, 28
    rll <reg>, <reg>, 31

One advantage of this is that it doesn't clobber CC. A disadvantage is that any sign extension to 64 bits must be done separately, rather than being folded into the shifts.
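The arithmetic of the new sequence can be checked with a quick host-side model; this sketch only models the bit manipulation performed by IPM, SRL and RLL, not actual SystemZ code:

    #include <cstdint>
    #include <cstdio>

    static int32_t ccToInt(uint32_t cc) {       // cc is the 2-bit condition code
      uint32_t ipm = cc << 28;                  // IPM: CC lands in bits 29-28
      uint32_t srl = ipm >> 28;                 // srl 28: CC now in bits 1-0
      uint32_t rll = (srl << 31) | (srl >> 1);  // rll 31: rotate left by 31
      return (int32_t)rll;
    }

    int main() {
      // CC==0 (equal) -> 0, CC==1 (less) -> negative, CC==2 (greater) -> positive
      printf("%d %d %d\n", ccToInt(0), ccToInt(1) < 0, ccToInt(2) > 0);  // 0 1 1
      return 0;
    }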
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188538 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 55 ++++++++++++-------------- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 15 +++---- 2 files changed, 34 insertions(+), 36 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 54a8669..f2e877f 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -311,18 +311,11 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI, return false; } -// If Reg is a virtual register that is used by only a single non-debug -// instruction, return the defining instruction, otherwise return null. -static MachineInstr *getDefSingleUse(const MachineRegisterInfo *MRI, - unsigned Reg) { +// If Reg is a virtual register, return its definition, otherwise return null. +static MachineInstr *getDef(unsigned Reg, + const MachineRegisterInfo *MRI) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) return 0; - - MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); - MachineRegisterInfo::use_nodbg_iterator E = MRI->use_nodbg_end(); - if (I == E || llvm::next(I) != E) - return 0; - return MRI->getUniqueVRegDef(Reg); } @@ -333,42 +326,46 @@ static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) { MI->getOperand(3).getImm() == Imm); } +// If the destination of MI has no uses, delete it as dead. +static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) { + if (MRI->use_nodbg_empty(MI->getOperand(0).getReg())) + MI->eraseFromParent(); +} + // Compare compares SrcReg against zero. Check whether SrcReg contains -// the result of an IPM sequence that is only used by Compare. Try to -// delete both of them if so and return true if a change was made. -static bool removeIPM(MachineInstr *Compare, unsigned SrcReg, - const MachineRegisterInfo *MRI, - const TargetRegisterInfo *TRI) { - MachineInstr *SRA = getDefSingleUse(MRI, SrcReg); - if (!SRA || !isShift(SRA, SystemZ::SRA, 30)) +// the result of an IPM sequence whose input CC survives until Compare, +// and whether Compare is therefore redundant. Delete it and return +// true if so. +static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI) { + MachineInstr *RLL = getDef(SrcReg, MRI); + if (!RLL || !isShift(RLL, SystemZ::RLL, 31)) return false; - MachineInstr *SLL = getDefSingleUse(MRI, SRA->getOperand(1).getReg()); - if (!SLL || !isShift(SLL, SystemZ::SLL, 2)) + MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI); + if (!SRL || !isShift(SRL, SystemZ::SRL, 28)) return false; - MachineInstr *IPM = getDefSingleUse(MRI, SLL->getOperand(1).getReg()); + MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI); if (!IPM || IPM->getOpcode() != SystemZ::IPM) return false; // Check that there are no assignments to CC between the IPM and Compare, - // except for the SRA that we'd like to delete. We can ignore SLL because - // it does not assign to CC. We can also ignore uses of the SRA CC result, - // since it is effectively restoring CC to the value it had before IPM - // (for all current use cases). 
if (IPM->getParent() != Compare->getParent()) return false; MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare; for (++MBBI; MBBI != MBBE; ++MBBI) { MachineInstr *MI = MBBI; - if (MI != SRA && MI->modifiesRegister(SystemZ::CC, TRI)) + if (MI->modifiesRegister(SystemZ::CC, TRI)) return false; } - IPM->eraseFromParent(); - SLL->eraseFromParent(); - SRA->eraseFromParent(); Compare->eraseFromParent(); + eraseIfDead(RLL, MRI); + eraseIfDead(SRL, MRI); + eraseIfDead(IPM, MRI); + return true; } @@ -381,7 +378,7 @@ SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; if (Value == 0 && !IsLogical && - removeIPM(Compare, SrcReg, MRI, TM.getRegisterInfo())) + removeIPMBasedCompare(Compare, SrcReg, MRI, TM.getRegisterInfo())) return true; return false; } diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 341dc94..825153c 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -140,14 +140,15 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, Src1, Src2, Size); SDValue Glue = Chain.getValue(1); // IPM inserts the CC value into bits 29 and 28, with 0 meaning "equal", - // 1 meaning "greater" and 2 meaning "less". Convert them into an - // integer that is respectively equal, greater or less than 0. + // 1 meaning "less" and 2 meaning "greater". Bits 30 and 31 are zero. + // Convert this into an integer that is respectively equal, less + // or greater than 0. SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); - SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM, - DAG.getConstant(2, MVT::i32)); - SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL, - DAG.getConstant(30, MVT::i32)); - return std::make_pair(SRA, Chain); + SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(28, MVT::i32)); + SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, + DAG.getConstant(31, MVT::i32)); + return std::make_pair(ROTL, Chain); } } return std::make_pair(SDValue(), SDValue()); -- cgit v1.1 From 6a079fef4fad3e6c2e07c9e1d0776e20a0b05b1e Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 16 Aug 2013 10:55:47 +0000 Subject: [SystemZ] Fix handling of 64-bit memcmp results Generalize r188163 to cope with return types other than MVT::i32, just as the existing visitMemCmpCall code did. I've split this out into a subroutine so that it can be used for other upcoming patches. I also noticed that I'd used the wrong API to record the out chain. It's a load that uses DAG.getRoot() rather than getRoot(), so the out chain should go on PendingLoads. I don't have a testcase for that because we don't do any interesting scheduling on z yet. 
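The consequence for wider return types can be seen with plain integer conversions: the widening must preserve the sign of the 32-bit value, which is what the separate sign-extension step (the LGFR handled in the diff below) provides. A small host-side sketch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t less = INT32_MIN;        // 32-bit memcmp result meaning "less"
      int64_t zext = (uint32_t)less;   // zero extension: wrongly positive
      int64_t sext = less;             // sign extension: stays negative
      printf("%lld %lld\n", (long long)zext, (long long)sext);
      // prints: 2147483648 -2147483648
      return 0;
    }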
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188540 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index f2e877f..a84ce28 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -339,7 +339,12 @@ static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) { static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI) { + MachineInstr *LGFR = 0; MachineInstr *RLL = getDef(SrcReg, MRI); + if (RLL && RLL->getOpcode() == SystemZ::LGFR) { + LGFR = RLL; + RLL = getDef(LGFR->getOperand(1).getReg(), MRI); + } if (!RLL || !isShift(RLL, SystemZ::RLL, 31)) return false; @@ -362,6 +367,8 @@ static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, } Compare->eraseFromParent(); + if (LGFR) + eraseIfDead(LGFR, MRI); eraseIfDead(RLL, MRI); eraseIfDead(SRL, MRI); eraseIfDead(IPM, MRI); -- cgit v1.1 From e1b2af731e2a45344a7c502232f66c55cd746da0 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 16 Aug 2013 11:21:54 +0000 Subject: [SystemZ] Use CLST to implement strcmp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188544 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 63 ++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZISelLowering.h | 7 +++ lib/Target/SystemZ/SystemZInstrFormats.td | 20 ++++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 4 ++ lib/Target/SystemZ/SystemZOperators.td | 7 +++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 37 +++++++++++---- lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 6 +++ 7 files changed, 134 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 899b08c..1fe54f1 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1702,6 +1702,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(UDIVREM64); OPCODE(MVC); OPCODE(CLC); + OPCODE(STRCMP); OPCODE(IPM); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); @@ -2261,6 +2262,66 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, return MBB; } +// Decompose string pseudo-instruction MI into a loop that continually performs +// Opcode until CC != 3. 
+MachineBasicBlock * +SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + + uint64_t End1Reg = MI->getOperand(0).getReg(); + uint64_t Start1Reg = MI->getOperand(1).getReg(); + uint64_t Start2Reg = MI->getOperand(2).getReg(); + uint64_t CharReg = MI->getOperand(3).getReg(); + + const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; + uint64_t This1Reg = MRI.createVirtualRegister(RC); + uint64_t This2Reg = MRI.createVirtualRegister(RC); + uint64_t End2Reg = MRI.createVirtualRegister(RC); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // R0W = %CharReg + // # fall through to LoopMMB + BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0W).addReg(CharReg); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] + // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] + // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0W + // JO LoopMBB + // # fall through to DoneMMB + MBB = LoopMBB; + MBB->addLiveIn(SystemZ::R0W); + + BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) + .addReg(Start1Reg).addMBB(StartMBB) + .addReg(End1Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) + .addReg(Start2Reg).addMBB(StartMBB) + .addReg(End2Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(Opcode)) + .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) + .addReg(This1Reg).addReg(This2Reg); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + DoneMBB->addLiveIn(SystemZ::CC); + + MI->eraseFromParent(); + return DoneMBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -2488,6 +2549,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitMemMemWrapper(MI, MBB, SystemZ::MVC); case SystemZ::CLCWrapper: return emitMemMemWrapper(MI, MBB, SystemZ::CLC); + case SystemZ::CLSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::CLST); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 0036ce8..4ee87d3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -84,6 +84,10 @@ namespace SystemZISD { // as for MVC. CLC, + // Use a CLST-based sequence to implement strcmp(). The two input operands + // are the addresses of the strings to compare. + STRCMP, + // Store the CC value in bits 29 and 28 of an integer. 
IPM, @@ -240,6 +244,9 @@ private: MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI, MachineBasicBlock *BB, unsigned Opcode) const; + MachineBasicBlock *emitStringWrapper(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Opcode) const; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 2d18e03..1f80c27 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1405,3 +1405,23 @@ multiclass MemorySS opcode, [(operator bdaddr12only:$dest, bdaddr12only:$src, imm32len8:$length)]>; } + +// Define an instruction that operates on two strings, both terminated +// by the character in R0. The instruction processes a CPU-determinated +// number of bytes at a time and sets CC to 3 if the instruction needs +// to be repeated. Also define a pseudo instruction that represents +// the full loop (the main instruction plus the branch on CC==3). +multiclass StringRRE opcode, + SDPatternOperator operator> { + def "" : InstRRE { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; + } + let usesCustomInserter = 1 in + def Loop : Pseudo<(outs GR64:$end), + (ins GR64:$start1, GR64:$start2, GR32:$char), + [(set GR64:$end, (operator GR64:$start1, GR64:$start2, + GR32:$char))]>; +} diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 834ffed..31832f7 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -996,6 +996,10 @@ defm : ZXB; let mayLoad = 1, Defs = [CC] in defm CLC : MemorySS<"clc", 0xD5, z_clc>; +// String comparison. +let mayLoad = 1, Defs = [CC], Uses = [R0W] in + defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; + //===----------------------------------------------------------------------===// // Atomic operations //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 8a5b909..816fb5d 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -58,6 +58,11 @@ def SDT_ZMemMemLength : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; +def SDT_ZString : SDTypeProfile<1, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisPtrTy<2>, + SDTCisVT<3, i32>]>; def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; //===----------------------------------------------------------------------===// @@ -114,6 +119,8 @@ def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, [SDNPInGlue]>; diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 825153c..789594b 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -126,6 +126,19 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, return SDValue(); } +// Convert the current CC value into an integer that is 0 if CC == 0, +// less than zero if CC == 1 and greater than zero if CC >= 2. 
+// The sequence starts with IPM, which puts CC into bits 29 and 28 +// of an integer and clears bits 30 and 31. +static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(28, MVT::i32)); + SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, + DAG.getConstant(31, MVT::i32)); + return ROTL; +} + std::pair SystemZSelectionDAGInfo:: EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, @@ -139,17 +152,21 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, Size); SDValue Glue = Chain.getValue(1); - // IPM inserts the CC value into bits 29 and 28, with 0 meaning "equal", - // 1 meaning "less" and 2 meaning "greater". Bits 30 and 31 are zero. - // Convert this into an integer that is respectively equal, less - // or greater than 0. - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); - SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(28, MVT::i32)); - SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, - DAG.getConstant(31, MVT::i32)); - return std::make_pair(ROTL, Chain); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); } } return std::make_pair(SDValue(), SDValue()); } + +std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const { + SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); + SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(0, MVT::i32)); + Chain = Unused.getValue(1); + SDValue Glue = Chain.getValue(2); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index c757e16..c874a4d 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -45,6 +45,12 @@ public: SDValue Src1, SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; + + virtual std::pair + EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, + MachinePointerInfo Op1PtrInfo, + MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; }; } -- cgit v1.1 From 4fc7355a21e1fa838406e15459aaf54a58fcf909 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 16 Aug 2013 11:29:37 +0000 Subject: [SystemZ] Use MVST to implement strcpy and stpcpy git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188546 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 3 +++ lib/Target/SystemZ/SystemZISelLowering.h | 3 +++ lib/Target/SystemZ/SystemZInstrInfo.td | 4 ++++ lib/Target/SystemZ/SystemZOperators.td | 2 ++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 11 +++++++++++ lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 7 +++++++ 6 files changed, 30 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 1fe54f1..b22cc40 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1703,6 +1703,7 @@ const char 
*SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(MVC); OPCODE(CLC); OPCODE(STRCMP); + OPCODE(STPCPY); OPCODE(IPM); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); @@ -2551,6 +2552,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitMemMemWrapper(MI, MBB, SystemZ::CLC); case SystemZ::CLSTLoop: return emitStringWrapper(MI, MBB, SystemZ::CLST); + case SystemZ::MVSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::MVST); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 4ee87d3..b27f167 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -84,6 +84,9 @@ namespace SystemZISD { // as for MVC. CLC, + // Use an MVST-based sequence to implement stpcpy(). + STPCPY, + // Use a CLST-based sequence to implement strcmp(). The two input operands // are the addresses of the strings to compare. STRCMP, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 31832f7..43537aa 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -336,6 +336,10 @@ def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; let mayLoad = 1, mayStore = 1 in defm MVC : MemorySS<"mvc", 0xD2, z_mvc>; +// String moves. +let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0W] in + defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; + defm LoadStore8_32 : MVCLoadStore; defm LoadStore16_32 : MVCLoadStore; def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, [SDNPInGlue]>; diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 789594b..0a2080d 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -159,6 +159,17 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dest, SDValue Src, + MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { + SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); + SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, + DAG.getConstant(0, MVT::i32)); + return std::make_pair(isStpcpy ? 
EndDest : Dest, EndDest.getValue(1)); +} + +std::pair SystemZSelectionDAGInfo:: EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, MachinePointerInfo Op1PtrInfo, diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index c874a4d..123cb63 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -47,6 +47,13 @@ public: MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; virtual std::pair + EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dest, SDValue Src, + MachinePointerInfo DestPtrInfo, + MachinePointerInfo SrcPtrInfo, + bool isStpcpy) const LLVM_OVERRIDE; + + virtual std::pair EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, MachinePointerInfo Op1PtrInfo, -- cgit v1.1 From 19262ee0725a09b7c621a3d2eb66ba1513ae932a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 16 Aug 2013 11:41:43 +0000 Subject: [SystemZ] Use SRST to implement strlen and strnlen It would also make sense to use it for memchr; I'm working on that now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188547 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 3 +++ lib/Target/SystemZ/SystemZISelLowering.h | 6 +++++ lib/Target/SystemZ/SystemZInstrInfo.td | 4 +++ lib/Target/SystemZ/SystemZOperators.td | 2 ++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 34 ++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 10 ++++++++ 6 files changed, 59 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index b22cc40..788fc2e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1704,6 +1704,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(CLC); OPCODE(STRCMP); OPCODE(STPCPY); + OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); @@ -2554,6 +2555,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitStringWrapper(MI, MBB, SystemZ::CLST); case SystemZ::MVSTLoop: return emitStringWrapper(MI, MBB, SystemZ::MVST); + case SystemZ::SRSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::SRST); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index b27f167..21677a0 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -91,6 +91,12 @@ namespace SystemZISD { // are the addresses of the strings to compare. STRCMP, + // Use an SRST-based sequence to search a block of memory. The first + // operand is the end address, the second is the start, and the third + // is the character to search for. CC is set to 1 on success and 2 + // on failure. + SEARCH_STRING, + // Store the CC value in bits 29 and 28 of an integer. IPM, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 43537aa..7789d61 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1163,6 +1163,10 @@ let usesCustomInserter = 1 in { def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; } +// Search a block of memory for a character. 
+let mayLoad = 1, Defs = [CC], Uses = [R0W] in + defm SRST : StringRRE<"srst", 0xb25e, z_search_string>; + //===----------------------------------------------------------------------===// // Peepholes. //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index e0eeab1..1a3d45e 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -123,6 +123,8 @@ def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, [SDNPInGlue]>; diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 0a2080d..73bd480 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -181,3 +181,37 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Glue = Chain.getValue(2); return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); } + +// Search from Src for a null character, stopping once Src reaches Limit. +// Return a pair of values, the first being the number of nonnull characters +// and the second being the out chain. +// +// This can be used for strlen by setting Limit to 0. +static std::pair getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, + SDValue Chain, SDValue Src, + SDValue Limit) { + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, DAG.getConstant(0, MVT::i32)); + Chain = End.getValue(1); + SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); + return std::make_pair(Len, Chain); +} + +std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT)); +} + +std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const { + EVT PtrVT = Src.getValueType(); + MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); + return getBoundedStrlen(DAG, DL, Chain, Src, Limit); +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 123cb63..26d6488 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -58,6 +58,16 @@ public: SDValue Src1, SDValue Src2, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; + + virtual std::pair + EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, MachinePointerInfo SrcPtrInfo) const + LLVM_OVERRIDE; + + virtual std::pair + EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue MaxLength, + MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE; }; } -- cgit v1.1 From 8b36f9e4314ac4d786d2d4fd5fa9e7858487ee9e Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Fri, 16 
Aug 2013 11:55:44 +0000 Subject: Fix Thumb2 aliasing complementary instructions taking modified immediates There are many Thumb instructions which take 12-bit immediates encoded in a special 8-bit value + 4-bit rotator form. Not all numbers are represented, and it's legal to transform an assembly instruction to be able to encode the immediate. For example: AND and BIC are complementary instructions; one can switch the AND to a BIC as long as the immediate is complemented. The intent is to switch one instruction into its complementary one when the immediate cannot be encoded in the form requested in the original assembly and when the complementary immediate is encodable. The patch addresses two issues: 1. definition of t2SOImmNot immediate - it has to check that the original value is not encoded naturally 2. t2AND and t2BIC instruction aliases which should use the Thumb2 SOImm operand rather than the ARM one. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188548 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 8 ++++---- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 77a3759..376246b 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4353,16 +4353,16 @@ def : t2InstAlias<"mvn${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; // Same for AND <--> BIC def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm", - (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + (t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"bic${s}${p} $Rdn, $imm", - (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + (t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm", - (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + (t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"and${s}${p} $Rdn, $imm", - (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; // Likewise, "add Rd, t2_so_imm_neg" -> sub def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3d7baf5..d48ece1 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -946,7 +946,8 @@ public: const MCConstantExpr *CE = dyn_cast(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return ARM_AM::getT2SOImmVal(~Value) != -1; + return ARM_AM::getT2SOImmVal(Value) == -1 && + ARM_AM::getT2SOImmVal(~Value) != -1; } bool isT2SOImmNeg() const { if (!isImm()) return false; -- cgit v1.1
From e97fc44045732de9fc4715241013f9238ec007dc Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Fri, 16 Aug 2013 12:03:00 +0000 Subject: Add support for Thumb2 literal loads with negative zero offset Thumb2 literal loads use an offset encoding which allows for negative zero. This fixes parsing and encoding so that #-0 is correctly processed. The parser represents #-0 as INT32_MIN.
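The INT32_MIN sentinel works because no real offset takes that value, and the emitter can decode it back into a cleared U bit with a zero immediate. A rough host-side sketch of that decode step, using a hypothetical helper that mirrors the logic in the diff below:

    #include <cstdint>
    #include <cstdio>

    static void encodeOffset(int32_t off, unsigned &imm12, bool &isAdd) {
      if (off == INT32_MIN) {        // sentinel for #-0 (note: negating
        imm12 = 0; isAdd = false;    // INT32_MIN would overflow, so it must
      } else if (off < 0) {          // be special-cased, not negated)
        imm12 = (unsigned)-off; isAdd = false;
      } else {
        imm12 = (unsigned)off; isAdd = true;
      }
    }

    int main() {
      unsigned imm; bool add;
      encodeOffset(INT32_MIN, imm, add);
      printf("imm=%u add=%d\n", imm, add);  // imm=0 add=0, i.e. [pc, #-0]
      return 0;
    }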
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188549 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 -- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index d48ece1..df9306a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1793,8 +1793,6 @@ public: void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); int32_t Imm = Memory.OffsetImm->getValue(); - // FIXME: Handle #-0 - if (Imm == INT32_MIN) Imm = 0; Inst.addOperand(MCOperand::CreateImm(Imm)); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index c0c21d3..a247b02 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -778,8 +778,10 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, } else { Reg = ARM::PC; int32_t Offset = MO.getImm(); - // FIXME: Handle #-0. - if (Offset < 0) { + if (Offset == INT32_MIN) { + Offset = 0; + isAdd = false; + } else if (Offset < 0) { Offset *= -1; isAdd = false; } -- cgit v1.1
From 671aea08f8dfb09a0191e7b9e266c2884efd3252 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 16 Aug 2013 12:52:08 +0000 Subject: When initializing the PIC global base register on ARM/ELF add pc to fix the address. This unbreaks PIC with fast isel on ELF targets (PR16717). The output matches what GCC and SDag do for PIC but may not cover all of the many flavors of PIC that exist. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188551 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 8cdb853..868dd47 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -130,6 +130,10 @@ namespace { MIB.addImm(0); AddDefaultPred(MIB); + // Fix the GOT address by adding pc. + BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg) .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex); + return true; } -- cgit v1.1
From d0cad1f3ed71bd98ef8017c31573abf22fac9588 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 16 Aug 2013 14:48:09 +0000 Subject: R600: Allocate memoperand in the MachineFunction so it doesn't leak.
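The difference being fixed is ownership: a bare new MachineMemOperand(...) is never freed on this path, while MF.getMachineMemOperand(...) allocates from storage that is torn down together with the function. A rough stand-in for that ownership model; the Arena type here is hypothetical and purely illustrative:

    #include <memory>
    #include <vector>

    // Hypothetical stand-in for a function-owned allocator.
    struct Arena {
      std::vector<std::unique_ptr<int>> Objects;
      int *allocate(int V) {
        Objects.push_back(std::make_unique<int>(V));
        return Objects.back().get();  // freed when the Arena is destroyed
      }
    };

    int main() {
      Arena Fn;                    // plays the role of the MachineFunction
      int *MMO = Fn.allocate(42);  // analogous to MF.getMachineMemOperand(...)
      (void)MMO;                   // no matching delete needed; nothing leaks
      return 0;
    }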
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188555 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0bd8bce..dc721e9 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -423,9 +423,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(2) }; - MachineMemOperand *MMO = new MachineMemOperand(MachinePointerInfo(), - MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, - VT.getSizeInBits() / 8, 4); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, + VT.getSizeInBits() / 8, 4); return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL, Op->getVTList(), Ops, 2, VT, MMO); } -- cgit v1.1
From 1dbfa36ba1e2f6e4630b5f41254e307f81cece80 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 16 Aug 2013 15:26:36 +0000 Subject: Test commit. Just a blank line git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188556 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index c03924f..9e79bfd 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -2505,3 +2505,4 @@ def ST_FW : MSAPat<(store (v4f32 MSA128:$ws), addr:$addr), (ST_W MSA128:$ws, addr:$addr)>; def ST_FD : MSAPat<(store (v2f64 MSA128:$ws), addr:$addr), (ST_D MSA128:$ws, addr:$addr)>; + -- cgit v1.1
From ea549a847d87cb8ce46f6a45b24ae888db697a07 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 16 Aug 2013 15:27:12 +0000 Subject: Reverted test commit (r188556) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188557 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 9e79bfd..c03924f 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -2505,4 +2505,3 @@ def ST_FW : MSAPat<(store (v4f32 MSA128:$ws), addr:$addr), (ST_W MSA128:$ws, addr:$addr)>; def ST_FD : MSAPat<(store (v2f64 MSA128:$ws), addr:$addr), (ST_D MSA128:$ws, addr:$addr)>; - -- cgit v1.1
From ebd4eec5386e946dc80f4d80e803125af55c2a68 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Fri, 16 Aug 2013 16:19:24 +0000 Subject: R600/SI: Fix broken encoding of DS_WRITE_B32 The logic in SIInsertWaits::getHwCounts() only really made sense for SMRD instructions, and trying to shoehorn it into handling DS_WRITE_B32 caused it to corrupt the encoding of that instruction by clobbering the first operand with the second one. Undo that damage and apply the SMRD logic only to SMRD instructions. Fixes some derivatives-related piglit regressions with radeonsi.
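The mechanism used by the fix is a new TSFlags bit, so the wait-count logic can branch on instruction kind instead of guessing from operands. A minimal host-side sketch of the flag test; the enum values match the SIDefines.h hunk below, everything else is illustrative:

    #include <cstdint>
    #include <cstdio>

    namespace SIInstrFlags {
      enum { MIMG = 1 << 3, SMRD = 1 << 4 };  // as added in SIDefines.h
    }

    static bool isSMRD(uint64_t TSFlags) {
      return (TSFlags & SIInstrFlags::SMRD) != 0;
    }

    int main() {
      uint64_t SmrdLoad = SIInstrFlags::SMRD;  // e.g. a scalar memory read
      uint64_t DsWrite = 0;                    // DS instructions leave SMRD clear
      printf("%d %d\n", (int)isSMRD(SmrdLoad), (int)isSMRD(DsWrite));  // 1 0
      return 0;
    }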
Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188558 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIDefines.h | 3 ++- lib/Target/R600/SIInsertWaits.cpp | 21 +++++++++++++-------- lib/Target/R600/SIInstrFormats.td | 3 +++ lib/Target/R600/SIInstrInfo.cpp | 4 ++++ lib/Target/R600/SIInstrInfo.h | 1 + 5 files changed, 23 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 572ed6a..f5445ad 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -13,7 +13,8 @@ namespace SIInstrFlags { enum { - MIMG = 1 << 3 + MIMG = 1 << 3, + SMRD = 1 << 4 }; } diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index ba202e3..7e42fb7 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -134,14 +134,19 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // LGKM may uses larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { - MachineOperand &Op = MI.getOperand(0); - if (!Op.isReg()) - Op = MI.getOperand(1); - assert(Op.isReg() && "First LGKM operand must be a register!"); - - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; + if (TII->isSMRD(MI.getOpcode())) { + + MachineOperand &Op = MI.getOperand(0); + assert(Op.isReg() && "First LGKM operand must be a register!"); + + unsigned Reg = Op.getReg(); + unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + Result.Named.LGKM = Size > 4 ? 2 : 1; + + } else { + // DS + Result.Named.LGKM = 1; + } } else { Result.Named.LGKM = 0; diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index cd1bbcd..9576c05 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -18,11 +18,13 @@ class InstSI pattern> : field bits<1> EXP_CNT = 0; field bits<1> LGKM_CNT = 0; field bits<1> MIMG = 0; + field bits<1> SMRD = 0; let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; let TSFlags{2} = LGKM_CNT; let TSFlags{3} = MIMG; + let TSFlags{4} = SMRD; } class Enc32 pattern> : @@ -142,6 +144,7 @@ class SMRD op, bits<1> imm, dag outs, dag ins, string asm, let Inst{31-27} = 0x18; //encoding let LGKM_CNT = 1; + let SMRD = 1; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 9bb4ad9..2719ea2 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -229,6 +229,10 @@ int SIInstrInfo::isMIMG(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MIMG; } +int SIInstrInfo::isSMRD(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::SMRD; +} + //===----------------------------------------------------------------------===// // Indirect addressing callbacks //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 8d24ab4..87b8063 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -48,6 +48,7 @@ public: virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; int isMIMG(uint16_t Opcode) const; + int isSMRD(uint16_t Opcode) const; virtual int getIndirectIndexBegin(const MachineFunction &MF) const; -- cgit v1.1 From b89f197713737e8c93045eb4dbd9c1d7ab41acd4 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Fri, 16 
Aug 2013 16:19:31 +0000 Subject: R600/SI: Add pattern for xor of i1 Fixes two recent piglit regressions with radeonsi. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188559 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 4eb3566..436a2cd 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1144,7 +1144,9 @@ def : Pat < (S_OR_B64 $src0, $src1) >; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; -def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; +def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", + [(set i1:$dst, (xor i1:$src0, i1:$src1))] +>; def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>; def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>; -- cgit v1.1 From 9bb6c81683393363ed1ff8c66397f2d944c0966b Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 16 Aug 2013 20:05:04 +0000 Subject: [PowerPC] Preparatory refactoring for making prologue and epilogue safe on PPC32 SVR4 ABI [Patch and following text by Mark Minich; committing on his behalf.] There are FIXME's in PowerPC/PPCFrameLowering.cpp, method PPCFrameLowering::emitPrologue() related to "negative offsets of R1" on PPC32 SVR4. They're true, but the real issue is that on PPC32 SVR4 (and any ABI without a Red Zone), no spills may be made until after the stackframe is claimed, which also includes the LR spill which is at a positive offset. The same problem exists in emitEpilogue(), though there's no FIXME for it. I intend to fix this issue, making LLVM-compiled code finally safe for use on SVR4/EABI/e500 32-bit platforms (including in particular, OS-free embedded systems & kernel code, where interrupts may share the same stack as user code). In preparation for making these changes, to make the diffs for the functional changes less cluttered, I am providing the non-functional refactorings in two stages: Stage 1 does some minor fluffy refactorings to pull multiple method calls up into a single bool, creating named bools for repeated uses of obscure logic, moving some code up earlier because either stage 2 or my final version will require it earlier, and rewording/adding some comments. My stage 1 changes can be characterized as primarily fluffy cleanup, the purpose of which may be unclear until the stage 2 or final changes are made. My stage 2 refactorings combine the separate PPC32 & PPC64 logic, which is currently performed by largely duplicate code, into a single flow, with the differences handled by a group of constants initialized early in the methods. This submission is for my stage 1 changes. There should be no functional changes whatsoever; this is a pure refactoring. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188573 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 187 +++++++++++++++++--------------- 1 file changed, 102 insertions(+), 85 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 24d3a0b..5161de8 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -204,10 +204,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, unsigned FrameSize = UseEstimate ? 
MFI->estimateStackSize(MF) : MFI->getStackSize(); - // Get the alignments provided by the target, and the maximum alignment - // (if any) of the fixed frame objects. - unsigned TargetAlign = getStackAlignment(); - unsigned MaxAlign = MFI->getMaxAlignment(); + // Get stack alignments. The frame must be aligned to the greatest of these: + unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI + unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; const PPCRegisterInfo *RegInfo = @@ -346,12 +345,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { bool needsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); + // Get processor type. + bool isPPC64 = Subtarget.isPPC64(); + // Get the ABI. + bool isDarwinABI = Subtarget.isDarwinABI(); + bool isSVR4ABI = Subtarget.isSVR4ABI(); + assert((isDarwinABI || isSVR4ABI) && + "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); + // Prepare for frame info. MCSymbol *FrameLabel = 0; // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. - if (!Subtarget.isSVR4ABI()) + if (!isSVR4ABI) for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { HandleVRSaveUpdate(MBBI, TII); @@ -371,10 +378,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (MFI->isFrameAddressTaken()) replaceFPWithRealFP(MF); - // Get processor type. - bool isPPC64 = Subtarget.isPPC64(); - // Get operating system - bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); @@ -383,11 +386,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); + // Regarding this assert: Even though LR is saved in the caller's frame (i.e., + // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no + // Red Zone, an asynchronous event (a form of "callee") could claim a frame & + // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. + assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && + "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); + int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); int FPOffset = 0; if (HasFP) { - if (Subtarget.isSVR4ABI()) { + if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int FPIndex = FI->getFramePointerSaveIndex(); assert(FPIndex && "No Frame Pointer Save Slot!"); @@ -399,7 +409,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { int BPOffset = 0; if (HasBP) { - if (Subtarget.isSVR4ABI()) { + if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int BPIndex = FI->getBasePointerSaveIndex(); assert(BPIndex && "No Base Pointer Save Slot!"); @@ -410,6 +420,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } } + // Get stack alignments. + unsigned MaxAlign = MFI->getMaxAlignment(); + if (HasBP && MaxAlign > 1) + assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && + "Invalid alignment!"); + + // Frames of 32KB & larger require special handling because they cannot be + // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 
+ bool isLargeFrame = !isInt<16>(NegFrameSize); + if (isPPC64) { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); @@ -444,21 +464,19 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X12, getKillRegState(true)) .addImm(8) .addReg(PPC::X1); - } else { + } else { // PPC32... if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); if (HasFP) - // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative - // offsets of R1 is not allowed. + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) .addReg(PPC::R31) .addImm(FPOffset) .addReg(PPC::R1); if (HasBP) - // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative - // offsets of R1 is not allowed. + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) .addReg(PPC::R30) .addImm(BPOffset) @@ -468,23 +486,19 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { "Prologue CR saving supported only in 64-bit mode"); if (MustSaveLR) + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) .addReg(PPC::R0) .addImm(LROffset) .addReg(PPC::R1); } - // Skip if a leaf routine. + // Skip the rest if this is a leaf function & all spills fit in the Red Zone. if (!FrameSize) return; - // Get stack alignments. - unsigned MaxAlign = MFI->getMaxAlignment(); - // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now - if (!isPPC64) { - // PPC32. - + if (!isPPC64) { // PPC32... if (HasBP) { // Save a copy of r1 as the base pointer. BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R30) @@ -493,15 +507,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } if (HasBP && MaxAlign > 1) { - assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && - "Invalid alignment!"); - BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) .addReg(PPC::R1) .addImm(0) .addImm(32 - Log2_32(MaxAlign)) .addImm(31); - if (isInt<16>(NegFrameSize)) { + if (!isLargeFrame) { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0) .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize); @@ -519,11 +530,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R1, RegState::Kill) .addReg(PPC::R1) .addReg(PPC::R0); - } else if (isInt<16>(NegFrameSize)) { + + } else if (!isLargeFrame) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) .addReg(PPC::R1) .addImm(NegFrameSize) .addReg(PPC::R1); + } else { BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) .addImm(NegFrameSize >> 16); @@ -535,7 +548,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R1) .addReg(PPC::R0); } - } else { // PPC64. + } else { // PPC64... if (HasBP) { // Save a copy of r1 as the base pointer. 
BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X30) @@ -544,14 +557,11 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } if (HasBP && MaxAlign > 1) { - assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && - "Invalid alignment!"); - BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) .addReg(PPC::X1) .addImm(0) .addImm(64 - Log2_32(MaxAlign)); - if (isInt<16>(NegFrameSize)) { + if (!isLargeFrame) { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) .addReg(PPC::X0, RegState::Kill) .addImm(NegFrameSize); @@ -569,11 +579,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X1, RegState::Kill) .addReg(PPC::X1) .addReg(PPC::X0); - } else if (isInt<16>(NegFrameSize)) { + + } else if (!isLargeFrame) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) .addReg(PPC::X1) .addImm(NegFrameSize) .addReg(PPC::X1); + } else { BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) .addImm(NegFrameSize >> 16); @@ -625,11 +637,11 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // If there is a frame pointer, copy R1 into R31 if (HasFP) { - if (!isPPC64) { + if (!isPPC64) { // PPC32... BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31) .addReg(PPC::R1) .addReg(PPC::R1); - } else { + } else { // PPC64... BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31) .addReg(PPC::X1) .addReg(PPC::X1); @@ -641,8 +653,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Mark effective beginning of when frame pointer is ready. BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); - unsigned Reg = HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) - : (isPPC64 ? PPC::X1 : PPC::R1); + unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31; Reg = MRI->getDwarfRegNum(Reg, true); MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg)); } @@ -664,19 +675,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // For SVR4, don't emit a move for the CR spill slot if we haven't // spilled CRs. - if (Subtarget.isSVR4ABI() - && (PPC::CR2 <= Reg && Reg <= PPC::CR4) - && MustSaveCRs.empty()) - continue; + if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) + && MustSaveCRs.empty()) + continue; // For 64-bit SVR4 when we have spilled CRs, the spill location // is SP+8, not a frame-relative slot. - if (Subtarget.isSVR4ABI() - && Subtarget.isPPC64() - && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { MMI.addFrameInst(MCCFIInstruction::createOffset( Label, MRI->getDwarfRegNum(PPC::CR2, true), 8)); - continue; + continue; } int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); @@ -707,7 +715,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, RetOpcode == PPC::TCRETURNai8) && "Can only insert epilog into returning blocks"); - // Get alignment info so we know how to restore r1 + // Get alignment info so we know how to restore the SP. const MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes allocated from the FrameInfo. @@ -715,8 +723,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Get processor type. bool isPPC64 = Subtarget.isPPC64(); - // Get operating system + // Get the ABI. bool isDarwinABI = Subtarget.isDarwinABI(); + bool isSVR4ABI = Subtarget.isSVR4ABI(); + // Check if the link register (LR) has been saved. 
PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); @@ -729,7 +739,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, int FPOffset = 0; if (HasFP) { - if (Subtarget.isSVR4ABI()) { + if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int FPIndex = FI->getFramePointerSaveIndex(); assert(FPIndex && "No Frame Pointer Save Slot!"); @@ -741,7 +751,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, int BPOffset = 0; if (HasBP) { - if (Subtarget.isSVR4ABI()) { + if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int BPIndex = FI->getBasePointerSaveIndex(); assert(BPIndex && "No Base Pointer Save Slot!"); @@ -773,30 +783,35 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, FrameSize += StackAdj; } + // Frames of 32KB & larger require special handling because they cannot be + // indexed into with a simple LD/LWZ immediate offset operand. + bool isLargeFrame = !isInt<16>(FrameSize); + if (FrameSize) { - // The loaded (or persistent) stack pointer value is offset by the 'stwu' - // on entry to the function. Add this offset back now. + // In the prologue, the loaded (or persistent) stack pointer value is offset + // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now. if (!isPPC64) { // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the // value of R31 in this case. - if (FI->hasFastCall() && isInt<16>(FrameSize)) { - assert(hasFP(MF) && "Expecting a valid the frame pointer."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) - .addReg(PPC::R31).addImm(FrameSize); - } else if(FI->hasFastCall()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(FrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4)) - .addReg(PPC::R1) - .addReg(PPC::R31) - .addReg(PPC::R0); - } else if (isInt<16>(FrameSize) && - !HasBP && + if (FI->hasFastCall()) { + assert(HasFP && "Expecting a valid frame pointer."); + if (!isLargeFrame) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) + .addReg(PPC::R31).addImm(FrameSize); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) + .addImm(FrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addImm(FrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4)) + .addReg(PPC::R1) + .addReg(PPC::R31) + .addReg(PPC::R0); + } + } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) .addReg(PPC::R1).addImm(FrameSize); @@ -804,22 +819,24 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1) .addImm(0).addReg(PPC::R1); } - } else { - if (FI->hasFastCall() && isInt<16>(FrameSize)) { - assert(hasFP(MF) && "Expecting a valid the frame pointer."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) - .addReg(PPC::X31).addImm(FrameSize); - } else if(FI->hasFastCall()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addImm(FrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8)) - .addReg(PPC::X1) - .addReg(PPC::X31) - .addReg(PPC::X0); - } else if (isInt<16>(FrameSize) && !HasBP 
&&
+    } else { // PPC64...
+      if (FI->hasFastCall()) {
+        if (!isLargeFrame) {
+          assert(HasFP && "Expecting a valid frame pointer.");
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+            .addReg(PPC::X31).addImm(FrameSize);
+        } else {
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+            .addImm(FrameSize >> 16);
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+            .addReg(PPC::X0, RegState::Kill)
+            .addImm(FrameSize & 0xFFFF);
+          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+            .addReg(PPC::X1)
+            .addReg(PPC::X31)
+            .addReg(PPC::X0);
+        }
+      } else if (!isLargeFrame && !HasBP &&
                  !MFI->hasVarSizedObjects()) {
         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
           .addReg(PPC::X1).addImm(FrameSize);
@@ -854,7 +871,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
-  } else {
+  } else { // PPC32...
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
         .addImm(LROffset).addReg(PPC::R1);
--
cgit v1.1

From 80f60610826158dcbfbeb5c235ca0ea673d23281 Mon Sep 17 00:00:00 2001
From: Reed Kotler
Date: Fri, 16 Aug 2013 23:05:18 +0000
Subject: Fix a subtle difference between running clang vs llc for mips16.

The issue is how mips16 is viewed. It is not really a target type, yet there
has always been a target for it in the .td files. The proper spelling is
-mcpu=mips32 -mattr=+mips16, which is how clang treats it, but we have always
also had -mcpu=mips16. I should probably delete that now, but doing so will
require updating all of the mips16 .ll test cases. In this case, the
difference changed how we decide whether we have a count-bits instruction,
and therefore whether instruction lowering should expand ctlz. Now that we
have dual-mode compilation, -mattr=+mips16 really just indicates the initial
processor mode that we are compiling for. (It is also possible to have
-mcpu=mips64 -mattr=+mips16, but as far as I know nobody has ever built such
a processor, though there is an architecture manual for it.)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188586 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsSubtarget.cpp | 3 +++
 1 file changed, 3 insertions(+)
(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index e2e7e08..8383904 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -95,6 +95,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
   // Set UseSmallSection.
   UseSmallSection = !IsLinux && (RM == Reloc::Static);
+  // set some subtarget specific features
+  if (inMips16Mode())
+    HasBitCount=false;
 }

 bool
--
cgit v1.1

From 0673379712100110c213a0e7610b73b6c706e83d Mon Sep 17 00:00:00 2001
From: Jim Grosbach
Date: Fri, 16 Aug 2013 23:37:23 +0000
Subject: ARM: Clean up fast-isel machine verifier errors.

Lots of machine verifier errors result from using a plain GPR register class
for incoming argument copies. The more restrictive rGPR class is more
appropriate: it more accurately represents what is happening, and it lines up
better with isel later on, so the verifier is happier.

Reduces the number of ARM fast-isel tests not running with the verifier
enabled by over half.
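To make the pattern concrete, here is a minimal sketch of an argument copy using the restrictive class (not the committed code; FuncInfo, TII, and DL are the usual FastISel members, and PhysReg stands in for the incoming GPR argument register from the surrounding loop):

    // Constrain the vreg to rGPR rather than the plain GPR class returned by
    // TLI.getRegClassFor(MVT::i32), so later isel and the machine verifier
    // agree on what the register may hold.
    const TargetRegisterClass *RC = &ARM::rGPRRegClass;
    unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
    // Copy the incoming physical argument register into the new vreg.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(TargetOpcode::COPY), VReg).addReg(PhysReg);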
rdar://12594152 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188592 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index fc847b8..02ebe66 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -3049,7 +3049,7 @@ bool ARMFastISel::FastLowerArguments() { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC = &ARM::rGPRRegClass; Idx = 0; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++Idx) { -- cgit v1.1 From b49860ef030cb2dba0386278ee8737eecc4e7272 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 16 Aug 2013 23:37:31 +0000 Subject: ARM: Fix more fast-isel verifier failures. Teach the generic instruction selection helper functions to constrain the register classes of their input operands. For non-physical register references, the generic code needs to be careful not to mess that up when replacing references to result registers. As the comment indicates for MachineRegisterInfo::replaceRegWith(), it's important to call constrainRegClass() first. rdar://12594152 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188593 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 43 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 02ebe66..2845db3 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -176,6 +176,8 @@ class ARMFastISel : public FastISel { // Utility routines. private: + unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned OpNum, + unsigned Op); bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -291,6 +293,23 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } +unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II, + unsigned Op, unsigned OpNum) { + if (TargetRegisterInfo::isVirtualRegister(Op)) { + const TargetRegisterClass *RegClass = + TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + if (!MRI.constrainRegClass(Op, RegClass)) { + // If it's not legal to COPY between the register classes, something + // has gone very wrong before we got here. + unsigned NewOp = createResultReg(RegClass); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), NewOp).addReg(Op)); + return NewOp; + } + } + return Op; +} + unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); @@ -306,6 +325,9 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operand is sufficiently constrained to be legal + // for this instruction. 
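+  // (Operand 0 of II is the def written to ResultReg; register sources start
+  // at slot 1, which is the operand index being checked here.)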
+  Op0 = constrainOperandRegClass(II, Op0, 1);
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill));
@@ -326,6 +348,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
+  // Make sure the input operands are sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
+  Op1 = constrainOperandRegClass(II, Op1, 2);
+
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
@@ -349,6 +376,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
+  // Make sure the input operands are sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
+  Op1 = constrainOperandRegClass(II, Op1, 2);
+  Op2 = constrainOperandRegClass(II, Op2, 3);
+
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
@@ -373,6 +406,9 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
+  // Make sure the input operand is sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
@@ -395,6 +431,9 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
+  // Make sure the input operand is sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
@@ -418,6 +457,10 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
+  // Make sure the input operands are sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
+  Op1 = constrainOperandRegClass(II, Op1, 2);
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
--
cgit v1.1

From 785bd598529fa12d0a0f577c4d63c4ab371bc559 Mon Sep 17 00:00:00 2001
From: Jim Grosbach
Date: Fri, 16 Aug 2013 23:37:36 +0000
Subject: ARM: Fast-isel register class constrain for extends.

Properly constrain the operand register class for instructions used in
[sz]ext expansion. Update more tests to use the verifier now that we're
getting the register classes correct.
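The subtle part of the change below is the operand index: when the extend instruction also defines CPSR, the source register moves one slot to the right. A rough sketch of the numbering being relied on (illustrative instruction shapes, not the exact ARM operand lists):

    //   %dst        = OPC %src, imm   -> %src is MCInstrDesc operand 1
    //   %dst, %cpsr = OPC %src, imm   -> %src is MCInstrDesc operand 2
    unsigned SrcIdx = 1 + (setsCPSR ? 1 : 0);
    SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, SrcIdx);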
rdar://12594152 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188594 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 2845db3..fb8153d 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2774,6 +2774,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg); if (setsCPSR) MIB.addReg(ARM::CPSR, RegState::Define); + SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR); AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc)); if (hasS) AddDefaultCC(MIB); -- cgit v1.1 From 62c7749437a74a4e0cf28edff865bd82f2b9aecc Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 16 Aug 2013 23:37:40 +0000 Subject: ARM: Properly constrain comparison fastisel register classes. Ongoing 'make the verifier happy' improvements to ARM fast-isel. rdar://12594152 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188595 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index fb8153d..e1af947 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1537,13 +1537,15 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, } } + const MCInstrDesc &II = TII.get(CmpOpc); + SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0); if (!UseImm) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CmpOpc)) + SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(SrcReg1).addReg(SrcReg2)); } else { MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(SrcReg1); // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. @@ -1742,6 +1744,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { } unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri; + CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) .addReg(CondReg).addImm(0)); @@ -1758,12 +1761,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { MovCCOpc = isThumb2 ? 
ARM::t2MVNCCi : ARM::MVNCCi;
   }
   unsigned ResultReg = createResultReg(RC);
-  if (!UseImm)
+  if (!UseImm) {
+    Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
+    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
     .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
-  else
+  } else {
+    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
     .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+  }
   UpdateValueMap(I, ResultReg);
   return true;
 }
--
cgit v1.1

From 0991c314d7c1a2052963dc89af1d2f07134488b6 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Fri, 16 Aug 2013 23:51:24 +0000
Subject: R600: Expand vector float operations for both SI and R600
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188596 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 22 ++++++++++++++++++----
 lib/Target/R600/R600ISelLowering.cpp   |  9 ---------
 2 files changed, 18 insertions(+), 13 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 746c479..25b1e54 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -115,14 +115,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
   setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
-  static const int types[] = {
+  static const int IntTypes[] = {
     (int)MVT::v2i32,
     (int)MVT::v4i32
   };
-  const size_t NumTypes = array_lengthof(types);
+  const size_t NumIntTypes = array_lengthof(IntTypes);
-  for (unsigned int x = 0; x < NumTypes; ++x) {
-    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+  for (unsigned int x = 0; x < NumIntTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
     //Expand the following operations for the current type by default
     setOperationAction(ISD::ADD, VT, Expand);
     setOperationAction(ISD::AND, VT, Expand);
@@ -141,6 +141,20 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
     setOperationAction(ISD::VSELECT, VT, Expand);
     setOperationAction(ISD::XOR, VT, Expand);
   }
+
+  static const int FloatTypes[] = {
+    (int)MVT::v2f32,
+    (int)MVT::v4f32
+  };
+  const size_t NumFloatTypes = array_lengthof(FloatTypes);
+
+  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+    setOperationAction(ISD::FADD, VT, Expand);
+    setOperationAction(ISD::FDIV, VT, Expand);
+    setOperationAction(ISD::FMUL, VT, Expand);
+    setOperationAction(ISD::FSUB, VT, Expand);
+  }
 }

 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index e10af2b..b822431 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -38,15 +38,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   computeRegisterProperties();
-  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
-  setOperationAction(ISD::FADD, MVT::v2f32, Expand);
-  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
-  setOperationAction(ISD::FMUL, MVT::v2f32, Expand);
-  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
-
setOperationAction(ISD::FDIV, MVT::v2f32, Expand); - setOperationAction(ISD::FSUB, MVT::v4f32, Expand); - setOperationAction(ISD::FSUB, MVT::v2f32, Expand); - setOperationAction(ISD::FCOS, MVT::f32, Custom); setOperationAction(ISD::FSIN, MVT::f32, Custom); -- cgit v1.1 From 84c0bd9803547b1cb09a20032bbc151a718b9457 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 23:51:29 +0000 Subject: R600: Expand vector FFLOOR ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188597 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 25b1e54..cab07da 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -152,6 +152,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; setOperationAction(ISD::FADD, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); } -- cgit v1.1 From 3cae823f69f083c5eef19f0ea3128c48d7807c9b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 23:51:33 +0000 Subject: R600: Expand vector FRINT ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188598 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index cab07da..dd8b73f 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -154,6 +154,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); } } -- cgit v1.1 From 913da2b7b32daa5c018ec1b7422cf76c3d01649b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 17 Aug 2013 00:06:51 +0000 Subject: R600: Fix possible use of an uninitialized variable Spotted by Nick Lewycky! 
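The bug class being fixed below is worth spelling out: a flag assigned in some switch cases but not in all of them may be read later along an unassigned path. A reduced illustration in the same spirit (hypothetical names, not the R600 code):

    bool UseVariant;                  // no initializer
    switch (Kind) {
    case 1: UseVariant = true;  break;
    case 2: UseVariant = false; break;
    // any other Kind leaves UseVariant indeterminate
    }
    if (UseVariant)                   // possible read of an uninitialized value
      pickVariant();

The fix is simply to assign the flag on every path; here, that means setting it before the early return taken when TextureType is 0.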
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188599 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 1 +
 1 file changed, 1 insertion(+)
(limited to 'lib/Target')

diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
index b069522..37d9059 100644
--- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
@@ -60,6 +60,7 @@ class R600TextureIntrinsicsReplacer :
     switch (TextureType) {
     case 0:
+      useShadowVariant = false;
       return;
     case TEXTURE_RECT:
     case TEXTURE_1D:
--
cgit v1.1

From 1edd1a336a79c4cb804d32cb492738549154c69c Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko
Date: Sun, 18 Aug 2013 08:29:51 +0000
Subject: Remove unused stdio.h includes

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188626 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 1 -
 lib/Target/Mips/Mips16InstrInfo.cpp                   | 4 +---
 lib/Target/NVPTX/NVPTXInstrInfo.cpp                   | 1 -
 lib/Target/R600/AMDGPUSubtarget.cpp                   | 1 -
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp    | 1 -
 lib/Target/R600/SIInstrInfo.cpp                       | 1 -
 6 files changed, 1 insertion(+), 8 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index 2ea0d2e..7c41507 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -21,7 +21,6 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cstdio>

 using namespace llvm;

diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 887aa80..9a04c85e 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -10,7 +10,7 @@
 // This file contains the Mips16 implementation of the TargetInstrInfo class.
 //
 //===----------------------------------------------------------------------===//
-#include <cstdio>
+
 #include "Mips16InstrInfo.h"
 #include "InstPrinter/MipsInstPrinter.h"
 #include "MipsMachineFunction.h"
@@ -473,7 +473,6 @@ const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
   return new Mips16InstrInfo(TM);
 }

-#include <cstdio>
 bool Mips16InstrInfo::validImmediate(unsigned Opcode, unsigned Reg,
                                      int64_t Amount) {
   switch (Opcode) {
@@ -493,6 +492,5 @@ bool Mips16InstrInfo::validImmediate(unsigned Opcode, unsigned Reg,
       return isInt<16>(Amount);
     return isInt<15>(Amount);
   }
-  printf("Unexpected opcode %i \n", Opcode);
   llvm_unreachable("unexpected Opcode in validImmediate");
 }

diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index b406aa9..1f54d3e 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -21,7 +21,6 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include <cstdio>

 using namespace llvm;

diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 8ed5a74..8b2e445 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPUSubtarget.h"
-#include <cstdio>

 using namespace llvm;

diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index f470783..dd8df65 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -24,7 +24,6 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cstdio>

 using namespace llvm;

diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 2719ea2..e23eef3 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -19,7 +19,6 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
-#include <cstdio>

 using namespace llvm;

--
cgit v1.1

From ff79bc6e189f4d38021bba6a99d1d9e1af999df3 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 18 Aug 2013 08:53:01 +0000
Subject: Make more of the lowering helpers static. Also use MVT instead of
 EVT in a couple places.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188629 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++++++-----------------
 lib/Target/X86/X86ISelLowering.h   |  7 -------
 2 files changed, 17 insertions(+), 24 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3144038..312c8db 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5253,9 +5253,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
                  TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
 }

-SDValue
-X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
-                                          SelectionDAG &DAG) const {
+static SDValue
+LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
   // Check if the scalar load can be widened into a vector load.
And if // the address is "base + cst" see if the cst can be "absorbed" into @@ -8829,8 +8828,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } -SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { if (Subtarget->hasFp256()) { SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); if (Res.getNode()) @@ -8839,8 +8838,9 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, return SDValue(); } -SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, - SelectionDAG &DAG) const { + +static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); @@ -10248,11 +10248,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); } -SDValue X86TargetLowering::LowerSIGN_EXTEND_AVX512(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op->getValueType(0); +static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); - EVT InVT = In.getValueType(); + MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); if (InVT.getVectorElementType().getSizeInBits() >=8 && @@ -10267,7 +10266,8 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_AVX512(SDValue Op, Constant *C = ConstantInt::get(*DAG.getContext(), (NumElts == 8)? APInt(64, ~0ULL): APInt(32, ~0U)); - SDValue CP = DAG.getConstantPool(C, getPointerTy()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); unsigned Alignment = cast(CP)->getAlignment(); SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), @@ -10278,8 +10278,8 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_AVX512(SDValue Op, return SDValue(); } -SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); @@ -12958,9 +12958,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); - case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); - case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); - case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); + case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG); + case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG); + case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 53f09c4..eafe027 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -839,8 +839,6 @@ namespace llvm { bool isSigned, bool isReplace) const; - SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl, - SelectionDAG &DAG) const; SDValue 
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; @@ -860,11 +858,6 @@ namespace llvm { SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From 3491d67d3a50e81e3f65c1bdf01dd7962dc10c46 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 18 Aug 2013 13:08:57 +0000 Subject: AVX-512: Added VMOVD, VMOVQ, VMOVSS, VMOVSD instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188637 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 - lib/Target/X86/X86InstrAVX512.td | 277 +++++++++++++++++++++++++++++++- lib/Target/X86/X86InstrFormats.td | 26 +-- lib/Target/X86/X86InstrFragmentsSIMD.td | 4 +- lib/Target/X86/X86InstrInfo.cpp | 48 ++++-- lib/Target/X86/X86InstrSSE.td | 22 +-- 6 files changed, 335 insertions(+), 44 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 312c8db..4e71f36 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1434,8 +1434,6 @@ void X86TargetLowering::resetOperationActions() { if (!VT.is512BitVector()) continue; - setOperationAction(ISD::LOAD, VT, Promote); - AddPromotedToType (ISD::LOAD, VT, MVT::v8i64); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v8i64); } diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index d100e88..760b4ed 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -555,7 +555,7 @@ let Constraints = "$src1 = $dst" in { (bitconvert (mem_frag addr:$src3)))))]>, EVEX_4V; } } -defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv8i64, i512mem, +defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; @@ -1107,7 +1107,7 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), } multiclass avx512_mov_int opc, string asm, RegisterClass RC, - RegisterClass KRC, PatFrag bc_frag, + RegisterClass KRC, PatFrag ld_frag, X86MemOperand x86memop> { let neverHasSideEffects = 1 in def rr : AVX512XSI, + [(set RC:$dst, (ld_frag addr:$src))]>, EVEX; let Constraints = "$src1 = $dst" in { def rrk : AVX512XSI, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, bc_v8i64, - memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>, + EVEX_V512, 
VEX_W, EVEX_CD8<64, CD8VF>; let AddedComplexity = 20 in { def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1), @@ -1151,4 +1151,267 @@ def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1), (v8i64 VR512:$src2))), (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>; } +// Move Int Doubleword to Packed Double Int +// +def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + EVEX, VEX_LIG; +def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, + (v4i32 (scalar_to_vector (loadi32 addr:$src))))], + IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; +def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, + (v2i64 (scalar_to_vector GR64:$src)))], + IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG; +def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert GR64:$src))], + IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; +def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (bitconvert FR64:$src))], + IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; +def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(store (i64 (bitconvert FR64:$src)), addr:$dst)], + IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>, + EVEX_CD8<64, CD8VT1>; + +// Move Int Doubleword to Single Scalar +// +def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set FR32X:$dst, (bitconvert GR32:$src))], + IIC_SSE_MOVDQ>, EVEX, VEX_LIG; + +def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))], + IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + +// Move Packed Doubleword Int to Packed Double Int +// +def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src), + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + EVEX, VEX_LIG; +def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, VR128X:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(store (i32 (vector_extract (v4i32 VR128X:$src), + (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, + EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + +// Move Packed Doubleword Int first element to Doubleword Int +// +def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), + (iPTR 0)))], + IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W, + Requires<[HasAVX512, In64BitMode]>; + +def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), + (ins i64mem:$dst, VR128X:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), + addr:$dst)], IIC_SSE_MOVDQ>, + EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>, + Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; + +// Move Scalar Single to Double Int +// +def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), + (ins FR32X:$src), + 
"vmovd{z}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (bitconvert FR32X:$src))], + IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; +def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, FR32X:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(store (i32 (bitconvert FR32X:$src)), addr:$dst)], + IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + +// Move Quadword Int to Packed Quadword Int +// +def VMOVQI2PQIZrm : AVX512SI<0x7E, MRMSrcMem, (outs VR128X:$dst), + (ins i64mem:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, + EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + +//===----------------------------------------------------------------------===// +// AVX-512 MOVSS, MOVSD +//===----------------------------------------------------------------------===// + +multiclass avx512_move_scalar { + def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128X:$dst, (vt (OpNode VR128X:$src1, + (scalar_to_vector RC:$src2))))], + IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; + def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, + EVEX, VEX_LIG; + def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + EVEX, VEX_LIG; +} + +let ExeDomain = SSEPackedSingle in +defm VMOVSSZ : avx512_move_scalar<"movss{z}", FR32X, X86Movss, v4f32, f32mem, + loadf32>, XS, EVEX_CD8<32, CD8VT1>; + +let ExeDomain = SSEPackedDouble in +defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem, + loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; + + +// For the disassembler +let isCodeGenOnly = 1 in { + def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR32X:$src2), + "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + IIC_SSE_MOV_S_RR>, + XS, EVEX_4V, VEX_LIG; + def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR64X:$src2), + "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + IIC_SSE_MOV_S_RR>, + XD, EVEX_4V, VEX_LIG, VEX_W; +} + +let Predicates = [HasAVX512] in { + let AddedComplexity = 20 in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128X)>; + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128X)>; + def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128X)>; + + // MOVSDrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + def : Pat<(v2f64 (X86vzload addr:$src)), + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + + // Represent the same patterns above but in the form they appear for + // 256-bit types + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; + def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; + def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; + } + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>; + + // Extract and store. + def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))), + addr:$dst), + (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>; + def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))), + addr:$dst), + (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>; + + // Shuffle with VMOVSS + def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)), + (VMOVSSZrr (v4i32 VR128X:$src1), + (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>; + def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)), + (VMOVSSZrr (v4f32 VR128X:$src1), + (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>; + + // 256-bit variants + def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm), + (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)), + sub_xmm)>; + def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm), + (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)), + sub_xmm)>; + + // Shuffle with VMOVSD + def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + + // 256-bit variants + def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm), + (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)), + sub_xmm)>; + def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm), + (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)), + sub_xmm)>; + + def : Pat<(v2f64 (X86Movlpd 
VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; +} + +let AddedComplexity = 15 in +def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v2i64 (X86vzmovl + (v2i64 VR128X:$src))))], + IIC_SSE_MOVQ_RR>, EVEX, VEX_W; + +let AddedComplexity = 20 in +def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), + (ins i128mem:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v2i64 (X86vzmovl + (loadv2i64 addr:$src))))], + IIC_SSE_MOVDQ>, EVEX, VEX_W, + EVEX_CD8<8, CD8VT8>; +let AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + (VMOVZPQILo2PQIZrm addr:$src)>; + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), + (VMOVZPQILo2PQIZrr VR128X:$src)>; +} diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 64018b3..b50706c 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -339,10 +339,11 @@ def __xd : XD; class SI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I { - let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512], + !if(hasVEXPrefix /* VEX */, [UseAVX], !if(!eq(Prefix, __xs.Prefix), [UseSSE1], !if(!eq(Prefix, __xd.Prefix), [UseSSE2], - !if(hasOpSizePrefix, [UseSSE2], [UseSSE1])))); + !if(hasOpSizePrefix, [UseSSE2], [UseSSE1]))))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -352,8 +353,9 @@ class SI o, Format F, dag outs, dag ins, string asm, class SIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8 { - let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); + let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512], + !if(hasVEXPrefix /* VEX */, [UseAVX], + !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -363,8 +365,9 @@ class SIi8 o, Format F, dag outs, dag ins, string asm, class PI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin, Domain d> : I { - let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])); + let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512], + !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -381,11 +384,12 @@ class MMXPI o, Format F, dag outs, dag ins, string asm, list patter class PIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin, Domain d> : Ii8 { - let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])); + let Predicates = !if(hasEVEXPrefix /* EVEX 
*/, [HasAVX512], + !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]))); // AVX instructions have a 'v' prefix in the mnemonic - let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm); + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); } // SSE1 Instruction Templates: @@ -460,7 +464,7 @@ class PDIi8 o, Format F, dag outs, dag ins, string asm, class VSDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, XD, - Requires<[HasAVX]>; + Requires<[UseAVX]>; class VS2SI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, XS, @@ -472,7 +476,7 @@ class VPDI o, Format F, dag outs, dag ins, string asm, class VS2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, TB, - OpSize, Requires<[HasAVX]>; + OpSize, Requires<[UseAVX]>; class S2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, TB, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index fe35393..e6460e9 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -348,6 +348,8 @@ def alignedloadv4i64 : PatFrag<(ops node:$ptr), // 512-bit aligned load pattern fragments def alignedloadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (alignedload512 node:$ptr))>; +def alignedloadv16i32 : PatFrag<(ops node:$ptr), + (v16i32 (alignedload512 node:$ptr))>; def alignedloadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (alignedload512 node:$ptr))>; def alignedloadv8i64 : PatFrag<(ops node:$ptr), @@ -382,6 +384,7 @@ def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; // 512-bit memop pattern fragments def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>; def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>; +def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>; def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a @@ -437,7 +440,6 @@ def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>; def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>; - def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index b773768..71df2bb 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2910,17 +2910,20 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR64) -> DestReg(VR64) bool HasAVX = Subtarget.hasAVX(); + bool HasAVX512 = Subtarget.hasAVX512(); if (X86::GR64RegClass.contains(DestReg)) { - if (X86::VR128RegClass.contains(SrcReg)) + if (X86::VR128XRegClass.contains(SrcReg)) // Copy from a VR128 register to a GR64 register. - return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr; + return HasAVX512 ? X86::VMOVPQIto64Zrr: (HasAVX ? X86::VMOVPQIto64rr : + X86::MOVPQIto64rr); if (X86::VR64RegClass.contains(SrcReg)) // Copy from a VR64 register to a GR64 register. return X86::MOVSDto64rr; } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. - if (X86::VR128RegClass.contains(DestReg)) - return HasAVX ? 
X86::VMOV64toPQIrr : X86::MOV64toPQIrr; + if (X86::VR128XRegClass.contains(DestReg)) + return HasAVX512 ? X86::VMOV64toPQIZrr: (HasAVX ? X86::VMOV64toPQIrr : + X86::MOV64toPQIrr); // Copy from a GR64 register to a VR64 register. if (X86::VR64RegClass.contains(DestReg)) return X86::MOV64toSDrr; @@ -2929,13 +2932,13 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(FR32) -> DestReg(GR32) // SrcReg(GR32) -> DestReg(FR32) - if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg)) + if (X86::GR32RegClass.contains(DestReg) && X86::FR32XRegClass.contains(SrcReg)) // Copy from a FR32 register to a GR32 register. - return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; + return HasAVX512 ? X86::VMOVSS2DIZrr : (HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr); - if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) + if (X86::FR32XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) // Copy from a GR32 register to a FR32 register. - return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; + return HasAVX512 ? X86::VMOVDI2SSZrr : (HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr); return 0; } @@ -3040,6 +3043,21 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, bool isStackAligned, const TargetMachine &TM, bool load) { + if (TM.getSubtarget().hasAVX512()) { + if (X86::VK8RegClass.hasSubClassEq(RC) || + X86::VK16RegClass.hasSubClassEq(RC)) + return load ? X86::KMOVWkm : X86::KMOVWmk; + + if (X86::FR32XRegClass.hasSubClassEq(RC)) + return load ? X86::VMOVSSZrm : X86::VMOVSSZmr; + if (X86::FR64XRegClass.hasSubClassEq(RC)) + return load ? X86::VMOVSDZrm : X86::VMOVSDZmr; + if (X86::VR128XRegClass.hasSubClassEq(RC) || + X86::VR256XRegClass.hasSubClassEq(RC) || + X86::VR512RegClass.hasSubClassEq(RC)) + return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; + } + bool HasAVX = TM.getSubtarget().hasAVX(); switch (RC->getSize()) { default: @@ -3099,6 +3117,12 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr; else return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr; + case 64: + assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass"); + if (isStackAligned) + return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; + else + return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } } @@ -3125,7 +3149,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); - unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + unsigned Alignment = std::max(RC->getSize(), 16); bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); @@ -3141,7 +3165,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + unsigned Alignment = std::max(RC->getSize(), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); @@ -3161,7 +3185,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - unsigned Alignment = RC->getSize() == 32 ? 
32 : 16; + unsigned Alignment = std::max(RC->getSize(), 16); bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); @@ -3175,7 +3199,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + unsigned Alignment = std::max(RC->getSize(), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a86006a..aa057db 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4484,8 +4484,8 @@ def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 // -let Predicates = [HasAVX] in -def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +let Predicates = [UseAVX] in +def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, VEX, Sched<[WriteLoad]>; @@ -4577,7 +4577,7 @@ def MOVZDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), IIC_SSE_MOVDQ>; } // AddedComplexity, SchedRW -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. let AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), @@ -4630,7 +4630,7 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, - VEX, Requires<[HasAVX]>; + VEX, Requires<[UseAVX]>; def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4674,7 +4674,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>; + XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>; let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4685,7 +4685,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; -let Predicates = [HasAVX], AddedComplexity = 20 in { +let Predicates = [UseAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (VMOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), @@ -4719,7 +4719,7 @@ def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, - XS, VEX, Requires<[HasAVX]>; + XS, VEX, Requires<[UseAVX]>; let AddedComplexity = 15 in def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", @@ -4735,7 +4735,7 @@ def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))], IIC_SSE_MOVDQ>, - 
XS, VEX, Requires<[HasAVX]>; + XS, VEX, Requires<[UseAVX]>; let AddedComplexity = 20 in { def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movq\t{$src, $dst|$dst, $src}", @@ -4747,7 +4747,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), } // SchedRW let AddedComplexity = 20 in { - let Predicates = [HasAVX] in { + let Predicates = [UseAVX] in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (VMOVZPQILo2PQIrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), @@ -4771,7 +4771,7 @@ def VMOVQd64rr : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), IIC_SSE_MOVDQ>, VEX, VEX_W; // Recognize "movd" with GR64 destination, but encode as a "movq" def VMOVQd64rr_alt : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "movd\t{$src, $dst|$dst, $src}", [], + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; } // SchedRW @@ -4780,7 +4780,7 @@ def VMOVQd64rr_alt : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), // xm = mem64 let SchedRW = [WriteMove] in { -let Predicates = [HasAVX] in +let Predicates = [UseAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), -- cgit v1.1 From 57cf3500a3dec411500737b91e1a3970be488337 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sun, 18 Aug 2013 18:06:03 +0000 Subject: ARM: make sure we keep inline asm operands tied. When patching inlineasm nodes to use GPRPair for 64-bit values, we were dropping the information that two operands were tied, which effectively broke the live-interval of vregs affected. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188643 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 4ca3af6..3298b73 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3614,7 +3614,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ if(PairedReg.getNode()) { OpChanged[OpChanged.size() -1 ] = true; Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); - Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); + if (IsTiedToChangedOp) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); + else + Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); // Replace the current flag. AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( Flag, MVT::i32); -- cgit v1.1 From 953a78084b85ea88cd2b208153a72df70e27133f Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 19 Aug 2013 05:01:02 +0000 Subject: Add the PPC fcpsgn instruction Modern PPC cores support a floating-point copysign instruction, and we can use this to lower the FCOPYSIGN node (which is created from calls to the libm copysign function). A couple of extra patterns are necessary because the operand types of FCOPYSIGN need not agree. 
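As a rough illustration (an assumption about how such nodes arise, not code from
this patch), a mixed-type FCOPYSIGN can come from ordinary source once the DAG
combiner strips the implicit extension of the sign operand:

    // Hypothetical example; assumes the combiner folds
    // (fcopysign x, (fpext y)) into (fcopysign x, y), leaving an f64
    // result with an f32 sign operand for the new patterns to match.
    #include <cmath>
    double f(double a, float b) {
      return std::copysign(a, b); // b is consulted only for its sign bit
    }
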
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188653 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPC.td | 12 +++++++----- lib/Target/PowerPC/PPCISelLowering.cpp | 9 +++++++-- lib/Target/PowerPC/PPCInstrInfo.td | 28 ++++++++++++++++++++++++++++ lib/Target/PowerPC/PPCSubtarget.cpp | 1 + lib/Target/PowerPC/PPCSubtarget.h | 2 ++ 5 files changed, 45 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 806822c..0d950ee 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -57,6 +57,8 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; +def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true", + "Enable the fcpsgn instruction">; def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true", "Enable the fre instruction">; def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true", @@ -194,7 +196,7 @@ def : ProcessorModel<"e5500", PPCE5500Model, FeatureSTFIWX, FeatureBookE, FeatureISEL]>; def : ProcessorModel<"a2", PPCA2Model, [DirectiveA2, FeatureBookE, FeatureMFOCRF, - FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, @@ -202,7 +204,7 @@ def : ProcessorModel<"a2", PPCA2Model, /*, Feature64BitRegs */]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureBookE, FeatureMFOCRF, - FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, @@ -228,19 +230,19 @@ def : ProcessorModel<"pwr5x", G5Model, FeatureSTFIWX, FeatureFPRND, Feature64Bit]>; def : ProcessorModel<"pwr6", G5Model, [DirectivePwr6, FeatureAltivec, - FeatureMFOCRF, FeatureFSqrt, FeatureFRE, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>; def : ProcessorModel<"pwr6x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, Feature64Bit]>; def : ProcessorModel<"pwr7", G5Model, [DirectivePwr7, FeatureAltivec, - FeatureMFOCRF, FeatureFSqrt, FeatureFRE, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 2b83a22..bc55d38 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -149,8 +149,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) Subtarget->hasFRSQRTES() && Subtarget->hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + if (Subtarget->hasFCPSGN()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); + } else { + 
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + } if (Subtarget->hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 84ddb3f..35e9935 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -785,6 +785,20 @@ multiclass XForm_26r opcode, bits<10> xo, dag OOL, dag IOL, } } +multiclass XForm_28r opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list pattern> { + let BaseName = asmbase in { + def NAME : XForm_28, RecFormRel; + let Defs = [CR1] in + def o : XForm_28, isDOT, RecFormRel; + } +} + multiclass AForm_1r opcode, bits<5> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { @@ -1762,6 +1776,14 @@ defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), "fneg", "$frD, $frB", FPGeneral, [(set f64:$frD, (fneg f64:$frB))]>; +defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB), + "fcpsgn", "$frD, $frA, $frB", FPGeneral, + [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>; +let Interpretation64Bit = 1 in +defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB), + "fcpsgn", "$frD, $frA, $frB", FPGeneral, + [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>; + // Reciprocal estimates. defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), "fre", "$frD, $frB", FPGeneral, @@ -2270,6 +2292,12 @@ def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B), def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B), (FNMSUBS $A, $C, $B)>; +// FCOPYSIGN's operand types need not agree. +def : Pat<(fcopysign f64:$frB, f32:$frA), + (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>; +def : Pat<(fcopysign f32:$frB, f64:$frA), + (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>; + include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 12d0326..f975f55 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -74,6 +74,7 @@ void PPCSubtarget::initializeEnvironment() { Use64BitRegs = false; HasAltivec = false; HasQPX = false; + HasFCPSGN = false; HasFSQRT = false; HasFRE = false; HasFRES = false; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 3f3fc0e..a933bf6 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -76,6 +76,7 @@ protected: bool IsPPC64; bool HasAltivec; bool HasQPX; + bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; bool HasRecipPrec; @@ -171,6 +172,7 @@ public: bool isLittleEndian() const { return IsLittleEndian; } // Specific obvious features. + bool hasFCPSGN() const { return HasFCPSGN; } bool hasFSQRT() const { return HasFSQRT; } bool hasFRE() const { return HasFRE; } bool hasFRES() const { return HasFRES; } -- cgit v1.1 From 80f54784da0bd42fb79176bbf447a31d69287fe3 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 19 Aug 2013 12:42:31 +0000 Subject: [SystemZ] Add support for sibling calls This first cut is pretty conservative. The final argument register (R6) is call-saved, so we would need to make sure that the R6 argument to a sibling call is the same as the R6 argument to the calling function, which seems worth keeping as a separate patch. 
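As a rough sketch of where that boundary falls (assuming the usual ELF
convention that r2-r6 carry the integer arguments, with r6 call-saved):

    // Hypothetical examples, not from this patch.
    long callee4(long, long, long, long);
    long sib(long a, long b, long c, long d) {
      return callee4(d, c, b, a);    // register args only: emitted as "jg callee4"
    }
    long callee5(long, long, long, long, long);
    long nosib(long a, long b, long c, long d, long e) {
      return callee5(a, b, c, d, e); // fifth argument needs r6: ordinary call
    }
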
Saying that integer truncations are free means that we no longer use the extending instructions LGF and LLGF for spills in int-conv-09.ll and int-conv-10.ll. Instead we treat the registers as 64 bits wide and truncate them to 32-bits where necessary. I think it's unlikely we'd use LGF and LLGF for spills in other situations for the same reason, so I'm removing the tests rather than replacing them. The associated code is generic and applies to many more instructions than just LGF and LLGF, so there is no corresponding code removal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188669 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/README.txt | 4 -- lib/Target/SystemZ/SystemZFrameLowering.cpp | 3 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 85 ++++++++++++++++++++++++----- lib/Target/SystemZ/SystemZISelLowering.h | 5 ++ lib/Target/SystemZ/SystemZInstrInfo.td | 10 ++++ lib/Target/SystemZ/SystemZOperators.td | 3 + 6 files changed, 89 insertions(+), 21 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index eebc4e4..93e29b8 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -35,10 +35,6 @@ performance measurements. -- -We don't support tail calls at present. - --- - We don't support prefetching yet. -- diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index a58da90..ed75e28 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -420,8 +420,7 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, SystemZMachineFunctionInfo *ZFI = MF.getInfo(); // Skip the return instruction. - assert(MBBI->getOpcode() == SystemZ::RET && - "Can only insert epilogue into returning blocks"); + assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks"); uint64_t StackSize = getAllocatedStackSize(MF); if (ZFI->getLowSavedGPR()) { diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 788fc2e..0000485 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -305,6 +305,22 @@ bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, return AM.Scale == 0 || AM.Scale == 1; } +bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { + if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) + return false; + unsigned FromBits = FromType->getPrimitiveSizeInBits(); + unsigned ToBits = ToType->getPrimitiveSizeInBits(); + return FromBits > ToBits; +} + +bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { + if (!FromVT.isInteger() || !ToVT.isInteger()) + return false; + unsigned FromBits = FromVT.getSizeInBits(); + unsigned ToBits = ToVT.getSizeInBits(); + return FromBits > ToBits; +} + //===----------------------------------------------------------------------===// // Inline asm support //===----------------------------------------------------------------------===// @@ -527,6 +543,17 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, #include "SystemZGenCallingConv.inc" +bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, + Type *ToType) const { + return isTruncateFree(FromType, ToType); +} + +bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + if (!CI->isTailCall()) + return false; + return true; +} + // Value is a value that has been passed to us in the 
location described by VA // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining // any loads onto Chain. @@ -689,6 +716,23 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, return Chain; } +static bool canUseSiblingCall(CCState ArgCCInfo, + SmallVectorImpl &ArgLocs) { + // Punt if there are any indirect or stack arguments, or if the call + // needs the call-saved argument register R6. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (!VA.isRegLoc()) + return false; + unsigned Reg = VA.getLocReg(); + if (Reg == SystemZ::R6W || Reg == SystemZ::R6D) + return false; + } + return true; +} + SDValue SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { @@ -699,26 +743,29 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; + bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(); - // SystemZ target does not yet support tail call optimization. - isTailCall = false; - // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); + // We don't support GuaranteedTailCallOpt, only automatically-detected + // sibling calls. + if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs)) + IsTailCall = false; + // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = ArgCCInfo.getNextStackOffset(); // Mark the start of the call. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true), - DL); + if (!IsTailCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true), + DL); // Copy argument values to their designated locations. SmallVector, 9> RegsToPass; @@ -767,22 +814,27 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes, chained and glued together. - SDValue Glue; - for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, - RegsToPass[I].second, Glue); - Glue = Chain.getValue(1); - } - // Accept direct calls by converting symbolic call addresses to the - // associated Target* opcodes. + // associated Target* opcodes. Force %r1 to be used for indirect + // tail calls. + SDValue Glue; if (GlobalAddressSDNode *G = dyn_cast(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); } else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } else if (IsTailCall) { + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); + Glue = Chain.getValue(1); + Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); + } + + // Build a sequence of copy-to-reg nodes, chained and glued together. 
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, + RegsToPass[I].second, Glue); + Glue = Chain.getValue(1); } // The first call operand is the chain and the second is the target address. @@ -802,6 +854,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + if (IsTailCall) + return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size()); Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); Glue = Chain.getValue(1); @@ -1689,6 +1743,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { OPCODE(RET_FLAG); OPCODE(CALL); + OPCODE(SIBCALL); OPCODE(PCREL_WRAPPER); OPCODE(CMP); OPCODE(UCMP); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 21677a0..3692e1e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -32,6 +32,7 @@ namespace SystemZISD { // is the target address. The arguments start at operand 2. // There is an optional glue operand at the end. CALL, + SIBCALL, // Wraps a TargetGlobalAddress that should be loaded using PC-relative // accesses (LARL). Operand 0 is the address. @@ -155,6 +156,8 @@ public: LLVM_OVERRIDE; virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const LLVM_OVERRIDE; + virtual bool isTruncateFree(Type *, Type *) const LLVM_OVERRIDE; + virtual bool isTruncateFree(EVT, EVT) const LLVM_OVERRIDE; virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE; virtual std::pair getRegForInlineAsmConstraint(const std::string &Constraint, @@ -174,6 +177,8 @@ public: MachineBasicBlock *BB) const LLVM_OVERRIDE; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const LLVM_OVERRIDE; + virtual bool allowTruncateForTailCall(Type *, Type *) const LLVM_OVERRIDE; + virtual bool mayBeEmittedAsTailCall(CallInst *CI) const LLVM_OVERRIDE; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 7789d61..876b48b 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -212,6 +212,16 @@ let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, "basr\t%r14, $R2", [(z_call ADDR64:$R2)]>; } +// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards +// are argument registers and since branching to R0 is a no-op. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1, R1 = 15 in { + def CallJG : InstRIL<0xC04, (outs), (ins pcrel32call:$I2), + "jg\t$I2", [(z_sibcall pcrel32call:$I2)]>; + let R2 = 1, Uses = [R1D] in + def CallBR : InstRR<0x07, (outs), (ins), "br\t%r1", [(z_sibcall R1D)]>; +} + // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. 
def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 1a3d45e..8d6c619 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -82,6 +82,9 @@ def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>; def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>; -- cgit v1.1 From b0d40a22e5aa1a51913fa161c2ce5513d7bd9293 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 19 Aug 2013 12:48:54 +0000 Subject: [SystemZ] Add integer absolute (load positive) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188670 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.td | 12 +++++++++++- lib/Target/SystemZ/SystemZOperators.td | 8 ++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 876b48b..dd3a9bc 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -534,11 +534,21 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, } //===----------------------------------------------------------------------===// -// Negation +// Absolute and Negation //===----------------------------------------------------------------------===// let Defs = [CC] in { let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LPR : UnaryRR <"lp", 0x10, z_iabs32, GR32, GR32>; + def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs64, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LPGFR : UnaryRRE<"lpgf", 0xB910, null_frag, GR64, GR32>; +} +defm : SXU; + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; } diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 8d6c619..59e1ffc 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -240,6 +240,14 @@ def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2), APInt::getLowBitsSet(BitWidth, 8)); }]>; +// Integer absolute, matching the canonical form generated by DAGCombiner. +def z_iabs32 : PatFrag<(ops node:$src), + (xor (add node:$src, (sra node:$src, (i32 31))), + (sra node:$src, (i32 31)))>; +def z_iabs64 : PatFrag<(ops node:$src), + (xor (add node:$src, (sra node:$src, (i32 63))), + (sra node:$src, (i32 63)))>; + // Fused multiply-add and multiply-subtract, but with the order of the // operands matching SystemZ's MA and MS instructions. def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), -- cgit v1.1 From d4a37e61378949835d93df6b8e4a9feadb4edeef Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 19 Aug 2013 12:56:58 +0000 Subject: [SystemZ] Add negative integer absolute (load negative) For now this matches the equivalent of (neg (abs ...)), which did hit a few times in projects/test-suite. We should probably also match cases where absolute-like selects are used with reversed arguments. 
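A sketch of both forms, as an assumption about what the canonicalization
produces rather than a claim taken from the test suite:

    // Hypothetical examples; the select-based absolute is assumed to be
    // canonicalized into (xor (add x, (sra x, 31)), (sra x, 31)), which
    // z_iabs32 matches, so the negated wrapper selects LOAD NEGATIVE.
    int abs32(int x)    { return x < 0 ? -x : x; }    // lpr (load positive)
    int negabs32(int x) { return -(x < 0 ? -x : x); } // lnr (load negative)
    // The reversed select, x < 0 ? x : -x, is the still-unmatched case
    // mentioned above.
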
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188671 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/README.txt | 6 ------ lib/Target/SystemZ/SystemZInstrInfo.td | 10 ++++++++++ lib/Target/SystemZ/SystemZOperators.td | 2 ++ 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 93e29b8..58e631f 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -104,12 +104,6 @@ such as ICM and STCM. -- -DAGCombiner can detect integer absolute, but there's not yet an associated -ISD opcode. We could add one and implement it using LOAD POSITIVE. -Negated absolutes could use LOAD NEGATIVE. - --- - DAGCombiner doesn't yet fold truncations of extended loads. Functions like: unsigned long f (unsigned long x, unsigned short *y) diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index dd3a9bc..d857a57 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -549,6 +549,16 @@ defm : SXU; let Defs = [CC] in { let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LNR : UnaryRR <"ln", 0x11, z_inegabs32, GR32, GR32>; + def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs64, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LNGFR : UnaryRRE<"lngf", 0xB911, null_frag, GR64, GR32>; +} +defm : SXU; + +let Defs = [CC] in { + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; } diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 59e1ffc..5745e29 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -247,6 +247,8 @@ def z_iabs32 : PatFrag<(ops node:$src), def z_iabs64 : PatFrag<(ops node:$src), (xor (add node:$src, (sra node:$src, (i32 63))), (sra node:$src, (i32 63)))>; +def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>; +def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>; // Fused multiply-add and multiply-subtract, but with the order of the // operands matching SystemZ's MA and MS instructions. -- cgit v1.1 From f12df0ad507a620daedcab4041d473e40af81eb9 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 19 Aug 2013 13:26:14 +0000 Subject: AVX-512: added arithmetic and logical operations. ADD, SUB, MUL integer and FP types. OR, AND, XOR. Added embeded broadcast form for these instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188673 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 +- lib/Target/X86/X86InstrAVX512.td | 247 ++++++++++++++++++++++++++++++-- lib/Target/X86/X86InstrFragmentsSIMD.td | 18 ++- 3 files changed, 249 insertions(+), 27 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4e71f36..3c3f09f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1390,6 +1390,9 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::AND, MVT::v8i64, Legal); setOperationAction(ISD::OR, MVT::v8i64, Legal); setOperationAction(ISD::XOR, MVT::v8i64, Legal); + setOperationAction(ISD::AND, MVT::v16i32, Legal); + setOperationAction(ISD::OR, MVT::v16i32, Legal); + setOperationAction(ISD::XOR, MVT::v16i32, Legal); // Custom lower several nodes. 
for (int i = MVT::FIRST_VECTOR_VALUETYPE; @@ -1409,14 +1412,6 @@ void X86TargetLowering::resetOperationActions() { if (!VT.is512BitVector()) continue; - if (VT != MVT::v8i64) { - setOperationAction(ISD::XOR, VT, Promote); - AddPromotedToType (ISD::XOR, VT, MVT::v8i64); - setOperationAction(ISD::OR, VT, Promote); - AddPromotedToType (ISD::OR, VT, MVT::v8i64); - setOperationAction(ISD::AND, VT, Promote); - AddPromotedToType (ISD::AND, VT, MVT::v8i64); - } if ( EltSize >= 32) { setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 760b4ed..a6035af 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -80,6 +80,21 @@ let Predicates = [HasAVX512] in { def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>; } +// +// AVX-512: VPXOR instruction writes zero to its upper part, it's safe build zeros. +// + +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isPseudo = 1, Predicates = [HasAVX512] in { +def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", + [(set VR512:$dst, (v16f32 immAllZerosV))]>; +} + +def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; + //===----------------------------------------------------------------------===// // AVX-512 - VECTOR INSERT // @@ -518,16 +533,16 @@ multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, - (OpVT (X86VPermv RC:$src1, - (bitconvert (mem_frag addr:$src2)))))]>, EVEX_4V; + (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>, + EVEX_4V; } -defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv8i64, i512mem, +defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; let ExeDomain = SSEPackedSingle in -defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv8f64, f512mem, +defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; let ExeDomain = SSEPackedDouble in defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, @@ -552,7 +567,7 @@ let Constraints = "$src1 = $dst" in { "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpVT (X86VPermv3 RC:$src1, RC:$src2, - (bitconvert (mem_frag addr:$src3)))))]>, EVEX_4V; + (mem_frag addr:$src3))))]>, EVEX_4V; } } defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem, @@ -631,18 +646,17 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, def rm : AVX512BI, EVEX_4V; } defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem, - memopv8i64, X86pcmpeqm, v16i32>, EVEX_V512; + memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512; defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem, memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W; defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem, - memopv8i64, X86pcmpgtm, v16i32>, EVEX_V512; + memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512; defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem, memopv8i64, X86pcmpgtm, v8i64>, T8, 
EVEX_V512, VEX_W; @@ -656,7 +670,6 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; - multiclass avx512_icmp_cc opc, RegisterClass KRC, RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, SDNode OpNode, ValueType vt, Operand CC, string asm, @@ -667,9 +680,8 @@ multiclass avx512_icmp_cc opc, RegisterClass KRC, IIC_SSE_CMPP_RR>, EVEX_4V; def rmi : AVX512AIi8, EVEX_4V; + [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2), + imm:$cc))], IIC_SSE_CMPP_RM>, EVEX_4V; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : AVX512AIi8 opc, RegisterClass KRC, } } -defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv8i64, +defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32, X86cmpm, v16i32, AVXCC, "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv8i64, +defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32, X86cmpmu, v16i32, AVXCC, "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, @@ -1415,3 +1427,208 @@ let AddedComplexity = 20 in { (VMOVZPQILo2PQIZrr VR128X:$src)>; } +//===----------------------------------------------------------------------===// +// AVX-512 - Integer arithmetic +// +multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, PatFrag scalar_mfrag, + X86MemOperand x86scalar_mop, string BrdcstStr, + OpndItins itins, bit IsCommutable = 0> { + let isCommutable = IsCommutable in + def rr : AVX512BI, EVEX_4V; + def rm : AVX512BI, EVEX_4V; + def rmb : AVX512BI, EVEX_4V, EVEX_B; +} +multiclass avx512_binop_rm2 opc, string OpcodeStr, + ValueType DstVT, ValueType SrcVT, RegisterClass RC, + PatFrag memop_frag, X86MemOperand x86memop, + OpndItins itins, + bit IsCommutable = 0> { + let isCommutable = IsCommutable in + def rr : AVX512BI, EVEX_4V, VEX_W; + def rm : AVX512BI, EVEX_4V, VEX_W; +} + +defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, + T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>, + EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W; + +defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, + VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8, + EVEX_V512, EVEX_CD8<64, CD8VF>; + +defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, + VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512, + EVEX_CD8<64, CD8VF>; + 
+def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), + (VPMULUDQZrr VR512:$src1, VR512:$src2)>; + +//===----------------------------------------------------------------------===// +// AVX-512 Logical Instructions +//===----------------------------------------------------------------------===// + +defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512, + memopv16i32, i512mem, loadi32, i32mem, "{1to16}", + SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 FP arithmetic +//===----------------------------------------------------------------------===// + +multiclass avx512_binop_s opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; +} + +let isCommutable = 1 in { +defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>; +defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>; +defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>; +defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>; +} +let isCommutable = 0 in { +defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>; +defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>; +} + +multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag, + X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, + string BrdcstStr, + Domain d, OpndItins itins, bit commutable> { + let isCommutable = commutable in + def rr : PI, + EVEX_4V; + let mayLoad = 1 in { + def rm : PI, EVEX_4V; + def rmb : PI, EVEX_4V, EVEX_B; + } +} + +defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 1>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem, + 
memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 1>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 1>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; +defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 1>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 0>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; +defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 0>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index e6460e9..9f1c999 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -366,6 +366,16 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ || cast(N)->getAlignment() >= 16; }]>; +def memop4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return Subtarget->hasVectorUAMem() + || cast(N)->getAlignment() >= 4; +}]>; + +def memop8 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return Subtarget->hasVectorUAMem() + || cast(N)->getAlignment() >= 8; +}]>; + def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; @@ -382,10 +392,10 @@ def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; // 512-bit memop pattern fragments -def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>; -def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>; -def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>; -def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>; +def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop4 node:$ptr))>; +def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop8 node:$ptr))>; +def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop4 node:$ptr))>; +def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop8 node:$ptr))>; // SSSE3 uses MMX registers for some instructions. 
They aren't aligned on a // 16-byte boundary. -- cgit v1.1 From 756e89c8c2a3c30ce3a73ed13724aad1b41a5608 Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Mon, 19 Aug 2013 15:02:25 +0000 Subject: Thumb2 add immediate alias for SP The Thumb2 add immediate is in fact defined for SP. The manual is misleading as it points to a different section for add immediate with SP, however the encoding is the same as for add immediate with register only with the SP operand hard coded. As such add immediate with SP and add immediate with register can safely be treated as the same instruction. All the patch does is adjust a register constraint on an instruction alias. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188676 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 376246b..02ff08b 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4087,7 +4087,8 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", // Aliases for ADD without the ".w" optional width specifier. def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", - (t2ADDri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<"add${p} $Rd, $Rn, $imm", (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", -- cgit v1.1 From 1e09ed13893ad9d463c6c08c996170bac6e60449 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 19 Aug 2013 19:08:03 +0000 Subject: [mips] Fix instruction definitions that were incorrectly marked as code-gen-only. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188690 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCondMov.td | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index b313c52..1f19adc 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -148,15 +148,17 @@ let Predicates = [NotFP64bit, HasStdEnc] in { CMov_I_F_FM<19, 17>; } -let Predicates = [IsFP64bit, HasStdEnc], isCodeGenOnly = 1 in { +let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, IIFmove>, CMov_I_F_FM<18, 17>; - def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, - IIFmove>, CMov_I_F_FM<18, 17>; def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, IIFmove>, CMov_I_F_FM<19, 17>; - def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, - IIFmove>, CMov_I_F_FM<19, 17>; + let isCodeGenOnly = 1 in { + def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, + IIFmove>, CMov_I_F_FM<18, 17>; + def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, + IIFmove>, CMov_I_F_FM<19, 17>; + } } def MOVT_I : CMov_F_I_FT<"movt", GPR32Opnd, IIArith, MipsCMovFP_T>, @@ -184,7 +186,8 @@ let Predicates = [NotFP64bit, HasStdEnc] in { def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64Opnd, IIFmove, MipsCMovFP_F>, CMov_F_F_FM<17, 0>; } -let Predicates = [IsFP64bit, HasStdEnc], isCodeGenOnly = 1 in { + +let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, IIFmove, MipsCMovFP_T>, CMov_F_F_FM<17, 1>; def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, IIFmove, MipsCMovFP_F>, -- cgit v1.1 From 30cbccb029b01738202bd04341b1cbf68a7814c9 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 19 Aug 2013 23:35:24 +0000 Subject: Don't form PPC CTR-based loops around a copysignl call copysign/copysignf never become function calls (because the SDAG expansion code does not lower to the corresponding function call, but rather directly implements the associated logic), but copysignl almost always is lowered into a call to the requested libm functon (and, thus, might clobber CTR). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188727 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCTRLoops.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 05dad8a..d9f1323 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -284,8 +284,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { default: return true; case LibFunc::copysign: case LibFunc::copysignf: - case LibFunc::copysignl: continue; // ISD::FCOPYSIGN is never a library call. + case LibFunc::copysignl: + return true; case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: -- cgit v1.1 From 66d1fa6f4b443ac9f8bcea5d1f71a73ada733a42 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 19 Aug 2013 23:35:46 +0000 Subject: Add a llvm.copysign intrinsic This adds a llvm.copysign intrinsic; We already have Libfunc recognition for copysign (which is turned into the FCOPYSIGN SDAG node). In order to autovectorize calls to copysign in the loop vectorizer, we need a corresponding intrinsic as well. 
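As a rough example of the motivating case (hypothetical source, not a test from
this commit), a loop like the following can now be vectorized, with each call
becoming an @llvm.copysign intrinsic and then a vector FCOPYSIGN node, assuming
the target's cost model agrees:

    // Hypothetical example; std::copysign on floats is assumed to be
    // recognized through TargetLibraryInfo and mapped to the intrinsic.
    #include <cmath>
    void vcopysign(float *a, const float *b, const float *c, int n) {
      for (int i = 0; i < n; ++i)
        a[i] = std::copysign(b[i], c[i]);
    }
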
In addition to the expected changes to the language reference, the loop vectorizer, BasicTTI, and the SDAG builder (the intrinsic is transformed into an FCOPYSIGN node, just like the function call), this also adds FCOPYSIGN to a few lists in LegalizeVector{Ops,Types} so that vector copysigns can be expanded. In TargetLoweringBase::initActions, I've made the default action for FCOPYSIGN be Expand for vector types. This seems correct for all in-tree targets, and I think is the right thing to do because, previously, there was no way to generate vector-values FCOPYSIGN nodes (and most targets don't specify an action for vector-typed FCOPYSIGN). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188728 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCTRLoops.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index d9f1323..4224ae2 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -253,6 +253,12 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case Intrinsic::sin: case Intrinsic::cos: return true; + case Intrinsic::copysign: + if (CI->getArgOperand(0)->getType()->getScalarType()-> + isPPC_FP128Ty()) + return true; + else + continue; // ISD::FCOPYSIGN is never a library call. case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; -- cgit v1.1 From e3b29fbc5f4d7632a88b6f470a96cc6ac09e31ed Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Tue, 20 Aug 2013 01:26:14 +0000 Subject: [Sparc] Use HWEncoding instead of unused Num field in Sparc register definitions. Also, correct the definitions of RETL and RET instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188738 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcInstrInfo.td | 4 ++-- lib/Target/Sparc/SparcRegisterInfo.td | 17 +++++++---------- 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index d4cac4d..2d228ea 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -286,11 +286,11 @@ let usesCustomInserter = 1, Uses = [FCC] in { // Section A.3 - Synthetic Instructions, p. 85 // special cases of JMPL: let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { - let rd = O7.Num, rs1 = G0.Num in + let rd = 0, rs1 = 15 in def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val), "jmp %o7+$val", [(retflag simm13:$val)]>; - let rd = I7.Num, rs1 = G0.Num in + let rd = 0, rs1 = 31 in def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val), "jmp %i7+$val", []>; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index a59c442..b239f80 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -11,8 +11,8 @@ // Declarations that describe the Sparc register file //===----------------------------------------------------------------------===// -class SparcReg : Register { - field bits<5> Num; +class SparcReg Enc, string n> : Register { + let HWEncoding = Enc; let Namespace = "SP"; } @@ -27,16 +27,13 @@ def sub_odd : SubRegIndex<32, 32>; // Registers are identified with 5-bit ID numbers. 
// Ri - 32-bit integer registers -class Ri num, string n> : SparcReg { - let Num = num; -} +class Ri Enc, string n> : SparcReg; + // Rf - 32-bit floating-point registers -class Rf num, string n> : SparcReg { - let Num = num; -} +class Rf Enc, string n> : SparcReg; + // Rd - Slots in the FP register file for 64-bit floating-point values. -class Rd num, string n, list subregs> : SparcReg { - let Num = num; +class Rd Enc, string n, list subregs> : SparcReg { let SubRegs = subregs; let SubRegIndices = [sub_even, sub_odd]; let CoveredBySubRegs = 1; -- cgit v1.1 From 6af35e95765e2d577919714dfbdac3ebf84cac78 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 20 Aug 2013 03:12:23 +0000 Subject: [PowerPC] More refactoring prior to real PPC emitPrologue/Epilogue changes. (Patch committed on behalf of Mark Minich, whose log entry follows.) This is a continuation of the refactorings performed in svn rev 188573 (see that rev's comments for more detail). This is my stage 2 refactoring: I combined the emitPrologue() & emitEpilogue() PPC32 & PPC64 code into a single flow, simplifying a lot of the code since in essence the PPC32 & PPC64 code generation logic is the same, only the instruction forms are different (in most cases). This simplification is necessary because my functional changes (yet to come) add significant complexity, and without the simplification of my stage 2 refactoring, the overall complexity of both emitPrologue() & emitEpilogue() would have become almost intractable for most mortal programmers (like me). This submission was intended to be a pure refactoring (no functional changes whatsoever). However, in the process of combining the PPC32 & PPC64 flows, I spotted a difference that I believe is a bug (see svn rev 186478 line 863, or svn rev 188573 line 888): This line appears to be restoring the BP with the original FP content, not the original BP content. When I merged the 32-bit and 64-bit code, I used the corresponding code from the 64-bit flow, which I believe uses the correct offset (BPOffset) for this operation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188741 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 465 +++++++++++++------------------- 1 file changed, 194 insertions(+), 271 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 5161de8..0ac2ced 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -382,10 +382,36 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); - // Do we have a frame pointer for this function? + // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); + unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; + unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30; + unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; + unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; + unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0; + unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg + // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) + const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 + : PPC::MFLR ); + const MCInstrDesc& StoreInst = TII.get(isPPC64 ? 
PPC::STD + : PPC::STW ); + const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU + : PPC::STWU ); + const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX + : PPC::STWUX); + const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 + : PPC::LIS ); + const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 + : PPC::ORI ); + const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 + : PPC::OR ); + const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 + : PPC::SUBFC); + const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 + : PPC::SUBFIC); + // Regarding this assert: Even though LR is saved in the caller's frame (i.e., // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no // Red Zone, an asynchronous event (a form of "callee") could claim a frame & @@ -430,173 +456,106 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. bool isLargeFrame = !isInt<16>(NegFrameSize); - if (isPPC64) { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); - if (!MustSaveCRs.empty()) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12); - for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); - } + assert((isPPC64 || MustSaveCRs.empty()) && + "Prologue CR saving supported only in 64-bit mode"); - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(PPC::X31) - .addImm(FPOffset) - .addReg(PPC::X1); - - if (HasBP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(PPC::X30) - .addImm(BPOffset) - .addReg(PPC::X1); - - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(PPC::X0) - .addImm(LROffset) - .addReg(PPC::X1); - - if (!MustSaveCRs.empty()) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) - .addReg(PPC::X12, getKillRegState(true)) - .addImm(8) - .addReg(PPC::X1); - } else { // PPC32... - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); - - if (HasFP) - // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(PPC::R31) - .addImm(FPOffset) - .addReg(PPC::R1); - - if (HasBP) - // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(PPC::R30) - .addImm(BPOffset) - .addReg(PPC::R1); - - assert(MustSaveCRs.empty() && - "Prologue CR saving supported only in 64-bit mode"); - - if (MustSaveLR) - // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(PPC::R0) - .addImm(LROffset) - .addReg(PPC::R1); + if (!MustSaveCRs.empty()) { // will only occur for PPC64 + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); } + if (HasFP) + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. + BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(FPReg) + .addImm(FPOffset) + .addReg(SPReg); + + if (HasBP) + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. 
+ BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(BPReg) + .addImm(BPOffset) + .addReg(SPReg); + + if (MustSaveLR) + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. + BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(ScratchReg) + .addImm(LROffset) + .addReg(SPReg); + + if (!MustSaveCRs.empty()) // will only occur for PPC64 + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) + .addReg(TempReg, getKillRegState(true)) + .addImm(8) + .addReg(SPReg); + // Skip the rest if this is a leaf function & all spills fit in the Red Zone. if (!FrameSize) return; // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now - if (!isPPC64) { // PPC32... - if (HasBP) { - // Save a copy of r1 as the base pointer. - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R30) - .addReg(PPC::R1) - .addReg(PPC::R1); - } - if (HasBP && MaxAlign > 1) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R1) + if (HasBP) { + // Save a copy of r1 as the base pointer. + BuildMI(MBB, MBBI, dl, OrInst, BPReg) + .addReg(SPReg) + .addReg(SPReg); + } + + if (HasBP && MaxAlign > 1) { + if (isPPC64) + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) + .addReg(SPReg) + .addImm(0) + .addImm(64 - Log2_32(MaxAlign)); + else // PPC32... + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2_32(MaxAlign)) .addImm(31); - if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(NegFrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R12) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R12) - .addReg(PPC::R12, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addReg(PPC::R12, RegState::Kill); - } - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) - .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1) - .addReg(PPC::R0); - - } else if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) - .addReg(PPC::R1) - .addImm(NegFrameSize) - .addReg(PPC::R1); - + if (!isLargeFrame) { + BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(NegFrameSize); } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) - .addReg(PPC::R0, RegState::Kill) + BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) + .addReg(TempReg, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) - .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1) - .addReg(PPC::R0); + BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addReg(TempReg, RegState::Kill); } - } else { // PPC64... - if (HasBP) { - // Save a copy of r1 as the base pointer. 
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X30) - .addReg(PPC::X1) - .addReg(PPC::X1); - } - - if (HasBP && MaxAlign > 1) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) - .addReg(PPC::X1) - .addImm(0) - .addImm(64 - Log2_32(MaxAlign)); - if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addImm(NegFrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X12) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X12) - .addReg(PPC::X12, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X12, RegState::Kill); - } - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1) - .addReg(PPC::X0); + BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) + .addReg(SPReg, RegState::Kill) + .addReg(SPReg) + .addReg(ScratchReg); - } else if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) - .addReg(PPC::X1) - .addImm(NegFrameSize) - .addReg(PPC::X1); + } else if (!isLargeFrame) { + BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) + .addReg(SPReg) + .addImm(NegFrameSize) + .addReg(SPReg); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1) - .addReg(PPC::X0); - } + } else { + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) + .addReg(SPReg, RegState::Kill) + .addReg(SPReg) + .addReg(ScratchReg); } // Add the "machine moves" for the instructions we generated above, but in @@ -612,22 +571,19 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize)); if (HasFP) { - unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31; - Reg = MRI->getDwarfRegNum(Reg, true); + unsigned Reg = MRI->getDwarfRegNum(FPReg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset)); } if (HasBP) { - unsigned Reg = isPPC64 ? PPC::X30 : PPC::R30; - Reg = MRI->getDwarfRegNum(Reg, true); + unsigned Reg = MRI->getDwarfRegNum(BPReg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset)); } if (MustSaveLR) { - unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR; - Reg = MRI->getDwarfRegNum(Reg, true); + unsigned Reg = MRI->getDwarfRegNum(LRReg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset)); } @@ -637,15 +593,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // If there is a frame pointer, copy R1 into R31 if (HasFP) { - if (!isPPC64) { // PPC32... - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31) - .addReg(PPC::R1) - .addReg(PPC::R1); - } else { // PPC64... 
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31) - .addReg(PPC::X1) - .addReg(PPC::X1); - } + BuildMI(MBB, MBBI, dl, OrInst, FPReg) + .addReg(SPReg) + .addReg(SPReg); if (needsFrameMoves) { ReadyLabel = MMI.getContext().CreateTempSymbol(); @@ -653,8 +603,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Mark effective beginning of when frame pointer is ready. BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); - unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31; - Reg = MRI->getDwarfRegNum(Reg, true); + unsigned Reg = MRI->getDwarfRegNum(FPReg, true); MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg)); } } @@ -731,10 +680,28 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); - // Do we have a frame pointer for this function? + // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); + unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; + unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30; + unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; + unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0; + unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg + const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 + : PPC::MTLR ); + const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD + : PPC::LWZ ); + const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 + : PPC::LIS ); + const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 + : PPC::ORI ); + const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 + : PPC::ADDI ); + const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 + : PPC::ADD4 ); + int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); int FPOffset = 0; @@ -790,106 +757,69 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, if (FrameSize) { // In the prologue, the loaded (or persistent) stack pointer value is offset // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now. - if (!isPPC64) { - // If this function contained a fastcc call and GuaranteedTailCallOpt is - // enabled (=> hasFastCall()==true) the fastcc call might contain a tail - // call which invalidates the stack pointer value in SP(0). So we use the - // value of R31 in this case. - if (FI->hasFastCall()) { - assert(HasFP && "Expecting a valid frame pointer."); - if (!isLargeFrame) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) - .addReg(PPC::R31).addImm(FrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(FrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4)) - .addReg(PPC::R1) - .addReg(PPC::R31) - .addReg(PPC::R0); - } - } else if (!isLargeFrame && !HasBP && - !MFI->hasVarSizedObjects()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) - .addReg(PPC::R1).addImm(FrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1) - .addImm(0).addReg(PPC::R1); - } - } else { // PPC64... 
- if (FI->hasFastCall()) { - if (!isLargeFrame) { - assert(HasFP && "Expecting a valid frame pointer."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) - .addReg(PPC::X31).addImm(FrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addImm(FrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8)) - .addReg(PPC::X1) - .addReg(PPC::X31) - .addReg(PPC::X0); - } - } else if (!isLargeFrame && !HasBP && - !MFI->hasVarSizedObjects()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) - .addReg(PPC::X1).addImm(FrameSize); + + // If this function contained a fastcc call and GuaranteedTailCallOpt is + // enabled (=> hasFastCall()==true) the fastcc call might contain a tail + // call which invalidates the stack pointer value in SP(0). So we use the + // value of R31 in this case. + if (FI->hasFastCall()) { + assert(HasFP && "Expecting a valid frame pointer."); + if (!isLargeFrame) { + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(FPReg).addImm(FrameSize); } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1) - .addImm(0).addReg(PPC::X1); + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) + .addImm(FrameSize >> 16); + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(FrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, AddInst) + .addReg(SPReg) + .addReg(FPReg) + .addReg(ScratchReg); } + } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) { + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(SPReg) + .addImm(FrameSize); + } else { + BuildMI(MBB, MBBI, dl, LoadInst, SPReg) + .addImm(0) + .addReg(SPReg); } - } - - if (isPPC64) { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) - .addImm(LROffset).addReg(PPC::X1); - if (!MustSaveCRs.empty()) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12) - .addImm(8).addReg(PPC::X1); - - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) - .addImm(FPOffset).addReg(PPC::X1); + } - if (HasBP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X30) - .addImm(BPOffset).addReg(PPC::X1); + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + .addImm(LROffset) + .addReg(SPReg); - if (!MustSaveCRs.empty()) - for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) - .addReg(PPC::X12, getKillRegState(i == e-1)); + assert((isPPC64 || MustSaveCRs.empty()) && + "Epilogue CR restoring supported only in 64-bit mode"); - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0); - } else { // PPC32... 
- if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0) - .addImm(LROffset).addReg(PPC::R1); + if (!MustSaveCRs.empty()) // will only occur for PPC64 + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) + .addImm(8) + .addReg(SPReg); - assert(MustSaveCRs.empty() && - "Epilogue CR restoring supported only in 64-bit mode"); + if (HasFP) + BuildMI(MBB, MBBI, dl, LoadInst, FPReg) + .addImm(FPOffset) + .addReg(SPReg); - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31) - .addImm(FPOffset).addReg(PPC::R1); + if (HasBP) + BuildMI(MBB, MBBI, dl, LoadInst, BPReg) + .addImm(BPOffset) + .addReg(SPReg); - if (HasBP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R30) - .addImm(FPOffset).addReg(PPC::R1); + if (!MustSaveCRs.empty()) // will only occur for PPC64 + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) + .addReg(TempReg, getKillRegState(i == e-1)); - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0); - } + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization @@ -897,27 +827,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); - unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; - unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0; - unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI; if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { - BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg) - .addReg(StackReg).addImm(CallerAllocatedAmt); + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(SPReg).addImm(CallerAllocatedAmt); } else { - BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(CallerAllocatedAmt >> 16); - BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) - .addReg(TmpReg, RegState::Kill) + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) .addImm(CallerAllocatedAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr)) - .addReg(StackReg) + BuildMI(MBB, MBBI, dl, AddInst) + .addReg(SPReg) .addReg(FPReg) - .addReg(TmpReg); + .addReg(ScratchReg); } } else if (RetOpcode == PPC::TCRETURNdi) { MBBI = MBB.getLastNonDebugInstr(); -- cgit v1.1 From b5e1d5b46fb9b8fdf3924f92f2c5ea978bc2be01 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 20 Aug 2013 04:24:14 +0000 Subject: Move AVX and non-AVX replication inside a couple multiclasses to avoid repeating each instruction for both individually. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 147 +++++++++++++++++------------------------- 1 file changed, 60 insertions(+), 87 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index aa057db..8699595 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2921,20 +2921,8 @@ let isCommutable = 0 in /// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those /// classes below -multiclass basic_sse12_fp_binop_s opc, string OpcodeStr, SDNode OpNode, - SizeItins itins, - bit Is2Addr = 1> { - defm SS : sse12_fp_scalar, XS; - defm SD : sse12_fp_scalar, XD; -} - multiclass basic_sse12_fp_binop_p opc, string OpcodeStr, SDNode OpNode, SizeItins itins> { -let Predicates = [HasAVX] in { defm V#NAME#PS : sse12_fp_packed, TB, VEX_4V; @@ -2948,93 +2936,78 @@ let Predicates = [HasAVX] in { defm V#NAME#PDY : sse12_fp_packed, TB, OpSize, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { - defm PS : sse12_fp_packed, TB; - defm PD : sse12_fp_packed, TB, OpSize; -} -} -multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, - SizeItins itins, - bit Is2Addr = 1> { - defm SS : sse12_fp_scalar_int, XS; - defm SD : sse12_fp_scalar_int, XD; + let Constraints = "$src1 = $dst" in { + defm PS : sse12_fp_packed, TB; + defm PD : sse12_fp_packed, TB, OpSize; + } } -// Binary Arithmetic instructions -defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>; -defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>; -let isCommutable = 0 in { - defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>; - defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>; - defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>; - defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>; -} +multiclass basic_sse12_fp_binop_s opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm V#NAME#SS : sse12_fp_scalar, XS, VEX_4V, VEX_LIG; + defm V#NAME#SD : sse12_fp_scalar, XD, VEX_4V, VEX_LIG; -let isCodeGenOnly = 1 in { - defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; - defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; + let Constraints = "$src1 = $dst" in { + defm SS : sse12_fp_scalar, XS; + defm SD : sse12_fp_scalar, XD; + } } -defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>, - basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>, - VEX_4V, VEX_LIG; -defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>, - basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>, - VEX_4V, VEX_LIG; +multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, + SizeItins itins> { + defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG; + defm V#NAME#SD : sse12_fp_scalar_int, XD, VEX_4V, VEX_LIG; -let isCommutable = 0 in { - defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>, - basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>, - VEX_4V, VEX_LIG; - defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>, - basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>, - VEX_4V, VEX_LIG; - defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>, - basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>, - VEX_4V, VEX_LIG; - defm VMIN : basic_sse12_fp_binop_s<0x5D, 
"min", X86fmin, SSE_ALU_ITINS_S, 0>, - basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>, - VEX_4V, VEX_LIG; + let Constraints = "$src1 = $dst" in { + defm SS : sse12_fp_scalar_int, XS; + defm SD : sse12_fp_scalar_int, XD; + } } -let Constraints = "$src1 = $dst" in { - defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>; - defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, - basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>; - - let isCommutable = 0 in { - defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>; - defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>; - defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>; - defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>; - } +// Binary Arithmetic instructions +defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>, + basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>; +defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>, + basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, + basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>; +let isCommutable = 0 in { + defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, + basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>; + defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, + basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, + basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>; + defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>; + defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>, + basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>; } let isCodeGenOnly = 1 in { - defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, - VEX_4V, VEX_LIG; - defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>, - VEX_4V, VEX_LIG; - let Constraints = "$src1 = $dst" in { - defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>; - defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>; - } + defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>; + defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, + basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>; } /// Unop Arithmetic -- cgit v1.1 From 3b7722cf3a751fc332cfce4dc8d63ecd83deb127 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 20 Aug 2013 05:22:42 +0000 Subject: Add AVX-512 and related features to the CPUID detection code. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188745 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index fae90f2..fb9b55b 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -351,14 +351,30 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasRTM = true; ToggleFeature(X86::FeatureRTM); } - if (IsIntel && ((EBX >> 19) & 0x1)) { - HasADX = true; - ToggleFeature(X86::FeatureADX); + if (IsIntel && ((EBX >> 16) & 0x1)) { + X86SSELevel = AVX512; + ToggleFeature(X86::FeatureAVX512); } if (IsIntel && ((EBX >> 18) & 0x1)) { HasRDSEED = true; ToggleFeature(X86::FeatureRDSEED); } + if (IsIntel && ((EBX >> 19) & 0x1)) { + HasADX = true; + ToggleFeature(X86::FeatureADX); + } + if (IsIntel && ((EBX >> 26) & 0x1)) { + HasPFI = true; + ToggleFeature(X86::FeaturePFI); + } + if (IsIntel && ((EBX >> 27) & 0x1)) { + HasERI = true; + ToggleFeature(X86::FeatureERI); + } + if (IsIntel && ((EBX >> 28) & 0x1)) { + HasCDI = true; + ToggleFeature(X86::FeatureCDI); + } } } } -- cgit v1.1 From 1299e49ba71ea3932f31656725f32aaff6003d5c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 20 Aug 2013 05:23:59 +0000 Subject: Fix formatting. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188746 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index fb9b55b..d428d44 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -495,7 +495,7 @@ void X86Subtarget::initializeEnvironment() { HasHLE = false; HasERI = false; HasCDI = false; - HasPFI=false; + HasPFI = false; HasADX = false; HasPRFCHW = false; HasRDSEED = false; -- cgit v1.1 From c149fbbe279ef623e6067304fd08dc1a62d74f7d Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 20 Aug 2013 08:38:21 +0000 Subject: [mips][msa] Added and.v, bmnz.v, bmz.v, bsel.v, nor.v, or.v, xor.v git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188767 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrFormats.td | 5 +++ lib/Target/Mips/MipsMSAInstrInfo.td | 59 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index 6f1e58f..35082c2 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -109,3 +109,8 @@ class MSA_I10_FMT major, bits<2> df, bits<6> minor>: MSAInst { let Inst{22-21} = df; let Inst{5-0} = minor; } + +class MSA_VEC_FMT major, bits<6> minor>: MSAInst { + let Inst{25-21} = major; + let Inst{5-0} = minor; +} diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index c03924f..1657962 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -65,6 +65,8 @@ class ADDVI_H_ENC : MSA_I5_FMT<0b000, 0b01, 0b000110>; class ADDVI_W_ENC : MSA_I5_FMT<0b000, 0b10, 0b000110>; class ADDVI_D_ENC : MSA_I5_FMT<0b000, 0b11, 0b000110>; +class AND_V_ENC : MSA_VEC_FMT<0b00000, 0b011110>; + class ANDI_B_ENC : MSA_I8_FMT<0b00, 0b000000>; class ASUB_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010001>; @@ -127,8 +129,12 @@ class BINSRI_H_ENC : 
MSA_BIT_H_FMT<0b111, 0b001001>; class BINSRI_W_ENC : MSA_BIT_W_FMT<0b111, 0b001001>; class BINSRI_D_ENC : MSA_BIT_D_FMT<0b111, 0b001001>; +class BMNZ_V_ENC : MSA_VEC_FMT<0b00100, 0b011110>; + class BMNZI_B_ENC : MSA_I8_FMT<0b00, 0b000001>; +class BMZ_V_ENC : MSA_VEC_FMT<0b00101, 0b011110>; + class BMZI_B_ENC : MSA_I8_FMT<0b01, 0b000001>; class BNEG_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001101>; @@ -141,6 +147,8 @@ class BNEGI_H_ENC : MSA_BIT_H_FMT<0b101, 0b001001>; class BNEGI_W_ENC : MSA_BIT_W_FMT<0b101, 0b001001>; class BNEGI_D_ENC : MSA_BIT_D_FMT<0b101, 0b001001>; +class BSEL_V_ENC : MSA_VEC_FMT<0b00110, 0b011110>; + class BSELI_B_ENC : MSA_I8_FMT<0b10, 0b000001>; class BSET_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001101>; @@ -508,8 +516,12 @@ class NLZC_H_ENC : MSA_2R_FMT<0b11000011, 0b01, 0b011110>; class NLZC_W_ENC : MSA_2R_FMT<0b11000011, 0b10, 0b011110>; class NLZC_D_ENC : MSA_2R_FMT<0b11000011, 0b11, 0b011110>; +class NOR_V_ENC : MSA_VEC_FMT<0b00010, 0b011110>; + class NORI_B_ENC : MSA_I8_FMT<0b10, 0b000000>; +class OR_V_ENC : MSA_VEC_FMT<0b00001, 0b011110>; + class ORI_B_ENC : MSA_I8_FMT<0b01, 0b000000>; class PCKEV_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010100>; @@ -631,6 +643,8 @@ class VSHF_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010101>; class VSHF_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010101>; class VSHF_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010101>; +class XOR_V_ENC : MSA_VEC_FMT<0b00011, 0b011110>; + class XORI_B_ENC : MSA_I8_FMT<0b11, 0b000000>; // Instruction desc. @@ -784,6 +798,16 @@ class MSA_INSERT_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, RCWT:$wt); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, RCWT:$wt))]; + InstrItinClass Itinerary = itin; +} + class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, NoItinerary, MSA128, MSA128>, IsCommutable; class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, NoItinerary, @@ -850,6 +874,9 @@ class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, NoItinerary, class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, NoItinerary, MSA128, MSA128>; +class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, NoItinerary, + MSA128, MSA128>; + class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, NoItinerary, MSA128, MSA128>; @@ -969,9 +996,15 @@ class BINSRI_W_DESC : MSA_BIT_W_DESC_BASE<"binsri.w", int_mips_binsri_w, class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, NoItinerary, MSA128, MSA128>; +class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, NoItinerary, + MSA128, MSA128>; + class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, NoItinerary, MSA128, MSA128>; +class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, NoItinerary, + MSA128, MSA128>; + class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, NoItinerary, MSA128, MSA128>; @@ -993,6 +1026,9 @@ class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", int_mips_bnegi_w, class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d, NoItinerary, MSA128, MSA128>; +class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, NoItinerary, + MSA128, MSA128>; + class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, NoItinerary, MSA128, MSA128>; @@ -1661,9 +1697,15 @@ class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", int_mips_nlzc_w, class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", int_mips_nlzc_d, NoItinerary, MSA128, MSA128>; +class NOR_V_DESC : 
MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, NoItinerary, + MSA128, MSA128>; + class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, NoItinerary, MSA128, MSA128>; +class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, NoItinerary, + MSA128, MSA128>; + class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, NoItinerary, MSA128, MSA128>; @@ -1888,6 +1930,9 @@ class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, class VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, NoItinerary, MSA128, MSA128>; +class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, NoItinerary, + MSA128, MSA128>; + class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, NoItinerary, MSA128, MSA128>; // Instruction defs. @@ -1921,6 +1966,8 @@ def ADDVI_H : ADDVI_H_ENC, ADDVI_H_DESC, Requires<[HasMSA]>; def ADDVI_W : ADDVI_W_ENC, ADDVI_W_DESC, Requires<[HasMSA]>; def ADDVI_D : ADDVI_D_ENC, ADDVI_D_DESC, Requires<[HasMSA]>; +def AND_V : AND_V_ENC, AND_V_DESC, Requires<[HasMSA]>; + def ANDI_B : ANDI_B_ENC, ANDI_B_DESC, Requires<[HasMSA]>; def ASUB_S_B : ASUB_S_B_ENC, ASUB_S_B_DESC, Requires<[HasMSA]>; @@ -1983,8 +2030,12 @@ def BINSRI_H : BINSRI_H_ENC, BINSRI_H_DESC, Requires<[HasMSA]>; def BINSRI_W : BINSRI_W_ENC, BINSRI_W_DESC, Requires<[HasMSA]>; def BINSRI_D : BINSRI_D_ENC, BINSRI_D_DESC, Requires<[HasMSA]>; +def BMNZ_V : BMNZ_V_ENC, BMNZ_V_DESC, Requires<[HasMSA]>; + def BMNZI_B : BMNZI_B_ENC, BMNZI_B_DESC, Requires<[HasMSA]>; +def BMZ_V : BMZ_V_ENC, BMZ_V_DESC, Requires<[HasMSA]>; + def BMZI_B : BMZI_B_ENC, BMZI_B_DESC, Requires<[HasMSA]>; def BNEG_B : BNEG_B_ENC, BNEG_B_DESC, Requires<[HasMSA]>; @@ -1997,6 +2048,8 @@ def BNEGI_H : BNEGI_H_ENC, BNEGI_H_DESC, Requires<[HasMSA]>; def BNEGI_W : BNEGI_W_ENC, BNEGI_W_DESC, Requires<[HasMSA]>; def BNEGI_D : BNEGI_D_ENC, BNEGI_D_DESC, Requires<[HasMSA]>; +def BSEL_V : BSEL_V_ENC, BSEL_V_DESC, Requires<[HasMSA]>; + def BSELI_B : BSELI_B_ENC, BSELI_B_DESC, Requires<[HasMSA]>; def BSET_B : BSET_B_ENC, BSET_B_DESC, Requires<[HasMSA]>; @@ -2363,8 +2416,12 @@ def NLZC_H : NLZC_H_ENC, NLZC_H_DESC, Requires<[HasMSA]>; def NLZC_W : NLZC_W_ENC, NLZC_W_DESC, Requires<[HasMSA]>; def NLZC_D : NLZC_D_ENC, NLZC_D_DESC, Requires<[HasMSA]>; +def NOR_V : NOR_V_ENC, NOR_V_DESC, Requires<[HasMSA]>; + def NORI_B : NORI_B_ENC, NORI_B_DESC, Requires<[HasMSA]>; +def OR_V : OR_V_ENC, OR_V_DESC, Requires<[HasMSA]>; + def ORI_B : ORI_B_ENC, ORI_B_DESC, Requires<[HasMSA]>; def PCKEV_B : PCKEV_B_ENC, PCKEV_B_DESC, Requires<[HasMSA]>; @@ -2486,6 +2543,8 @@ def VSHF_H : VSHF_H_ENC, VSHF_H_DESC, Requires<[HasMSA]>; def VSHF_W : VSHF_W_ENC, VSHF_W_DESC, Requires<[HasMSA]>; def VSHF_D : VSHF_D_ENC, VSHF_D_DESC, Requires<[HasMSA]>; +def XOR_V : XOR_V_ENC, XOR_V_DESC, Requires<[HasMSA]>; + def XORI_B : XORI_B_ENC, XORI_B_DESC, Requires<[HasMSA]>; // Patterns. -- cgit v1.1 From 32c2bfda77d54ca6ad8e08d2de03daa7ae432305 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 20 Aug 2013 08:57:11 +0000 Subject: ARM: implement some simple f64 materializations. Previously we used a const-pool load for virtually all 64-bit floating values. Actually, we can get quite a few common values (including 0.0, 1.0) via "vmov" instructions of one stripe or another. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188773 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 50 ++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index caec11e..f22c5b9 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -452,6 +452,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f64, Custom); if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); @@ -4271,17 +4272,25 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { - if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + if (!ST->hasVFP3()) return SDValue(); + bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast(Op); - assert(Op.getValueType() == MVT::f32 && - "ConstantFP custom lowering should only occur for f32."); // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); - int ImmVal = ARM_AM::getFP32Imm(FPVal); + int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); + if (ImmVal != -1) { + if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { + // We have code in place to select a valid ConstantFP already, no need to + // do any mangling. + return Op; + } + + // It's a float and we are trying to use NEON operations where + // possible. Lower it to a splat followed by an extract. SDLoc DL(Op); SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, @@ -4290,15 +4299,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } - // If that fails, try a VMOV.i32 + // The rest of our options are NEON only, make sure that's allowed before + // proceeding.. + if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) + return SDValue(); + EVT VMovVT; - unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); - SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, - VMOVModImm); + uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); + + // It wouldn't really be worth bothering for doubles except for one very + // important value, which does happen to match: 0.0. So make sure we don't do + // anything stupid. + if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) + return SDValue(); + + // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). + SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT, + false, VMOVModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, NewVal); + if (IsDouble) + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); + + // It's a float: cast and extract a vector element. 
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, @@ -4306,11 +4331,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, } // Finally, try a VMVN.i32 - NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, - VMVNModImm); + NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT, + false, VMVNModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); + + if (IsDouble) + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); + + // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, -- cgit v1.1 From 6ef333501eb917cbd79a51c84294051a1a257a0b Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 20 Aug 2013 09:22:54 +0000 Subject: [mips][msa] Added insve git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188777 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 1657962..5a33d75 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -403,6 +403,11 @@ class INSERT_B_ENC : MSA_ELM_B_FMT<0b0100, 0b011001>; class INSERT_H_ENC : MSA_ELM_H_FMT<0b0100, 0b011001>; class INSERT_W_ENC : MSA_ELM_W_FMT<0b0100, 0b011001>; +class INSVE_B_ENC : MSA_ELM_B_FMT<0b0101, 0b011001>; +class INSVE_H_ENC : MSA_ELM_H_FMT<0b0101, 0b011001>; +class INSVE_W_ENC : MSA_ELM_W_FMT<0b0101, 0b011001>; +class INSVE_D_ENC : MSA_ELM_D_FMT<0b0101, 0b011001>; + class LD_B_ENC : MSA_I5_FMT<0b110, 0b00, 0b000111>; class LD_H_ENC : MSA_I5_FMT<0b110, 0b01, 0b000111>; class LD_W_ENC : MSA_I5_FMT<0b110, 0b10, 0b000111>; @@ -798,6 +803,19 @@ class MSA_INSERT_DESC_BASE { + dag OutOperandList = (outs RCD:$wd); + dag InOperandList = (ins RCD:$wd_in, uimm6:$n, RCWS:$ws); + string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[0]"); + list Pattern = [(set RCD:$wd, (OpNode RCD:$wd_in, + immZExt6:$n, + RCWS:$ws))]; + InstrItinClass Itinerary = itin; + string Constraints = "$wd = $wd_in"; +} + class MSA_VEC_DESC_BASE { @@ -1490,6 +1508,15 @@ class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", int_mips_insert_h, class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", int_mips_insert_w, NoItinerary, MSA128, GPR32>; +class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b, + NoItinerary, MSA128, MSA128>; +class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h, + NoItinerary, MSA128, MSA128>; +class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", int_mips_insve_w, + NoItinerary, MSA128, MSA128>; +class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d, + NoItinerary, MSA128, MSA128>; + class LD_DESC_BASE { @@ -2304,6 +2331,11 @@ def INSERT_B : INSERT_B_ENC, INSERT_B_DESC, Requires<[HasMSA]>; def INSERT_H : INSERT_H_ENC, INSERT_H_DESC, Requires<[HasMSA]>; def INSERT_W : INSERT_W_ENC, INSERT_W_DESC, Requires<[HasMSA]>; +def INSVE_B : INSVE_B_ENC, INSVE_B_DESC, Requires<[HasMSA]>; +def INSVE_H : INSVE_H_ENC, INSVE_H_DESC, Requires<[HasMSA]>; +def INSVE_W : INSVE_W_ENC, INSVE_W_DESC, Requires<[HasMSA]>; +def INSVE_D : INSVE_D_ENC, INSVE_D_DESC, Requires<[HasMSA]>; + def LD_B: LD_B_ENC, 
LD_B_DESC, Requires<[HasMSA]>; def LD_H: LD_H_ENC, LD_H_DESC, Requires<[HasMSA]>; def LD_W: LD_W_ENC, LD_W_DESC, Requires<[HasMSA]>; -- cgit v1.1 From 8c20158fb0e1e5d747077f065eb0170c5af1fbfa Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 20 Aug 2013 09:38:48 +0000 Subject: [SystemZ] Use SRST to optimize memchr SystemZTargetLowering::emitStringWrapper() previously loaded the character into R0 before the loop and made R0 live on entry. I'd forgotten that allocatable registers weren't allowed to be live across blocks at this stage, and it confused LiveVariables enough to cause a miscompilation of f3 in memchr-02.ll. This patch instead loads R0 in the loop and leaves LICM to hoist it after RA. This is actually what I'd tried originally, but I went for the manual optimisation after noticing that R0 often wasn't being hoisted. This bug forced me to go back and look at why, now fixed as r188774. We should also try to optimize null checks so that they test the CC result of the SRST directly. The select between null and the SRST GPR result could then usually be deleted as dead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188779 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZ.h | 5 +++++ lib/Target/SystemZ/SystemZISelLowering.cpp | 7 +++--- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 30 ++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 5 +++++ 4 files changed, 44 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index eccc2aa..bb6ceac 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -52,6 +52,11 @@ namespace llvm { const unsigned CCMASK_CS_NE = CCMASK_1; const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1; + // Condition-code mask assignments for a completed SRST loop. + const unsigned CCMASK_SRST_FOUND = CCMASK_1; + const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2; + const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2; + // Return true if Val fits an LLILL operand. static inline bool isImmLL(uint64_t Val) { return (Val & ~0x000000000000ffffULL) == 0; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 0000485..6710f89 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2345,19 +2345,19 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); // StartMBB: - // R0W = %CharReg // # fall through to LoopMMB - BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0W).addReg(CharReg); MBB->addSuccessor(LoopMBB); // LoopMBB: // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] + // R0W = %CharReg // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0W // JO LoopMBB // # fall through to DoneMMB + // + // The load of R0W can be hoisted by post-RA LICM. 
MBB = LoopMBB; - MBB->addLiveIn(SystemZ::R0W); BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) .addReg(Start1Reg).addMBB(StartMBB) @@ -2365,6 +2365,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) .addReg(Start2Reg).addMBB(StartMBB) .addReg(End2Reg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0W).addReg(CharReg); BuildMI(MBB, DL, TII->get(Opcode)) .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) .addReg(This1Reg).addReg(This2Reg); diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 73bd480..638c3ee 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -159,6 +159,36 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } std::pair SystemZSelectionDAGInfo:: +EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const { + // Use SRST to find the character. End is its address on success. + EVT PtrVT = Src.getValueType(); + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); + Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); + Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); + Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, + DAG.getConstant(255, MVT::i32)); + SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); + SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, + Limit, Src, Char); + Chain = End.getValue(1); + SDValue Glue = End.getValue(2); + + // Now select between End and null, depending on whether the character + // was found. + SmallVector Ops; + Ops.push_back(End); + Ops.push_back(DAG.getConstant(0, PtrVT)); + Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32)); + Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32)); + Ops.push_back(Glue); + VTs = DAG.getVTList(PtrVT, MVT::Glue); + End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); + return std::make_pair(End, Chain); +} + +std::pair SystemZSelectionDAGInfo:: EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dest, SDValue Src, MachinePointerInfo DestPtrInfo, diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 26d6488..281d1e2 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -47,6 +47,11 @@ public: MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE; virtual std::pair + EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src, SDValue Char, SDValue Length, + MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE; + + virtual std::pair EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dest, SDValue Src, MachinePointerInfo DestPtrInfo, -- cgit v1.1 From 0371d01fb98a580aa79ddcc0e4ed5bd2d2e541b5 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 20 Aug 2013 09:40:35 +0000 Subject: [SystemZ] Update README We now use MVST, CLST and SRST for the obvious cases. 
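For reference, the node sequence built in EmitTargetCodeForMemchr above implements ordinary memchr semantics: SRST scans [Src, Src + Length) for the character truncated to eight bits and sets the condition code, and the SELECT_CCMASK then picks between the match address and null. A scalar C++ model of that contract (illustrative only; the emitted code runs the search in hardware):

    #include <cstddef>

    // Scalar model of the SRST-based lowering: Limit = Src + Length, the
    // character is masked to 8 bits, and the select yields the match
    // address on CCMASK_SRST_FOUND or the null constant on
    // CCMASK_SRST_NOTFOUND.
    const void *memchrModel(const void *Src, int Char, std::size_t Length) {
      const unsigned char *P = static_cast<const unsigned char *>(Src);
      const unsigned char *Limit = P + Length;
      const unsigned char C = static_cast<unsigned char>(Char & 255);
      for (; P != Limit; ++P) // the SRST loop
        if (*P == C)
          return P;          // found
      return nullptr;        // not found
    }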
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188781 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/README.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 58e631f..4107685 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -73,10 +73,8 @@ Likewise memcmp and CLC. MVCLE and CLCLE could be useful too. -- -We don't optimize string operations. - -MVST, CLST, SRST and CUSE could be useful here. Some of the TRANSLATE -family might be too, although they are probably more difficult to exploit. +We don't use CUSE or the TRANSLATE family of instructions for string +operations. The TRANSLATE ones are probably more difficult to exploit. -- -- cgit v1.1 From c5158b869bbde7b08c486c6f326bd1c701367c98 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 20 Aug 2013 09:41:47 +0000 Subject: [mips][msa] Removed fcge, fcgt, fsge, fsgt These instructions were present in a draft spec but were removed before publication. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188782 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 44 ------------------------------------- 1 file changed, 44 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 5a33d75..541e3ad 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -261,12 +261,6 @@ class FADD_D_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011011>; class FCEQ_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011010>; class FCEQ_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011010>; -class FCGE_W_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011010>; -class FCGE_D_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011010>; - -class FCGT_W_ENC : MSA_3RF_FMT<0b0111, 0b0, 0b011010>; -class FCGT_D_ENC : MSA_3RF_FMT<0b0111, 0b1, 0b011010>; - class FCLASS_W_ENC : MSA_2RF_FMT<0b110010000, 0b0, 0b011110>; class FCLASS_D_ENC : MSA_2RF_FMT<0b110010000, 0b1, 0b011110>; @@ -349,12 +343,6 @@ class FRSQRT_D_ENC : MSA_2RF_FMT<0b110010100, 0b1, 0b011110>; class FSEQ_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011010>; class FSEQ_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011010>; -class FSGE_W_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011010>; -class FSGE_D_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011010>; - -class FSGT_W_ENC : MSA_3RF_FMT<0b1111, 0b0, 0b011010>; -class FSGT_D_ENC : MSA_3RF_FMT<0b1111, 0b1, 0b011010>; - class FSLE_W_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011010>; class FSLE_D_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011010>; @@ -1264,16 +1252,6 @@ class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, NoItinerary, MSA128, MSA128>, IsCommutable; -class FCGE_W_DESC : MSA_3RF_DESC_BASE<"fcge.w", int_mips_fcge_w, - NoItinerary, MSA128, MSA128>; -class FCGE_D_DESC : MSA_3RF_DESC_BASE<"fcge.d", int_mips_fcge_d, - NoItinerary, MSA128, MSA128>; - -class FCGT_W_DESC : MSA_3RF_DESC_BASE<"fcgt.w", int_mips_fcgt_w, - NoItinerary, MSA128, MSA128>; -class FCGT_D_DESC : MSA_3RF_DESC_BASE<"fcgt.d", int_mips_fcgt_d, - NoItinerary, MSA128, MSA128>; - class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, NoItinerary, MSA128, MSA128>; class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, @@ -1415,16 +1393,6 @@ class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, NoItinerary, MSA128, MSA128>; -class FSGE_W_DESC : MSA_3RF_DESC_BASE<"fsge.w", int_mips_fsge_w, 
- NoItinerary, MSA128, MSA128>; -class FSGE_D_DESC : MSA_3RF_DESC_BASE<"fsge.d", int_mips_fsge_d, - NoItinerary, MSA128, MSA128>; - -class FSGT_W_DESC : MSA_3RF_DESC_BASE<"fsgt.w", int_mips_fsgt_w, - NoItinerary, MSA128, MSA128>; -class FSGT_D_DESC : MSA_3RF_DESC_BASE<"fsgt.d", int_mips_fsgt_d, - NoItinerary, MSA128, MSA128>; - class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, NoItinerary, MSA128, MSA128>; class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, @@ -2198,12 +2166,6 @@ def FCLT_D : FCLT_D_ENC, FCLT_D_DESC, Requires<[HasMSA]>; def FCLASS_W : FCLASS_W_ENC, FCLASS_W_DESC, Requires<[HasMSA]>; def FCLASS_D : FCLASS_D_ENC, FCLASS_D_DESC, Requires<[HasMSA]>; -def FCGE_W : FCGE_W_ENC, FCGE_W_DESC, Requires<[HasMSA]>; -def FCGE_D : FCGE_D_ENC, FCGE_D_DESC, Requires<[HasMSA]>; - -def FCGT_W : FCGT_W_ENC, FCGT_W_DESC, Requires<[HasMSA]>; -def FCGT_D : FCGT_D_ENC, FCGT_D_DESC, Requires<[HasMSA]>; - def FCNE_W : FCNE_W_ENC, FCNE_W_DESC, Requires<[HasMSA]>; def FCNE_D : FCNE_D_ENC, FCNE_D_DESC, Requires<[HasMSA]>; @@ -2283,12 +2245,6 @@ def FSLE_D : FSLE_D_ENC, FSLE_D_DESC, Requires<[HasMSA]>; def FSLT_W : FSLT_W_ENC, FSLT_W_DESC, Requires<[HasMSA]>; def FSLT_D : FSLT_D_ENC, FSLT_D_DESC, Requires<[HasMSA]>; -def FSGE_W : FSGE_W_ENC, FSGE_W_DESC, Requires<[HasMSA]>; -def FSGE_D : FSGE_D_ENC, FSGE_D_DESC, Requires<[HasMSA]>; - -def FSGT_W : FSGT_W_ENC, FSGT_W_DESC, Requires<[HasMSA]>; -def FSGT_D : FSGT_D_ENC, FSGT_D_DESC, Requires<[HasMSA]>; - def FSNE_W : FSNE_W_ENC, FSNE_W_DESC, Requires<[HasMSA]>; def FSNE_D : FSNE_D_ENC, FSNE_D_DESC, Requires<[HasMSA]>; -- cgit v1.1 From 38cd21a3e9533215b6abf5750d715d0596720542 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 20 Aug 2013 11:00:29 +0000 Subject: AVX-512: Added more patterns for VMOVSS, VMOVSD, VMOVD, VMOVQ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188786 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 81 ++++++++++++++++++++++++++++++++++------ lib/Target/X86/X86InstrSSE.td | 2 +- 2 files changed, 71 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index a6035af..ccbd18e 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1305,28 +1305,51 @@ let isCodeGenOnly = 1 in { } let Predicates = [HasAVX512] in { + let AddedComplexity = 15 in { + // Move scalar to XMM zero-extended, zeroing a VR128X then do a + // MOVS{S,D} to the lower bits. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))), + (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>; + def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))), + (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))), + (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))), + (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>; + + // Move low f32 and clear high bits. + def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSSZrr (v4f32 (V_SET0)), + (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>; + def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSSZrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>; + } + let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128X)>; + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128X)>; + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128X)>; + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; // MOVSDrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128X)>; + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; // Represent the same patterns above but in the form they appear for // 256-bit types @@ -1340,10 +1363,28 @@ let Predicates = [HasAVX512] in { (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; } + def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)), + FR32X:$src)), sub_xmm)>; + def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))), + (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)), + FR64X:$src)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>; + // Move low f64 and clear high bits. + def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSDZrr (v2f64 (V_SET0)), + (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>; + + def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))), + (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)), + (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>; + // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))), addr:$dst), @@ -1420,11 +1461,29 @@ def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (loadv2i64 addr:$src))))], IIC_SSE_MOVDQ>, EVEX, VEX_W, EVEX_CD8<8, CD8VT8>; -let AddedComplexity = 20 in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + +let Predicates = [HasAVX512] in { + // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 
+ let AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + (VMOVDI2PDIZrm addr:$src)>; + + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + (VMOVDI2PDIZrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + (VMOVDI2PDIZrm addr:$src)>; + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (VMOVZPQILo2PQIZrm addr:$src)>; - def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), (VMOVZPQILo2PQIZrr VR128X:$src)>; + } + // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8699595..4eaba38 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -526,7 +526,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { } // Patterns -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVS{S,D} to the lower bits. -- cgit v1.1 From 8b262e5ab8cbeb8f6f61d92b20d886675966fe34 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 20 Aug 2013 19:12:42 +0000 Subject: ARM: Fix fast-isel copy/paste-o. Update testcase to be more careful about checking register values. While regexes are general goodness for these sorts of testcases, in this example, the registers are constrained by the calling convention, so we can and should check their explicit values. rdar://14779513 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188819 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index e1af947..f4aaf09 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1762,7 +1762,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { } unsigned ResultReg = createResultReg(RC); if (!UseImm) { - Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1); + Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1); Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); -- cgit v1.1 From 93877b3cbcefc0f281b744b135d609d35c3f119c Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 20 Aug 2013 20:46:51 +0000 Subject: [mips] Guard micromips instructions with predicate InMicroMips. Also, fix assembler predicate HasStdEnc so that it is false when the target is micromips.
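In other words, the AssemblerPredicate change makes "standard encoding" require that both alternate encodings are off. A minimal C++ sketch of that guard, assuming an illustrative Features struct (this is not LLVM code; the real check is performed by TableGen-generated matcher logic):

#include <iostream>

// Illustrative stand-in for the subtarget feature bits.
struct Features {
  bool Mips16;
  bool MicroMips;
};

// Mirrors AssemblerPredicate<"!FeatureMips16,!FeatureMicroMips">: before
// this patch only !FeatureMips16 was required, so microMIPS targets could
// still match standard-encoding instruction definitions.
static bool hasStdEnc(const Features &F) {
  return !F.Mips16 && !F.MicroMips;
}

int main() {
  std::cout << hasStdEnc({false, false})  // plain MIPS32 -> 1
            << hasStdEnc({false, true})   // microMIPS    -> 0
            << hasStdEnc({true, false})   // MIPS16       -> 0
            << '\n';
}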
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188824 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MicroMipsInstrInfo.td | 20 +++++++++----------- lib/Target/Mips/MipsInstrInfo.td | 2 +- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index bf07d84..cbb987f 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -28,7 +28,7 @@ class StoreLeftRightMM; -let DecoderNamespace = "MicroMips" in { +let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { /// Arithmetic Instructions (ALU Immediate) def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd>, ADDI_FM_MM<0xc>; @@ -96,14 +96,12 @@ let DecoderNamespace = "MicroMips" in { defm SW_MM : StoreM<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>; /// Load and Store Instructions - unaligned - let Predicates = [InMicroMips] in { - def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12>, - LWL_FM_MM<0x0>; - def LWR_MM : LoadLeftRightMM<"lwr", MipsLWR, GPR32Opnd, mem_mm_12>, - LWL_FM_MM<0x1>; - def SWL_MM : StoreLeftRightMM<"swl", MipsSWL, GPR32Opnd, mem_mm_12>, - LWL_FM_MM<0x8>; - def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>, - LWL_FM_MM<0x9>; - } + def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12>, + LWL_FM_MM<0x0>; + def LWR_MM : LoadLeftRightMM<"lwr", MipsLWR, GPR32Opnd, mem_mm_12>, + LWL_FM_MM<0x1>; + def SWL_MM : StoreLeftRightMM<"swl", MipsSWL, GPR32Opnd, mem_mm_12>, + LWL_FM_MM<0x8>; + def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>, + LWL_FM_MM<0x9>; } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3d7ba00..9ba1879 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -180,7 +180,7 @@ def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, AssemblerPredicate<"FeatureMips32">; def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">, - AssemblerPredicate<"!FeatureMips16">; + AssemblerPredicate<"!FeatureMips16,!FeatureMicroMips">; def NotDSP : Predicate<"!Subtarget.hasDSP()">; def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">, AssemblerPredicate<"FeatureMicroMips">; -- cgit v1.1 From 0323d4b169279414862174f38ae04add6b747a60 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Tue, 20 Aug 2013 20:53:09 +0000 Subject: Add an option which permits the user to specify, using a bitmask, that various functions be compiled as mips32, without having to add attributes. This is useful in certain situations where you don't want to have to edit the function attributes in the source. For now it's only an option used by compiler developers when debugging the mips16 port.
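The mask is interpreted positionally: character i of the string decides function i, in module order, and a '1' forces mips32 by adding the "nomips16" attribute. A standalone C++ sketch of that lookup, with illustrative names only (the actual pass walks the Module and calls addFnAttr):

#include <iostream>
#include <string>
#include <vector>

// Character I of Mask decides function I. Positions holding '0', or indices
// past the end of the mask, leave the function untouched; the needsFP()
// heuristic runs only when no mask is given at all.
static bool maskForcesMips32(const std::string &Mask, unsigned I) {
  return I < Mask.length() && Mask[I] == '1';
}

int main() {
  const std::vector<std::string> Funcs = {"f0", "f1", "f2", "f3"};
  const std::string Mask = "0110"; // force f1 and f2 to mips32
  for (unsigned I = 0; I < Funcs.size(); ++I)
    std::cout << Funcs[I] << ": "
              << (maskForcesMips32(Mask, I) ? "nomips16" : "unchanged")
              << '\n';
}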
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188826 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsOs16.cpp | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index 1919077..5f9b60c 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -14,9 +14,17 @@ #define DEBUG_TYPE "mips-os16" #include "MipsOs16.h" #include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" + +static cl::opt Mips32FunctionMask( + "mips32-function-mask", + cl::init(""), + cl::desc("Force function to be mips32"), + cl::Hidden); + namespace { // Figure out if we need float point based on the function signature. @@ -85,18 +93,32 @@ namespace llvm { bool MipsOs16::runOnModule(Module &M) { - DEBUG(errs() << "Run on Module MipsOs16\n"); + bool usingMask = Mips32FunctionMask.length() > 0; + DEBUG(dbgs() << "Run on Module MipsOs16 \n" << Mips32FunctionMask << "\n"); + if (usingMask) + DEBUG(dbgs() << "using mask \n" << Mips32FunctionMask << "\n"); + unsigned int functionIndex = 0; bool modified = false; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; DEBUG(dbgs() << "Working on " << F->getName() << "\n"); - if (needsFP(*F)) { - DEBUG(dbgs() << " need to compile as nomips16 \n"); - F->addFnAttr("nomips16"); + if (usingMask) { + if ((functionIndex < Mips32FunctionMask.length()) && + (Mips32FunctionMask[functionIndex] == '1')) { + DEBUG(dbgs() << "mask forced mips32: " << F->getName() << "\n"); + F->addFnAttr("nomips16"); + } + functionIndex++; } else { - F->addFnAttr("mips16"); - DEBUG(dbgs() << " no need to compile as nomips16 \n"); + if (needsFP(*F)) { + DEBUG(dbgs() << "os16 forced mips32: " << F->getName() << "\n"); + F->addFnAttr("nomips16"); + } + else { + DEBUG(dbgs() << "os16 forced mips16: " << F->getName() << "\n"); + F->addFnAttr("mips16"); + } } } return modified; -- cgit v1.1 From a98a486ad194c38293efcc5359d6ed2493f950dc Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 20 Aug 2013 21:08:22 +0000 Subject: [mips] Resolve register classes dynamically using ptr_rc to reduce the number of load/store instructions defined. Previously, we were defining load/store instructions for each pointer size (32 and 64-bit), but now we need just one definition. 
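The single definition works because the base-register operand is now resolved through getPointerRegClass(), which this patch implements to key off the ABI. A simplified standalone C++ sketch of that dispatch (the enum and struct are stand-ins, not the LLVM API):

#include <iostream>

enum class RegClass { GPR32, GPR64 };

// Stand-in for the subtarget query used by the real implementation.
struct SubtargetStub {
  bool N64;
  bool isABI_N64() const { return N64; }
};

// Same shape as the MipsRegisterInfo::getPointerRegClass() added below:
// one runtime query replaces the duplicated _P8 instruction definitions.
static RegClass getPointerRegClass(const SubtargetStub &ST) {
  return ST.isABI_N64() ? RegClass::GPR64 : RegClass::GPR32;
}

int main() {
  const SubtargetStub O32{false}, N64{true};
  std::cout << "non-N64 pointer regclass: "
            << (getPointerRegClass(O32) == RegClass::GPR32 ? "GPR32" : "GPR64")
            << "\nN64 pointer regclass: "
            << (getPointerRegClass(N64) == RegClass::GPR64 ? "GPR64" : "GPR32")
            << '\n';
}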
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188830 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MicroMipsInstrInfo.td | 16 +- lib/Target/Mips/Mips64InstrInfo.td | 96 +++++------- lib/Target/Mips/MipsDSPInstrInfo.td | 8 +- lib/Target/Mips/MipsISelLowering.cpp | 60 ++------ lib/Target/Mips/MipsInstrFPU.td | 59 +++----- lib/Target/Mips/MipsInstrInfo.td | 256 ++++++++++---------------------- lib/Target/Mips/MipsRegisterInfo.cpp | 5 + lib/Target/Mips/MipsRegisterInfo.h | 3 + lib/Target/Mips/MipsSEFrameLowering.cpp | 8 - lib/Target/Mips/MipsSEInstrInfo.cpp | 44 +++--- 10 files changed, 184 insertions(+), 371 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index cbb987f..2cc5555 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -86,14 +86,14 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { SRLV_FM_MM<0xd0, 0>; /// Load and Store Instructions - aligned - defm LB_MM : LoadM<"lb", GPR32Opnd, sextloadi8>, MMRel, LW_FM_MM<0x7>; - defm LBu_MM : LoadM<"lbu", GPR32Opnd, zextloadi8>, MMRel, LW_FM_MM<0x5>; - defm LH_MM : LoadM<"lh", GPR32Opnd, sextloadi16>, MMRel, LW_FM_MM<0xf>; - defm LHu_MM : LoadM<"lhu", GPR32Opnd, zextloadi16>, MMRel, LW_FM_MM<0xd>; - defm LW_MM : LoadM<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>; - defm SB_MM : StoreM<"sb", GPR32Opnd, truncstorei8>, MMRel, LW_FM_MM<0x6>; - defm SH_MM : StoreM<"sh", GPR32Opnd, truncstorei16>, MMRel, LW_FM_MM<0xe>; - defm SW_MM : StoreM<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>; + def LB_MM : Load<"lb", GPR32Opnd, sextloadi8>, MMRel, LW_FM_MM<0x7>; + def LBu_MM : Load<"lbu", GPR32Opnd, zextloadi8>, MMRel, LW_FM_MM<0x5>; + def LH_MM : Load<"lh", GPR32Opnd, sextloadi16>, MMRel, LW_FM_MM<0xf>; + def LHu_MM : Load<"lhu", GPR32Opnd, zextloadi16>, MMRel, LW_FM_MM<0xd>; + def LW_MM : Load<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>; + def SB_MM : Store<"sb", GPR32Opnd, truncstorei8>, MMRel, LW_FM_MM<0x6>; + def SH_MM : Store<"sh", GPR32Opnd, truncstorei16>, MMRel, LW_FM_MM<0xe>; + def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>; /// Load and Store Instructions - unaligned def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12>, diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 9e164fb..659a7a2 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -34,36 +34,21 @@ def immZExt6 : ImmLeaf; //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// -let DecoderNamespace = "Mips64" in { - -multiclass Atomic2Ops64 { - def NAME : Atomic2Ops, Requires<[NotN64, HasStdEnc]>; - def _P8 : Atomic2Ops, Requires<[IsN64, HasStdEnc]>; -} - -multiclass AtomicCmpSwap64 { - def NAME : AtomicCmpSwap, - Requires<[NotN64, HasStdEnc]>; - def _P8 : AtomicCmpSwap, - Requires<[IsN64, HasStdEnc]>; -} -} -let usesCustomInserter = 1, Predicates = [HasStdEnc], - DecoderNamespace = "Mips64" in { - defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64; - defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64; - defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64; - defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64; - defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64; - defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64; - defm ATOMIC_SWAP_I64 : Atomic2Ops64; - defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64; +let usesCustomInserter = 1 in { + def ATOMIC_LOAD_ADD_I64 : 
Atomic2Ops; + def ATOMIC_LOAD_SUB_I64 : Atomic2Ops; + def ATOMIC_LOAD_AND_I64 : Atomic2Ops; + def ATOMIC_LOAD_OR_I64 : Atomic2Ops; + def ATOMIC_LOAD_XOR_I64 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I64 : Atomic2Ops; + def ATOMIC_SWAP_I64 : Atomic2Ops; + def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; } /// Pseudo instructions for loading and storing accumulator registers. let isPseudo = 1, isCodeGenOnly = 1 in { - defm LOAD_ACC128 : LoadM<"", ACC128>; - defm STORE_ACC128 : StoreM<"", ACC128>; + def LOAD_ACC128 : Load<"", ACC128>; + def STORE_ACC128 : Store<"", ACC128>; } //===----------------------------------------------------------------------===// @@ -134,43 +119,36 @@ let Predicates = [HasMips64r2, HasStdEnc] in { /// Load and Store Instructions /// aligned let isCodeGenOnly = 1 in { -defm LB64 : LoadM<"lb", GPR64Opnd, sextloadi8, IILoad>, LW_FM<0x20>; -defm LBu64 : LoadM<"lbu", GPR64Opnd, zextloadi8, IILoad>, LW_FM<0x24>; -defm LH64 : LoadM<"lh", GPR64Opnd, sextloadi16, IILoad>, LW_FM<0x21>; -defm LHu64 : LoadM<"lhu", GPR64Opnd, zextloadi16, IILoad>, LW_FM<0x25>; -defm LW64 : LoadM<"lw", GPR64Opnd, sextloadi32, IILoad>, LW_FM<0x23>; -defm SB64 : StoreM<"sb", GPR64Opnd, truncstorei8, IIStore>, LW_FM<0x28>; -defm SH64 : StoreM<"sh", GPR64Opnd, truncstorei16, IIStore>, LW_FM<0x29>; -defm SW64 : StoreM<"sw", GPR64Opnd, truncstorei32, IIStore>, LW_FM<0x2b>; +def LB64 : Load<"lb", GPR64Opnd, sextloadi8, IILoad>, LW_FM<0x20>; +def LBu64 : Load<"lbu", GPR64Opnd, zextloadi8, IILoad>, LW_FM<0x24>; +def LH64 : Load<"lh", GPR64Opnd, sextloadi16, IILoad>, LW_FM<0x21>; +def LHu64 : Load<"lhu", GPR64Opnd, zextloadi16, IILoad>, LW_FM<0x25>; +def LW64 : Load<"lw", GPR64Opnd, sextloadi32, IILoad>, LW_FM<0x23>; +def SB64 : Store<"sb", GPR64Opnd, truncstorei8, IIStore>, LW_FM<0x28>; +def SH64 : Store<"sh", GPR64Opnd, truncstorei16, IIStore>, LW_FM<0x29>; +def SW64 : Store<"sw", GPR64Opnd, truncstorei32, IIStore>, LW_FM<0x2b>; } -defm LWu : LoadM<"lwu", GPR64Opnd, zextloadi32, IILoad>, LW_FM<0x27>; -defm LD : LoadM<"ld", GPR64Opnd, load, IILoad>, LW_FM<0x37>; -defm SD : StoreM<"sd", GPR64Opnd, store, IIStore>, LW_FM<0x3f>; +def LWu : Load<"lwu", GPR64Opnd, zextloadi32, IILoad>, LW_FM<0x27>; +def LD : Load<"ld", GPR64Opnd, load, IILoad>, LW_FM<0x37>; +def SD : Store<"sd", GPR64Opnd, store, IIStore>, LW_FM<0x3f>; /// load/store left/right let isCodeGenOnly = 1 in { -defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, GPR64Opnd>, LW_FM<0x22>; -defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, GPR64Opnd>, LW_FM<0x26>; -defm SWL64 : StoreLeftRightM<"swl", MipsSWL, GPR64Opnd>, LW_FM<0x2a>; -defm SWR64 : StoreLeftRightM<"swr", MipsSWR, GPR64Opnd>, LW_FM<0x2e>; +def LWL64 : LoadLeftRight<"lwl", MipsLWL, GPR64Opnd>, LW_FM<0x22>; +def LWR64 : LoadLeftRight<"lwr", MipsLWR, GPR64Opnd>, LW_FM<0x26>; +def SWL64 : StoreLeftRight<"swl", MipsSWL, GPR64Opnd>, LW_FM<0x2a>; +def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd>, LW_FM<0x2e>; } -defm LDL : LoadLeftRightM<"ldl", MipsLDL, GPR64Opnd>, LW_FM<0x1a>; -defm LDR : LoadLeftRightM<"ldr", MipsLDR, GPR64Opnd>, LW_FM<0x1b>; -defm SDL : StoreLeftRightM<"sdl", MipsSDL, GPR64Opnd>, LW_FM<0x2c>; -defm SDR : StoreLeftRightM<"sdr", MipsSDR, GPR64Opnd>, LW_FM<0x2d>; +def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd>, LW_FM<0x1a>; +def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd>, LW_FM<0x1b>; +def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd>, LW_FM<0x2c>; +def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd>, LW_FM<0x2d>; /// Load-linked, Store-conditional -let Predicates = [NotN64, HasStdEnc] in { 
- def LLD : LLBase<"lld", GPR64Opnd, mem>, LW_FM<0x34>; - def SCD : SCBase<"scd", GPR64Opnd, mem>, LW_FM<0x3c>; -} - -let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in { - def LLD_P8 : LLBase<"lld", GPR64Opnd, mem64>, LW_FM<0x34>; - def SCD_P8 : SCBase<"scd", GPR64Opnd, mem64>, LW_FM<0x3c>; -} +def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>; +def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>; /// Jump and Branch Instructions let isCodeGenOnly = 1 in { @@ -221,7 +199,7 @@ def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>; def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>; def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>; -def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd, mem_ea_64>, LW_FM<0x19>; +def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>; let isCodeGenOnly = 1 in def RDHWR64 : ReadHardware, RDHWR_FM; @@ -251,18 +229,12 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in { //===----------------------------------------------------------------------===// // extended loads -let Predicates = [NotN64, HasStdEnc] in { +let Predicates = [HasStdEnc] in { def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>; def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>; def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>; def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>; } -let Predicates = [IsN64, HasStdEnc] in { - def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>; - def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>; - def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64_P8 addr:$src)>; - def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64_P8 addr:$src)>; -} // hi/lo relocs def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 5020aa3..53e3389 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -1242,12 +1242,12 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC; // Pseudos. let isPseudo = 1, isCodeGenOnly = 1 in { // Pseudo instructions for loading and storing accumulator registers. - defm LOAD_ACC64DSP : LoadM<"", ACC64DSPOpnd>; - defm STORE_ACC64DSP : StoreM<"", ACC64DSPOpnd>; + def LOAD_ACC64DSP : Load<"", ACC64DSPOpnd>; + def STORE_ACC64DSP : Store<"", ACC64DSPOpnd>; // Pseudos for loading and storing ccond field of DSP control register. - defm LOAD_CCOND_DSP : LoadM<"load_ccond_dsp", DSPCC>; - defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>; + def LOAD_CCOND_DSP : Load<"load_ccond_dsp", DSPCC>; + def STORE_CCOND_DSP : Store<"store_ccond_dsp", DSPCC>; } // Pseudo CMP and PICK instructions. 
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 377236a..3601a13 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -797,107 +797,75 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, default: llvm_unreachable("Unexpected instr type to insert"); case Mips::ATOMIC_LOAD_ADD_I8: - case Mips::ATOMIC_LOAD_ADD_I8_P8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I16: - case Mips::ATOMIC_LOAD_ADD_I16_P8: return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I32: - case Mips::ATOMIC_LOAD_ADD_I32_P8: return emitAtomicBinary(MI, BB, 4, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I64: - case Mips::ATOMIC_LOAD_ADD_I64_P8: return emitAtomicBinary(MI, BB, 8, Mips::DADDu); case Mips::ATOMIC_LOAD_AND_I8: - case Mips::ATOMIC_LOAD_AND_I8_P8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND); case Mips::ATOMIC_LOAD_AND_I16: - case Mips::ATOMIC_LOAD_AND_I16_P8: return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND); case Mips::ATOMIC_LOAD_AND_I32: - case Mips::ATOMIC_LOAD_AND_I32_P8: return emitAtomicBinary(MI, BB, 4, Mips::AND); case Mips::ATOMIC_LOAD_AND_I64: - case Mips::ATOMIC_LOAD_AND_I64_P8: return emitAtomicBinary(MI, BB, 8, Mips::AND64); case Mips::ATOMIC_LOAD_OR_I8: - case Mips::ATOMIC_LOAD_OR_I8_P8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR); case Mips::ATOMIC_LOAD_OR_I16: - case Mips::ATOMIC_LOAD_OR_I16_P8: return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR); case Mips::ATOMIC_LOAD_OR_I32: - case Mips::ATOMIC_LOAD_OR_I32_P8: return emitAtomicBinary(MI, BB, 4, Mips::OR); case Mips::ATOMIC_LOAD_OR_I64: - case Mips::ATOMIC_LOAD_OR_I64_P8: return emitAtomicBinary(MI, BB, 8, Mips::OR64); case Mips::ATOMIC_LOAD_XOR_I8: - case Mips::ATOMIC_LOAD_XOR_I8_P8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I16: - case Mips::ATOMIC_LOAD_XOR_I16_P8: return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I32: - case Mips::ATOMIC_LOAD_XOR_I32_P8: return emitAtomicBinary(MI, BB, 4, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I64: - case Mips::ATOMIC_LOAD_XOR_I64_P8: return emitAtomicBinary(MI, BB, 8, Mips::XOR64); case Mips::ATOMIC_LOAD_NAND_I8: - case Mips::ATOMIC_LOAD_NAND_I8_P8: return emitAtomicBinaryPartword(MI, BB, 1, 0, true); case Mips::ATOMIC_LOAD_NAND_I16: - case Mips::ATOMIC_LOAD_NAND_I16_P8: return emitAtomicBinaryPartword(MI, BB, 2, 0, true); case Mips::ATOMIC_LOAD_NAND_I32: - case Mips::ATOMIC_LOAD_NAND_I32_P8: return emitAtomicBinary(MI, BB, 4, 0, true); case Mips::ATOMIC_LOAD_NAND_I64: - case Mips::ATOMIC_LOAD_NAND_I64_P8: return emitAtomicBinary(MI, BB, 8, 0, true); case Mips::ATOMIC_LOAD_SUB_I8: - case Mips::ATOMIC_LOAD_SUB_I8_P8: return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I16: - case Mips::ATOMIC_LOAD_SUB_I16_P8: return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I32: - case Mips::ATOMIC_LOAD_SUB_I32_P8: return emitAtomicBinary(MI, BB, 4, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I64: - case Mips::ATOMIC_LOAD_SUB_I64_P8: return emitAtomicBinary(MI, BB, 8, Mips::DSUBu); case Mips::ATOMIC_SWAP_I8: - case Mips::ATOMIC_SWAP_I8_P8: return emitAtomicBinaryPartword(MI, BB, 1, 0); case Mips::ATOMIC_SWAP_I16: - case Mips::ATOMIC_SWAP_I16_P8: return emitAtomicBinaryPartword(MI, BB, 2, 0); case Mips::ATOMIC_SWAP_I32: - case Mips::ATOMIC_SWAP_I32_P8: return emitAtomicBinary(MI, BB, 4, 0); case 
Mips::ATOMIC_SWAP_I64: - case Mips::ATOMIC_SWAP_I64_P8: return emitAtomicBinary(MI, BB, 8, 0); case Mips::ATOMIC_CMP_SWAP_I8: - case Mips::ATOMIC_CMP_SWAP_I8_P8: return emitAtomicCmpSwapPartword(MI, BB, 1); case Mips::ATOMIC_CMP_SWAP_I16: - case Mips::ATOMIC_CMP_SWAP_I16_P8: return emitAtomicCmpSwapPartword(MI, BB, 2); case Mips::ATOMIC_CMP_SWAP_I32: - case Mips::ATOMIC_CMP_SWAP_I32_P8: return emitAtomicCmpSwap(MI, BB, 4); case Mips::ATOMIC_CMP_SWAP_I64: - case Mips::ATOMIC_CMP_SWAP_I64_P8: return emitAtomicCmpSwap(MI, BB, 8); case Mips::PseudoSDIV: case Mips::PseudoUDIV: @@ -924,16 +892,16 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned LL, SC, AND, NOR, ZERO, BEQ; if (Size == 4) { - LL = IsN64 ? Mips::LL_P8 : Mips::LL; - SC = IsN64 ? Mips::SC_P8 : Mips::SC; + LL = Mips::LL; + SC = Mips::SC; AND = Mips::AND; NOR = Mips::NOR; ZERO = Mips::ZERO; BEQ = Mips::BEQ; } else { - LL = IsN64 ? Mips::LLD_P8 : Mips::LLD; - SC = IsN64 ? Mips::SCD_P8 : Mips::SCD; + LL = Mips::LLD; + SC = Mips::SCD; AND = Mips::AND64; NOR = Mips::NOR64; ZERO = Mips::ZERO_64; @@ -1009,8 +977,6 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL; - unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC; unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -1107,7 +1073,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, // beq success,$0,loopMBB BB = loopMBB; - BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, DL, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); if (Nand) { // and andres, oldval, incr2 // nor binopres, $0, andres @@ -1130,7 +1096,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, .addReg(OldVal).addReg(Mask2); BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal0).addReg(NewVal); - BuildMI(BB, DL, TII->get(SC), Success) + BuildMI(BB, DL, TII->get(Mips::SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, DL, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); @@ -1171,15 +1137,15 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI, unsigned LL, SC, ZERO, BNE, BEQ; if (Size == 4) { - LL = IsN64 ? Mips::LL_P8 : Mips::LL; - SC = IsN64 ? Mips::SC_P8 : Mips::SC; + LL = Mips::LL; + SC = Mips::SC; ZERO = Mips::ZERO; BNE = Mips::BNE; BEQ = Mips::BEQ; } else { - LL = IsN64 ? Mips::LLD_P8 : Mips::LLD; - SC = IsN64 ? Mips::SCD_P8 : Mips::SCD; + LL = Mips::LLD; + SC = Mips::SCD; ZERO = Mips::ZERO_64; BNE = Mips::BNE64; BEQ = Mips::BEQ64; @@ -1251,8 +1217,6 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL; - unsigned SC = IsN64 ? 
Mips::SC_P8 : Mips::SC; unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -1349,7 +1313,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, // and maskedoldval0,oldval,mask // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; - BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, DL, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) .addReg(OldVal).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::BNE)) @@ -1365,7 +1329,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, .addReg(OldVal).addReg(Mask2); BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal1).addReg(ShiftedNewVal); - BuildMI(BB, DL, TII->get(SC), Success) + BuildMI(BB, DL, TII->get(Mips::SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, DL, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index f64e6f7..28fdbf5 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -143,16 +143,16 @@ class MTC1_FT; class LW_FT : - InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), + SDPatternOperator OpNode= null_frag> : + InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; let mayLoad = 1; } class SW_FT : - InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), + SDPatternOperator OpNode= null_frag> : + InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; let mayStore = 1; @@ -353,39 +353,23 @@ def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, IIFmove>, } /// Floating Point Memory Instructions -let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LWC1_P8 : LW_FT<"lwc1", FGR32Opnd, IIFLoad, mem64, load>, - LW_FM<0x31>; - def SWC1_P8 : SW_FT<"swc1", FGR32Opnd, IIFStore, mem64, store>, - LW_FM<0x39>; - def LDC164_P8 : LW_FT<"ldc1", FGR64Opnd, IIFLoad, mem64, load>, - LW_FM<0x35> { - let isCodeGenOnly =1; - } - def SDC164_P8 : SW_FT<"sdc1", FGR64Opnd, IIFStore, mem64, store>, - LW_FM<0x3d> { - let isCodeGenOnly =1; - } -} - -let Predicates = [NotN64, HasStdEnc] in { - def LWC1 : LW_FT<"lwc1", FGR32Opnd, IIFLoad, mem, load>, LW_FM<0x31>; - def SWC1 : SW_FT<"swc1", FGR32Opnd, IIFStore, mem, store>, LW_FM<0x39>; +let Predicates = [HasStdEnc] in { + def LWC1 : LW_FT<"lwc1", FGR32Opnd, IIFLoad, load>, LW_FM<0x31>; + def SWC1 : SW_FT<"swc1", FGR32Opnd, IIFStore, store>, LW_FM<0x39>; } -let Predicates = [NotN64, HasMips64, HasStdEnc], - DecoderNamespace = "Mips64" in { - def LDC164 : LW_FT<"ldc1", FGR64Opnd, IIFLoad, mem, load>, LW_FM<0x35>; - def SDC164 : SW_FT<"sdc1", FGR64Opnd, IIFStore, mem, store>, LW_FM<0x3d>; +let Predicates = [HasMips64, HasStdEnc], DecoderNamespace = "Mips64" in { + def LDC164 : LW_FT<"ldc1", FGR64Opnd, IIFLoad, load>, LW_FM<0x35>; + def SDC164 : SW_FT<"sdc1", FGR64Opnd, IIFStore, store>, LW_FM<0x3d>; } -let Predicates = [NotN64, NotMips64, HasStdEnc] in { +let Predicates = [NotMips64, HasStdEnc] in { let isPseudo = 1, isCodeGenOnly = 1 in { - def PseudoLDC1 : LW_FT<"", AFGR64Opnd, IIFLoad, mem, load>; - def PseudoSDC1 : SW_FT<"", AFGR64Opnd, IIFStore, mem, store>; + def PseudoLDC1 : LW_FT<"", 
AFGR64Opnd, IIFLoad, load>; + def PseudoSDC1 : SW_FT<"", AFGR64Opnd, IIFStore, store>; } - def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, mem>, LW_FM<0x35>; - def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, mem>, LW_FM<0x3d>; + def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad>, LW_FM<0x35>; + def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore>, LW_FM<0x3d>; } // Indexed loads and stores. @@ -611,24 +595,17 @@ let Predicates = [IsFP64bit, HasStdEnc] in { // Patterns for loads/stores with a reg+imm operand. let AddedComplexity = 40 in { - let Predicates = [IsN64, HasStdEnc] in { - def : LoadRegImmPat; - def : StoreRegImmPat; - def : LoadRegImmPat; - def : StoreRegImmPat; - } - - let Predicates = [NotN64, HasStdEnc] in { + let Predicates = [HasStdEnc] in { def : LoadRegImmPat; def : StoreRegImmPat; } - let Predicates = [NotN64, HasMips64, HasStdEnc] in { + let Predicates = [HasMips64, HasStdEnc] in { def : LoadRegImmPat; def : StoreRegImmPat; } - let Predicates = [NotN64, NotMips64, HasStdEnc] in { + let Predicates = [NotMips64, HasStdEnc] in { def : LoadRegImmPat; def : StoreRegImmPat; } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 9ba1879..3c90e75 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -278,34 +278,23 @@ def MipsMemAsmOperand : AsmOperandClass { } // Address operand -def mem : Operand { +def mem : Operand { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops GPR32, simm16); + let MIOperandInfo = (ops ptr_rc, simm16); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; let OperandType = "OPERAND_MEMORY"; } -def mem64 : Operand { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops GPR64, simm16_64); - let EncoderMethod = "getMemEncoding"; - let ParserMatchClass = MipsMemAsmOperand; - let OperandType = "OPERAND_MEMORY"; -} - -def mem_ea : Operand { +def mem_ea : Operand { let PrintMethod = "printMemOperandEA"; - let MIOperandInfo = (ops GPR32, simm16); + let MIOperandInfo = (ops ptr_rc, simm16); let EncoderMethod = "getMemEncoding"; let OperandType = "OPERAND_MEMORY"; } -def mem_ea_64 : Operand { - let PrintMethod = "printMemOperandEA"; - let MIOperandInfo = (ops GPR64, simm16_64); - let EncoderMethod = "getMemEncoding"; - let OperandType = "OPERAND_MEMORY"; +def PtrRC : Operand { + let MIOperandInfo = (ops ptr_rc); } // size operand of ext instruction @@ -457,91 +446,39 @@ class FMem op, dag outs, dag ins, string asmstr, list pattern, } // Memory Load/Store -class Load : - InstSE<(outs RO:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RO:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI, - !strconcat(opstr, ofsuffix)> { +class Load : + InstSE<(outs RO:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(set RO:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI, opstr> { let DecoderMethod = "DecodeMem"; let canFoldAsLoad = 1; let mayLoad = 1; } -class Store : - InstSE<(outs), (ins RO:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RO:$rt, Addr:$addr)], NoItinerary, FrmI, - !strconcat(opstr, ofsuffix)> { +class Store : + InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(OpNode RO:$rt, Addr:$addr)], NoItinerary, FrmI, opstr> { let DecoderMethod = "DecodeMem"; let mayStore = 1; } -multiclass LoadM { - def NAME : Load, - Requires<[NotN64, HasStdEnc]>; - def _P8 : Load, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } -} - -multiclass StoreM { - def 
NAME : Store, - Requires<[NotN64, HasStdEnc]>; - def _P8 : Store, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } -} - // Load/Store Left/Right let canFoldAsLoad = 1 in -class LoadLeftRight : - InstSE<(outs RO:$rt), (ins MemOpnd:$addr, RO:$src), +class LoadLeftRight : + InstSE<(outs RO:$rt), (ins mem:$addr, RO:$src), !strconcat(opstr, "\t$rt, $addr"), [(set RO:$rt, (OpNode addr:$addr, RO:$src))], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; string Constraints = "$src = $rt"; } -class StoreLeftRight: - InstSE<(outs), (ins RO:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), +class StoreLeftRight : + InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RO:$rt, addr:$addr)], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; } -multiclass LoadLeftRightM { - def NAME : LoadLeftRight, - Requires<[NotN64, HasStdEnc, NotInMicroMips]>; - def _P8 : LoadLeftRight, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } -} - -multiclass StoreLeftRightM { - def NAME : StoreLeftRight, - Requires<[NotN64, HasStdEnc, NotInMicroMips]>; - def _P8 : StoreLeftRight, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } -} - // Conditional Branch class CBranch : InstSE<(outs), (ins RO:$rs, RO:$rt, brtarget:$offset), @@ -749,8 +686,8 @@ class MoveToLOHI DefRegs>: let neverHasSideEffects = 1; } -class EffectiveAddress : - InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), +class EffectiveAddress : + InstSE<(outs RO:$rt), (ins mem_ea:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RO:$rt, addr:$addr)], NoItinerary, FrmI> { let isCodeGenOnly = 1; let DecoderMethod = "DecodeMem"; @@ -807,36 +744,24 @@ class InsBase: } // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). -class Atomic2Ops : - PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; - -multiclass Atomic2Ops32 { - def NAME : Atomic2Ops, Requires<[NotN64, HasStdEnc]>; - def _P8 : Atomic2Ops, Requires<[IsN64, HasStdEnc]>; -} +class Atomic2Ops : + PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr), + [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>; // Atomic Compare & Swap. 
-class AtomicCmpSwap : - PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; - -multiclass AtomicCmpSwap32 { - def NAME : AtomicCmpSwap, - Requires<[NotN64, HasStdEnc]>; - def _P8 : AtomicCmpSwap, - Requires<[IsN64, HasStdEnc]>; -} +class AtomicCmpSwap : + PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), + [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; -class LLBase : - InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), +class LLBase : + InstSE<(outs RO:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; let mayLoad = 1; } -class SCBase : - InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr), +class SCBase : + InstSE<(outs RO:$dst), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; let mayStore = 1; @@ -867,38 +792,38 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), } let usesCustomInserter = 1 in { - defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32; - defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32; - defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32; - defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32; - defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32; - defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32; - defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32; - defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32; - defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32; - defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32; - defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32; - defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32; - defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32; - defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32; - defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32; - defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32; - defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32; - defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32; - - defm ATOMIC_SWAP_I8 : Atomic2Ops32; - defm ATOMIC_SWAP_I16 : Atomic2Ops32; - defm ATOMIC_SWAP_I32 : Atomic2Ops32; - - defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32; - defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32; - defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32; + def ATOMIC_LOAD_ADD_I8 : Atomic2Ops; + def ATOMIC_LOAD_ADD_I16 : Atomic2Ops; + def ATOMIC_LOAD_ADD_I32 : Atomic2Ops; + def ATOMIC_LOAD_SUB_I8 : Atomic2Ops; + def ATOMIC_LOAD_SUB_I16 : Atomic2Ops; + def ATOMIC_LOAD_SUB_I32 : Atomic2Ops; + def ATOMIC_LOAD_AND_I8 : Atomic2Ops; + def ATOMIC_LOAD_AND_I16 : Atomic2Ops; + def ATOMIC_LOAD_AND_I32 : Atomic2Ops; + def ATOMIC_LOAD_OR_I8 : Atomic2Ops; + def ATOMIC_LOAD_OR_I16 : Atomic2Ops; + def ATOMIC_LOAD_OR_I32 : Atomic2Ops; + def ATOMIC_LOAD_XOR_I8 : Atomic2Ops; + def ATOMIC_LOAD_XOR_I16 : Atomic2Ops; + def ATOMIC_LOAD_XOR_I32 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I8 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I16 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I32 : Atomic2Ops; + + def ATOMIC_SWAP_I8 : Atomic2Ops; + def ATOMIC_SWAP_I16 : Atomic2Ops; + def ATOMIC_SWAP_I32 : Atomic2Ops; + + def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; + def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; + def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; } /// Pseudo instructions for loading and storing accumulator registers. 
let isPseudo = 1, isCodeGenOnly = 1 in { - defm LOAD_ACC64 : LoadM<"", ACC64>; - defm STORE_ACC64 : StoreM<"", ACC64>; + def LOAD_ACC64 : Load<"", ACC64>; + def STORE_ACC64 : Store<"", ACC64>; } //===----------------------------------------------------------------------===// @@ -969,23 +894,23 @@ let Predicates = [HasMips32r2, HasStdEnc] in { /// Load and Store Instructions /// aligned -defm LB : LoadM<"lb", GPR32Opnd, sextloadi8, IILoad>, MMRel, LW_FM<0x20>; -defm LBu : LoadM<"lbu", GPR32Opnd, zextloadi8, IILoad, addrDefault>, MMRel, - LW_FM<0x24>; -defm LH : LoadM<"lh", GPR32Opnd, sextloadi16, IILoad, addrDefault>, MMRel, - LW_FM<0x21>; -defm LHu : LoadM<"lhu", GPR32Opnd, zextloadi16, IILoad>, MMRel, LW_FM<0x25>; -defm LW : LoadM<"lw", GPR32Opnd, load, IILoad, addrDefault>, MMRel, - LW_FM<0x23>; -defm SB : StoreM<"sb", GPR32Opnd, truncstorei8, IIStore>, MMRel, LW_FM<0x28>; -defm SH : StoreM<"sh", GPR32Opnd, truncstorei16, IIStore>, MMRel, LW_FM<0x29>; -defm SW : StoreM<"sw", GPR32Opnd, store, IIStore>, MMRel, LW_FM<0x2b>; +def LB : Load<"lb", GPR32Opnd, sextloadi8, IILoad>, MMRel, LW_FM<0x20>; +def LBu : Load<"lbu", GPR32Opnd, zextloadi8, IILoad, addrDefault>, MMRel, + LW_FM<0x24>; +def LH : Load<"lh", GPR32Opnd, sextloadi16, IILoad, addrDefault>, MMRel, + LW_FM<0x21>; +def LHu : Load<"lhu", GPR32Opnd, zextloadi16, IILoad>, MMRel, LW_FM<0x25>; +def LW : Load<"lw", GPR32Opnd, load, IILoad, addrDefault>, MMRel, + LW_FM<0x23>; +def SB : Store<"sb", GPR32Opnd, truncstorei8, IIStore>, MMRel, LW_FM<0x28>; +def SH : Store<"sh", GPR32Opnd, truncstorei16, IIStore>, MMRel, LW_FM<0x29>; +def SW : Store<"sw", GPR32Opnd, store, IIStore>, MMRel, LW_FM<0x2b>; /// load/store left/right -defm LWL : LoadLeftRightM<"lwl", MipsLWL, GPR32Opnd>, LW_FM<0x22>; -defm LWR : LoadLeftRightM<"lwr", MipsLWR, GPR32Opnd>, LW_FM<0x26>; -defm SWL : StoreLeftRightM<"swl", MipsSWL, GPR32Opnd>, LW_FM<0x2a>; -defm SWR : StoreLeftRightM<"swr", MipsSWR, GPR32Opnd>, LW_FM<0x2e>; +def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd>, LW_FM<0x22>; +def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd>, LW_FM<0x26>; +def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd>, LW_FM<0x2a>; +def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd>, LW_FM<0x2e>; def SYNC : SYNC_FT, SYNC_FM; def TEQ : TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>; @@ -1002,15 +927,8 @@ def DI : DEI_FT<"di", GPR32Opnd>, EI_FM<0>; def WAIT : WAIT_FT<"wait">; /// Load-linked, Store-conditional -let Predicates = [NotN64, HasStdEnc] in { - def LL : LLBase<"ll", GPR32Opnd, mem>, LW_FM<0x30>; - def SC : SCBase<"sc", GPR32Opnd, mem>, LW_FM<0x38>; -} - -let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LL_P8 : LLBase<"ll", GPR32Opnd, mem64>, LW_FM<0x30>; - def SC_P8 : SCBase<"sc", GPR32Opnd, mem64>, LW_FM<0x38>; -} +def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>; +def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>; /// Jump and Branch Instructions def J : JumpFJ, FJ<2>, @@ -1093,7 +1011,7 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. 
It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu", GPR32Opnd, mem_ea>, LW_FM<9>; +def LEA_ADDiu : EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>; // MADD*/MSUB* def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>; @@ -1291,24 +1209,15 @@ def : MipsPat<(not GPR32:$in), (NOR GPR32Opnd:$in, ZERO)>; // extended loads -let Predicates = [NotN64, HasStdEnc] in { +let Predicates = [HasStdEnc] in { def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>; } -let Predicates = [IsN64, HasStdEnc] in { - def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; - def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; - def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu_P8 addr:$src)>; -} // peepholes -let Predicates = [NotN64, HasStdEnc] in { - def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; -} -let Predicates = [IsN64, HasStdEnc] in { - def : MipsPat<(store (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; -} +let Predicates = [HasStdEnc] in +def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; // brcond patterns multiclass BrcondPats; def : LoadRegImmPat; def : LoadRegImmPat; } - let Predicates = [IsN64, HasStdEnc] in { - def : LoadRegImmPat; - def : LoadRegImmPat; - def : LoadRegImmPat; - } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 6d76021..26891ef 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -47,6 +47,11 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST) unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } +const TargetRegisterClass * +MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + return Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; +} unsigned MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 20ba41d..c867524 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -42,6 +42,9 @@ public: void adjustMipsStackFrame(MachineFunction &MF) const; /// Code Generation virtual methods... 
+ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 2b0672c..edbbe5d 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -70,31 +70,23 @@ bool ExpandPseudo::expand() { bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { switch(I->getOpcode()) { case Mips::LOAD_CCOND_DSP: - case Mips::LOAD_CCOND_DSP_P8: expandLoadCCond(MBB, I); break; case Mips::STORE_CCOND_DSP: - case Mips::STORE_CCOND_DSP_P8: expandStoreCCond(MBB, I); break; case Mips::LOAD_ACC64: - case Mips::LOAD_ACC64_P8: case Mips::LOAD_ACC64DSP: - case Mips::LOAD_ACC64DSP_P8: expandLoadACC(MBB, I, 4); break; case Mips::LOAD_ACC128: - case Mips::LOAD_ACC128_P8: expandLoadACC(MBB, I, 8); break; case Mips::STORE_ACC64: - case Mips::STORE_ACC64_P8: case Mips::STORE_ACC64DSP: - case Mips::STORE_ACC64DSP_P8: expandStoreACC(MBB, I, 4); break; case Mips::STORE_ACC128: - case Mips::STORE_ACC128_P8: expandStoreACC(MBB, I, 8); break; case TargetOpcode::COPY: diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 24a6836..d0c8e62 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -49,10 +49,8 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { unsigned Opc = MI->getOpcode(); - if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || - (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || - (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || - (Opc == Mips::LDC164_P8)) { + if ((Opc == Mips::LW) || (Opc == Mips::LD) || + (Opc == Mips::LWC1) || (Opc == Mips::LDC1) || (Opc == Mips::LDC164)) { if ((MI->getOperand(1).isFI()) && // is a stack slot (MI->getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI->getOperand(2)))) { @@ -74,10 +72,8 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { unsigned Opc = MI->getOpcode(); - if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || - (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || - (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || - (Opc == Mips::SDC164_P8)) { + if ((Opc == Mips::SW) || (Opc == Mips::SD) || + (Opc == Mips::SWC1) || (Opc == Mips::SDC1) || (Opc == Mips::SDC164)) { if ((MI->getOperand(1).isFI()) && // is a stack slot (MI->getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI->getOperand(2)))) { @@ -186,23 +182,23 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc = 0; if (Mips::GPR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SW_P8 : Mips::SW; + Opc = Mips::SW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SD_P8 : Mips::SD; + Opc = Mips::SD; else if (Mips::ACC64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::STORE_ACC64_P8 : Mips::STORE_ACC64; + Opc = Mips::STORE_ACC64; else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::STORE_ACC64DSP_P8 : Mips::STORE_ACC64DSP; + Opc = Mips::STORE_ACC64DSP; else if (Mips::ACC128RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::STORE_ACC128_P8 : Mips::STORE_ACC128; + Opc = Mips::STORE_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? 
Mips::STORE_CCOND_DSP_P8 : Mips::STORE_CCOND_DSP; + Opc = Mips::STORE_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; + Opc = Mips::SWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; + Opc = Mips::SDC164; assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) @@ -219,23 +215,23 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc = 0; if (Mips::GPR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LW_P8 : Mips::LW; + Opc = Mips::LW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LD_P8 : Mips::LD; + Opc = Mips::LD; else if (Mips::ACC64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LOAD_ACC64_P8 : Mips::LOAD_ACC64; + Opc = Mips::LOAD_ACC64; else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LOAD_ACC64DSP_P8 : Mips::LOAD_ACC64DSP; + Opc = Mips::LOAD_ACC64DSP; else if (Mips::ACC128RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LOAD_ACC128_P8 : Mips::LOAD_ACC128; + Opc = Mips::LOAD_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LOAD_CCOND_DSP_P8 : Mips::LOAD_CCOND_DSP; + Opc = Mips::LOAD_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; + Opc = Mips::LWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164; + Opc = Mips::LDC164; assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset) -- cgit v1.1 From 3531db14c61957e7ad00ce972e9685864c3887da Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 20 Aug 2013 22:58:56 +0000 Subject: [mips] Define register class FGRH32 for the high half of the 64-bit floating point registers. We will need this register class later when we add definitions for instructions mfhc1 and mthc1. Also, remove sub-register indices sub_fpeven and sub_fpodd and use sub_lo and sub_hi instead. 
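Concretely, each 64-bit register D<i>_64 now covers a low half F<i> and a high half F_HI<i>, reached through the generic sub_lo and sub_hi indices. A plain C++ illustration of the bit-level split, assuming sub_lo maps to bits 31..0 and sub_hi to bits 63..32 (the backend's actual register handling also accounts for endianness):

#include <cstdint>
#include <cstdio>

// Bit-level picture of the pairing only; this is not LLVM code.
struct FPR64 {
  uint64_t Bits;
  uint32_t lo() const { return static_cast<uint32_t>(Bits); }        // F<i>
  uint32_t hi() const { return static_cast<uint32_t>(Bits >> 32); }  // F_HI<i>
};

int main() {
  const FPR64 D0{0x4010000000000000ULL}; // IEEE-754 encoding of 4.0
  std::printf("sub_lo = 0x%08x  sub_hi = 0x%08x\n",
              (unsigned)D0.lo(), (unsigned)D0.hi());
  return 0;
}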
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188842 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 15 +++++++++++++++ lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 17 +++++++++++++++++ lib/Target/Mips/MipsInstrFPU.td | 2 +- lib/Target/Mips/MipsRegisterInfo.td | 23 ++++++++++++++++++----- lib/Target/Mips/MipsSEFrameLowering.cpp | 4 ++-- lib/Target/Mips/MipsSEInstrInfo.cpp | 15 +++++++-------- 6 files changed, 60 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index c9bc29a..e92d58a 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -110,6 +110,9 @@ class MipsAsmParser : public MCTargetAsmParser { parseFGR32Regs(SmallVectorImpl &Operands); MipsAsmParser::OperandMatchResultTy + parseFGRH32Regs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy parseFCCRegs(SmallVectorImpl &Operands); MipsAsmParser::OperandMatchResultTy @@ -224,6 +227,7 @@ public: Kind_GPR64, Kind_HWRegs, Kind_FGR32Regs, + Kind_FGRH32Regs, Kind_FGR64Regs, Kind_AFGR64Regs, Kind_CCRRegs, @@ -408,6 +412,10 @@ public: return (Kind == k_Register) && Reg.Kind == Kind_FGR32Regs; } + bool isFGRH32Asm() const { + return (Kind == k_Register) && Reg.Kind == Kind_FGRH32Regs; + } + bool isFCCRegsAsm() const { return (Kind == k_Register) && Reg.Kind == Kind_FCCRegs; } @@ -893,6 +901,7 @@ int MipsAsmParser::regKindToRegClass(int RegKind) { case MipsOperand::Kind_GPR64: return Mips::GPR64RegClassID; case MipsOperand::Kind_HWRegs: return Mips::HWRegsRegClassID; case MipsOperand::Kind_FGR32Regs: return Mips::FGR32RegClassID; + case MipsOperand::Kind_FGRH32Regs: return Mips::FGRH32RegClassID; case MipsOperand::Kind_FGR64Regs: return Mips::FGR64RegClassID; case MipsOperand::Kind_AFGR64Regs: return Mips::AFGR64RegClassID; case MipsOperand::Kind_CCRRegs: return Mips::CCRRegClassID; @@ -1310,6 +1319,7 @@ MipsAsmParser::parseRegs(SmallVectorImpl &Operands, case MipsOperand::Kind_AFGR64Regs: case MipsOperand::Kind_FGR64Regs: case MipsOperand::Kind_FGR32Regs: + case MipsOperand::Kind_FGRH32Regs: RegNum = matchFPURegisterName(RegName); if (RegKind == MipsOperand::Kind_AFGR64Regs) RegNum /= 2; @@ -1416,6 +1426,11 @@ MipsAsmParser::parseFGR32Regs(SmallVectorImpl &Operands) { } MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseFGRH32Regs(SmallVectorImpl &Operands) { + return parseRegs(Operands, (int) MipsOperand::Kind_FGRH32Regs); +} + +MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseFCCRegs(SmallVectorImpl &Operands) { return parseRegs(Operands, (int) MipsOperand::Kind_FCCRegs); } diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 1f2d210..c6f3bab 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -118,6 +118,11 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFGRH32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -390,6 +395,18 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeFGRH32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + 
if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::FGRH32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 28fdbf5..92a828c 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -576,7 +576,7 @@ let Predicates = [IsFP64bit, HasStdEnc] in { def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)), (PseudoCVT_D64_W GPR32Opnd:$src)>; def : MipsPat<(f32 (sint_to_fp GPR64Opnd:$src)), - (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_32)>; + (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_lo)>; def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)), (PseudoCVT_D64_L GPR64Opnd:$src)>; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index b7c005f..1125c2e 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -11,8 +11,6 @@ // Declarations that describe the MIPS register file //===----------------------------------------------------------------------===// let Namespace = "Mips" in { -def sub_fpeven : SubRegIndex<32>; -def sub_fpodd : SubRegIndex<32, 32>; def sub_32 : SubRegIndex<32>; def sub_64 : SubRegIndex<64>; def sub_lo : SubRegIndex<32>; @@ -55,13 +53,13 @@ class FPR Enc, string n> : MipsReg; // Mips 64-bit (aliased) FPU Registers class AFPR Enc, string n, list subregs> : MipsRegWithSubRegs { - let SubRegIndices = [sub_fpeven, sub_fpodd]; + let SubRegIndices = [sub_lo, sub_hi]; let CoveredBySubRegs = 1; } class AFPR64 Enc, string n, list subregs> : MipsRegWithSubRegs { - let SubRegIndices = [sub_32]; + let SubRegIndices = [sub_lo, sub_hi]; } // Mips 128-bit (aliased) MSA Registers @@ -157,6 +155,10 @@ let Namespace = "Mips" in { foreach I = 0-31 in def F#I : FPR, DwarfRegNum<[!add(I, 32)]>; + // Higher half of 64-bit FP registers. + foreach I = 0-31 in + def F_HI#I : FPR, DwarfRegNum<[!add(I, 32)]>; + /// Mips Double point precision FPU Registers (aliased /// with the single precision to hold 64 bit values) foreach I = 0-15 in @@ -166,7 +168,7 @@ let Namespace = "Mips" in { /// Mips Double point precision FPU Registers in MFP64 mode. 
foreach I = 0-31 in - def D#I#_64 : AFPR64("F"#I)]>, + def D#I#_64 : AFPR64("F"#I), !cast("F_HI"#I)]>, DwarfRegNum<[!add(I, 32)]>; /// Mips MSA registers @@ -321,6 +323,8 @@ def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable; // * FGR32 - 32 32-bit registers (single float only mode) def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>; +def FGRH32 : RegisterClass<"Mips", [f32], 32, (sequence "F_HI%u", 0, 31)>; + def AFGR64 : RegisterClass<"Mips", [f64], 64, (add // Return Values and Arguments D0, D1, @@ -423,6 +427,11 @@ def FGR32AsmOperand : MipsAsmRegOperand { let ParserMethod = "parseFGR32Regs"; } +def FGRH32AsmOperand : MipsAsmRegOperand { + let Name = "FGRH32Asm"; + let ParserMethod = "parseFGRH32Regs"; +} + def FCCRegsAsmOperand : MipsAsmRegOperand { let Name = "FCCRegsAsm"; let ParserMethod = "parseFCCRegs"; @@ -465,6 +474,10 @@ def FGR32Opnd : RegisterOperand { let ParserMatchClass = FGR32AsmOperand; } +def FGRH32Opnd : RegisterOperand { + let ParserMatchClass = FGRH32AsmOperand; +} + def FCCRegsOpnd : RegisterOperand { let ParserMatchClass = FCCRegsAsmOperand; } diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index edbbe5d..ab69453 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -313,9 +313,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { // one for each of the paired single precision registers. if (Mips::AFGR64RegClass.contains(Reg)) { unsigned Reg0 = - MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpeven), true); + MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true); unsigned Reg1 = - MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpodd), true); + MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true); if (!STI.isLittle()) std::swap(Reg0, Reg1); diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index d0c8e62..86431a2 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -404,16 +404,15 @@ void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg; unsigned KillSrc = getKillRegState(Src.isKill()); DebugLoc DL = I->getDebugLoc(); - unsigned SubIdx = (IsI64 ? Mips::sub_32 : Mips::sub_fpeven); bool DstIsLarger, SrcIsLarger; tie(DstIsLarger, SrcIsLarger) = compareOpndSize(CvtOpc, *MBB.getParent()); if (DstIsLarger) - TmpReg = getRegisterInfo().getSubReg(DstReg, SubIdx); + TmpReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); if (SrcIsLarger) - DstReg = getRegisterInfo().getSubReg(DstReg, SubIdx); + DstReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc); BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill); @@ -428,7 +427,7 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, DebugLoc dl = I->getDebugLoc(); assert(N < 2 && "Invalid immediate"); - unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven; + unsigned SubIdx = N ? 
Mips::sub_hi : Mips::sub_lo; unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); @@ -444,9 +443,9 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, // mtc1 Lo, $fp // mtc1 Hi, $fp + 1 - BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven)) + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo)) .addReg(LoReg); - BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd)) + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi)) .addReg(HiReg); } @@ -482,8 +481,8 @@ void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB, const TargetRegisterInfo &TRI = getRegisterInfo(); const MachineOperand &ValReg = I->getOperand(0); - unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpeven); - unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpodd); + unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_lo); + unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_hi); if (!TM.getSubtarget().isLittle()) std::swap(LoReg, HiReg); -- cgit v1.1 From c89cb45ecb861aa599a8b1c735b0ce4cd73e1397 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 20 Aug 2013 23:21:55 +0000 Subject: [mips] Remove predicates that were incorrectly or unnecessarily added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188845 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 2 +- lib/Target/Mips/MipsCallingConv.td | 2 +- lib/Target/Mips/MipsInstrFPU.td | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 659a7a2..11efdb7 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -288,7 +288,7 @@ defm : SetgeImmPats; // truncate def : MipsPat<(i32 (trunc GPR64:$src)), (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>, - Requires<[IsN64, HasStdEnc]>; + Requires<[HasStdEnc]>; // 32-to-64-bit extension def : MipsPat<(i64 (anyext GPR32:$src)), (SLL64_32 GPR32:$src)>; diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index ac40b11..ed515ef 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -27,7 +27,7 @@ def RetCC_MipsO32 : CallingConv<[ CCIfType<[f32], CCAssignToReg<[F0, F2]>>, // f64 are returned in register D0, D1 - CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0, D1]>>> + CCIfType<[f64], CCAssignToReg<[D0, D1]>> ]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 92a828c..aef1ace 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -358,12 +358,12 @@ let Predicates = [HasStdEnc] in { def SWC1 : SW_FT<"swc1", FGR32Opnd, IIFStore, store>, LW_FM<0x39>; } -let Predicates = [HasMips64, HasStdEnc], DecoderNamespace = "Mips64" in { +let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { def LDC164 : LW_FT<"ldc1", FGR64Opnd, IIFLoad, load>, LW_FM<0x35>; def SDC164 : SW_FT<"sdc1", FGR64Opnd, IIFStore, store>, LW_FM<0x3d>; } -let Predicates = [NotMips64, HasStdEnc] in { +let Predicates = [NotFP64bit, HasStdEnc] in { let isPseudo = 1, isCodeGenOnly = 1 in { def PseudoLDC1 : LW_FT<"", AFGR64Opnd, IIFLoad, load>; def PseudoSDC1 : SW_FT<"", AFGR64Opnd, IIFStore, store>; @@ -600,12 +600,12 @@ let AddedComplexity = 40 in { def : 
StoreRegImmPat; } - let Predicates = [HasMips64, HasStdEnc] in { + let Predicates = [IsFP64bit, HasStdEnc] in { def : LoadRegImmPat; def : StoreRegImmPat; } - let Predicates = [NotMips64, HasStdEnc] in { + let Predicates = [NotFP64bit, HasStdEnc] in { def : LoadRegImmPat; def : StoreRegImmPat; } -- cgit v1.1 From ad341d48f0fc131d1c31a0c824736e70c34e0476 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 20 Aug 2013 23:38:40 +0000 Subject: [mips] Add support for calling convention CC_MipsO32_FP64, which is used when the size of floating point registers is 64-bit. Test case will be added when support for mfhc1 and mthc1 is added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188847 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCallingConv.td | 20 +++++++++++++--- lib/Target/Mips/MipsISelLowering.cpp | 42 +++++++++++++++++++++++----------- lib/Target/Mips/MipsISelLowering.h | 7 +++--- lib/Target/Mips/MipsRegisterInfo.cpp | 30 +++++++++++++++--------- lib/Target/Mips/MipsSEISelLowering.cpp | 2 +- lib/Target/Mips/MipsSubtarget.h | 1 + 6 files changed, 70 insertions(+), 32 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index ed515ef..66391cb 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -26,8 +26,10 @@ def RetCC_MipsO32 : CallingConv<[ // f32 are returned in registers F0, F2 CCIfType<[f32], CCAssignToReg<[F0, F2]>>, - // f64 are returned in register D0, D1 - CCIfType<[f64], CCAssignToReg<[D0, D1]>> + // f64 arguments are returned in D0_64 and D1_64 in FP64bit mode or + // in D0 and D1 in FP32bit mode. + CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D1_64]>>>, + CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()", CCAssignToReg<[D0, D1]>>> ]>; //===----------------------------------------------------------------------===// @@ -149,7 +151,16 @@ def RetCC_MipsEABI : CallingConv<[ //===----------------------------------------------------------------------===// def CC_MipsO32_FastCC : CallingConv<[ // f64 arguments are passed in double-precision floating pointer registers. - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>, + CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()", + CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9]>>>, + CCIfType<[f64], CCIfSubtarget<"isFP64bit()", + CCAssignToReg<[D0_64, D1_64, D2_64, D3_64, + D4_64, D5_64, D6_64, D7_64, + D8_64, D9_64, D10_64, D11_64, + D12_64, D13_64, D14_64, D15_64, + D16_64, D17_64, D18_64, + D19_64]>>>, // Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned. 
CCIfType<[f64], CCAssignToStack<8, 8>>
@@ -224,6 +235,9 @@ def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
 def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
                                    (sequence "S%u", 7, 0))>;
 
+def CSR_O32_FP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 20), RA, FP,
+                                        (sequence "S%u", 7, 0))>;
+
 def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64,
                                    D23_64, D22_64, D21_64, RA_64, FP_64, GP_64,
                                    (sequence "S%u_64", 7, 0))>;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 3601a13..c13f53a 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -2119,7 +2119,8 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
 
 static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
                        MVT LocVT, CCValAssign::LocInfo LocInfo,
-                       ISD::ArgFlagsTy ArgFlags, CCState &State) {
+                       ISD::ArgFlagsTy ArgFlags, CCState &State,
+                       const uint16_t *F64Regs) {
 
   static const unsigned IntRegsSize=4, FloatRegsSize=2;
 
@@ -2129,9 +2130,6 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
   static const uint16_t F32Regs[] = {
     Mips::F12, Mips::F14
   };
-  static const uint16_t F64Regs[] = {
-    Mips::D6, Mips::D7
-  };
 
   // Do not process byval args here.
   if (ArgFlags.isByVal())
@@ -2200,6 +2198,22 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
   return false;
 }
 
+static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT,
+                            MVT LocVT, CCValAssign::LocInfo LocInfo,
+                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  static const uint16_t F64Regs[] = { Mips::D6, Mips::D7 };
+
+  return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
+}
+
+static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT,
+                            MVT LocVT, CCValAssign::LocInfo LocInfo,
+                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  static const uint16_t F64Regs[] = { Mips::D12_64, Mips::D14_64 };
+
+  return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
+}
+
 #include "MipsGenCallingConv.inc"
 
 //===----------------------------------------------------------------------===//
@@ -2312,7 +2326,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                     getTargetMachine(), ArgLocs, *DAG.getContext());
   MipsCC::SpecialCallingConvType SpecialCallingConv =
     getSpecialCallingConv(Callee);
-  MipsCC MipsCCInfo(CallConv, IsO32, CCInfo, SpecialCallingConv);
+  MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo,
+                    SpecialCallingConv);
 
   MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
                                  getTargetMachine().Options.UseSoftFloat,
@@ -2499,7 +2514,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   SmallVector<CCValAssign, 16> RVLocs;
   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
                  getTargetMachine(), RVLocs, *DAG.getContext());
-  MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+  MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo);
 
   MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat,
                                CallNode, RetTy);
@@ -2546,7 +2561,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
                  getTargetMachine(), ArgLocs, *DAG.getContext());
-  MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+  MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo);
   Function::const_arg_iterator FuncArg =
     DAG.getMachineFunction().getFunction()->arg_begin();
   bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat;
@@ -2590,7 +2605,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
       else if (RegVT ==
MVT::f32) RC = &Mips::FGR32RegClass; else if (RegVT == MVT::f64) - RC = HasMips64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass; + RC = Subtarget->isFP64bit() ? &Mips::FGR64RegClass : + &Mips::AFGR64RegClass; else llvm_unreachable("RegVT not supported by FormalArguments Lowering"); @@ -2705,7 +2721,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs, *DAG.getContext()); - MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); + MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo); // Analyze return values. MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat, @@ -3178,9 +3194,9 @@ MipsTargetLowering::MipsCC::SpecialCallingConvType } MipsTargetLowering::MipsCC::MipsCC( - CallingConv::ID CC, bool IsO32_, CCState &Info, + CallingConv::ID CC, bool IsO32_, bool IsFP64_, CCState &Info, MipsCC::SpecialCallingConvType SpecialCallingConv_) - : CCInfo(Info), CallConv(CC), IsO32(IsO32_), + : CCInfo(Info), CallConv(CC), IsO32(IsO32_), IsFP64(IsFP64_), SpecialCallingConv(SpecialCallingConv_){ // Pre-allocate reserved argument area. CCInfo.AllocateStack(reservedArgArea(), 1); @@ -3336,11 +3352,11 @@ llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const { if (SpecialCallingConv == Mips16RetHelperConv) return CC_Mips16RetHelper; - return IsO32 ? CC_MipsO32 : CC_MipsN; + return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN; } llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const { - return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg; + return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN_VarArg; } const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 686a382..29671b0 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -244,9 +244,8 @@ namespace llvm { Mips16RetHelperConv, NoSpecialCallingConv }; - MipsCC( - CallingConv::ID CallConv, bool IsO32, CCState &Info, - SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv); + MipsCC(CallingConv::ID CallConv, bool IsO32, bool IsFP64, CCState &Info, + SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv); void analyzeCallOperands(const SmallVectorImpl &Outs, @@ -319,7 +318,7 @@ namespace llvm { CCState &CCInfo; CallingConv::ID CallConv; - bool IsO32; + bool IsO32, IsFP64; SpecialCallingConvType SpecialCallingConv; SmallVector ByValArgs; }; diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 26891ef..03d09a6 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -83,26 +83,34 @@ const uint16_t* MipsRegisterInfo:: getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_SaveList; - else if (!Subtarget.hasMips64()) - return CSR_O32_SaveList; - else if (Subtarget.isABI_N32()) + + if (Subtarget.isABI_N64()) + return CSR_N64_SaveList; + + if (Subtarget.isABI_N32()) return CSR_N32_SaveList; - assert(Subtarget.isABI_N64()); - return CSR_N64_SaveList; + if (Subtarget.isFP64bit()) + return CSR_O32_FP64_SaveList; + + return CSR_O32_SaveList; } const uint32_t* MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_RegMask; - else if (!Subtarget.hasMips64()) - return CSR_O32_RegMask; - else if (Subtarget.isABI_N32()) + + if 
(Subtarget.isABI_N64()) + return CSR_N64_RegMask; + + if (Subtarget.isABI_N32()) return CSR_N32_RegMask; - assert(Subtarget.isABI_N64()); - return CSR_N64_RegMask; + if (Subtarget.isFP64bit()) + return CSR_O32_FP64_RegMask; + + return CSR_O32_RegMask; } const uint32_t *MipsRegisterInfo::getMips16RetHelperMask() { @@ -128,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I) Reserved.set(ReservedGPR64[I]); - if (Subtarget.hasMips64()) { + if (Subtarget.isFP64bit()) { // Reserve all registers in AFGR64. for (RegIter Reg = Mips::AFGR64RegClass.begin(), EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg) diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 750ec0e..fb72251 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -92,7 +92,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) // When dealing with single precision only, use libcalls if (!Subtarget->isSingleFloat()) { - if (HasMips64) + if (Subtarget->isFP64bit()) addRegisterClass(MVT::f64, &Mips::FGR64RegClass); else addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 21d6938..03bef69 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -160,6 +160,7 @@ public: bool isLittle() const { return IsLittle; } bool isFP64bit() const { return IsFP64bit; } + bool isNotFP64bit() const { return !IsFP64bit; } bool isGP64bit() const { return IsGP64bit; } bool isGP32bit() const { return !IsGP64bit; } bool isSingleFloat() const { return IsSingleFloat; } -- cgit v1.1 From b1f4f120a50c392c85c6b4388d63e36251fce279 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 20 Aug 2013 23:47:25 +0000 Subject: [mips] Add support for mfhc1 and mthc1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188848 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFPU.td | 29 ++++++++++++++++++++--------- lib/Target/Mips/MipsSEInstrInfo.cpp | 31 +++++++++++++++++++++++-------- lib/Target/Mips/MipsSEInstrInfo.h | 4 ++-- 3 files changed, 45 insertions(+), 19 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index aef1ace..ccf3458 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -338,6 +338,10 @@ def MFC1 : MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, IIFmoveC1, bitconvert>, MFC1_FM<0>; def MTC1 : MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, IIFmoveC1, bitconvert>, MFC1_FM<4>; +def MFHC1 : MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, IIFmoveC1>, + MFC1_FM<3>; +def MTHC1 : MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, IIFmoveC1>, + MFC1_FM<7>; def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, IIFmoveC1, bitconvert>, MFC1_FM<1>; def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, IIFmoveC1, @@ -526,20 +530,27 @@ def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. 
-def BuildPairF64 : - PseudoSE<(outs AFGR64Opnd:$dst), - (ins GPR32Opnd:$lo, GPR32Opnd:$hi), - [(set AFGR64Opnd:$dst, - (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>; +class BuildPairF64Base : + PseudoSE<(outs RO:$dst), (ins GPR32Opnd:$lo, GPR32Opnd:$hi), + [(set RO:$dst, (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>; + +def BuildPairF64 : BuildPairF64Base, + Requires<[NotFP64bit, HasStdEnc]>; +def BuildPairF64_64 : BuildPairF64Base, + Requires<[IsFP64bit, HasStdEnc]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register // allocation. // if n is 0, lower part of src is extracted. // if n is 1, higher part of src is extracted. -def ExtractElementF64 : - PseudoSE<(outs GPR32Opnd:$dst), (ins AFGR64Opnd:$src, i32imm:$n), - [(set GPR32Opnd:$dst, - (MipsExtractElementF64 AFGR64Opnd:$src, imm:$n))]>; +class ExtractElementF64Base : + PseudoSE<(outs GPR32Opnd:$dst), (ins RO:$src, i32imm:$n), + [(set GPR32Opnd:$dst, (MipsExtractElementF64 RO:$src, imm:$n))]>; + +def ExtractElementF64 : ExtractElementF64Base, + Requires<[NotFP64bit, HasStdEnc]>; +def ExtractElementF64_64 : ExtractElementF64Base, + Requires<[IsFP64bit, HasStdEnc]>; //===----------------------------------------------------------------------===// // InstAliases. diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 86431a2..a81db7f 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -263,10 +263,16 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { expandCvtFPInt(MBB, MI, Mips::CVT_D64_L, Mips::DMTC1, true); break; case Mips::BuildPairF64: - expandBuildPairF64(MBB, MI); + expandBuildPairF64(MBB, MI, false); + break; + case Mips::BuildPairF64_64: + expandBuildPairF64(MBB, MI, true); break; case Mips::ExtractElementF64: - expandExtractElementF64(MBB, MI); + expandExtractElementF64(MBB, MI, false); + break; + case Mips::ExtractElementF64_64: + expandExtractElementF64(MBB, MI, true); break; case Mips::PseudoLDC1: expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1); @@ -419,22 +425,26 @@ void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, } void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { + MachineBasicBlock::iterator I, + bool FP64) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); unsigned N = I->getOperand(2).getImm(); - const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1); DebugLoc dl = I->getDebugLoc(); assert(N < 2 && "Invalid immediate"); unsigned SubIdx = N ? 
Mips::sub_hi : Mips::sub_lo; unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); - BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); + if (SubIdx == Mips::sub_hi && FP64) + BuildMI(MBB, I, dl, get(Mips::MFHC1), DstReg).addReg(SubReg); + else + BuildMI(MBB, I, dl, get(Mips::MFC1), DstReg).addReg(SubReg); } void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { + MachineBasicBlock::iterator I, + bool FP64) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); @@ -445,8 +455,13 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, // mtc1 Hi, $fp + 1 BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo)) .addReg(LoReg); - BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi)) - .addReg(HiReg); + + if (FP64) + BuildMI(MBB, I, dl, get(Mips::MTHC1), TRI.getSubReg(DstReg, Mips::sub_hi)) + .addReg(HiReg); + else + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi)) + .addReg(HiReg); } /// Add 4 to the displacement of operand MO. diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index d962ef0..6b4f89a 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -101,9 +101,9 @@ private: unsigned CvtOpc, unsigned MovOpc, bool IsI64) const; void expandExtractElementF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I, bool FP64) const; void expandBuildPairF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I, bool FP64) const; void expandDPLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned OpcD, unsigned OpcS) const; -- cgit v1.1 From d22b327b3d8fafade61fa2b4aaba5c9f3ee10d4d Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 21 Aug 2013 01:18:46 +0000 Subject: [micromips] Print instruction alias "not" if the last operand of a nor is zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188851 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 369fece..755a948 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -257,6 +257,7 @@ bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) { // jalr $ra, $r1 => jalr $r1 return isReg(MI, 0) && printAlias("jalr", MI, 1, OS); case Mips::NOR: + case Mips::NOR_MM: // nor $r0, $r1, $zero => not $r0, $r1 return isReg(MI, 2) && printAlias("not", MI, 0, 1, OS); case Mips::NOR64: -- cgit v1.1 From d5a2eb092580397dbf0b791a4e4530ad12acb795 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 21 Aug 2013 02:37:25 +0000 Subject: X86TargetMachine.cpp: Clarify to emit GOT in i686-{cygming|win32}-elf for mcjit. I suppose all "lli -use-mcjit i686-*" should require GOT, (and to fail.) 
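As a rough sketch of the effect (simplified from the X86TargetMachine
constructor; not the complete selection logic), every 32-bit COFF
environment, Cygwin/MinGW and plain win32 alike, now takes the same branch:

    if (Subtarget.is64Bit()) {
      // PIC in 64-bit mode is always rip-relative.
      Subtarget.setPICStyle(PICStyles::RIPRel);
    } else if (Subtarget.isTargetCOFF()) {
      // Was isTargetCygMing(); isTargetCOFF() also covers i686-win32,
      // so MCJIT sees the same no-PIC style on every 32-bit COFF triple.
      Subtarget.setPICStyle(PICStyles::None);
    } else if (Subtarget.isTargetDarwin()) {
      // Darwin handling is unchanged.
    }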
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188856 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86TargetMachine.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 49ebd1a..81423a3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -92,7 +92,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
   } else if (Subtarget.is64Bit()) {
     // PIC in 64 bit mode is always rip-rel.
     Subtarget.setPICStyle(PICStyles::RIPRel);
-  } else if (Subtarget.isTargetCygMing()) {
+  } else if (Subtarget.isTargetCOFF()) {
     Subtarget.setPICStyle(PICStyles::None);
   } else if (Subtarget.isTargetDarwin()) {
     if (getRelocationModel() == Reloc::PIC_)
-- 
cgit v1.1


From 33b5fe7f160886a0718e8cad4ac0d896d5d5c46f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 21 Aug 2013 03:57:57 +0000
Subject: Rename mattr names for AVX-512 from avx-512 -> avx512f, avx-512-pfi ->
 avx512pf, avx-512-cdi -> avx512cd, avx-512-eri -> avx512er. This matches the
 official docs better and is what the gcc patches appear to be using. I didn't
 touch the has* functions or the feature flag names to avoid changing the td
 and lowering files while commits are still happening.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188859 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86.td           | 8 ++++----
 lib/Target/X86/X86Subtarget.cpp | 2 +-
 lib/Target/X86/X86Subtarget.h   | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 461ea9b..ca49482 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -86,16 +86,16 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
 def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                                    "Enable AVX2 instructions",
                                    [FeatureAVX]>;
-def FeatureAVX512 : SubtargetFeature<"avx-512", "X86SSELevel", "AVX512",
+def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
                                    "Enable AVX-512 instructions",
                                    [FeatureAVX2]>;
-def FeatureERI : SubtargetFeature<"avx-512-eri", "HasERI", "true",
+def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
                       "Enable AVX-512 Exponential and Reciprocal Instructions",
                                    [FeatureAVX512]>;
-def FeatureCDI : SubtargetFeature<"avx-512-cdi", "HasCDI", "true",
+def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                                    [FeatureAVX512]>;
-def FeaturePFI : SubtargetFeature<"avx-512-pfi", "HasPFI", "true",
+def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                                    [FeatureAVX512]>;
 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index d428d44..a887b81 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -352,7 +352,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
       ToggleFeature(X86::FeatureRTM);
     }
     if (IsIntel && ((EBX >> 16) & 0x1)) {
-      X86SSELevel = AVX512;
+      X86SSELevel = AVX512F;
       ToggleFeature(X86::FeatureAVX512);
     }
     if (IsIntel && ((EBX >> 18) & 0x1)) {
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8793238..0c1e999 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -42,7 +42,7 @@ enum Style {
 class X86Subtarget : public X86GenSubtargetInfo {
 protected:
   enum X86SSEEnum {
-    NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
+    NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
   };
 
   enum X863DNowEnum {
@@ -258,7 +258,7 @@ public:
   bool hasSSE42() const { return X86SSELevel >= SSE42; }
   bool hasAVX() const { return X86SSELevel >= AVX; }
   bool hasAVX2() const { return X86SSELevel >= AVX2; }
-  bool hasAVX512() const { return X86SSELevel >= AVX512; }
+  bool hasAVX512() const { return X86SSELevel >= AVX512F; }
   bool hasFp256() const { return hasAVX(); }
   bool hasInt256() const { return hasAVX2(); }
   bool hasSSE4A() const { return HasSSE4A; }
-- 
cgit v1.1


From ec7b5e9290bc0e1b266525a4757773e9ab87847e Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Wed, 21 Aug 2013 05:03:10 +0000
Subject: In LLVM, FMA3 operands are dst, src1, src2, src3; however, dst is not
 encoded, as it is always src1. This was causing the encoding of the operands
 to be off by one. Patch by Chris Bieneman.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188866 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86CodeEmitter.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 5d72b44..0f9c562 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -985,8 +985,14 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
       if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
         VEX_R = 0x0;
 
-      if (HasVEX_4V)
-        VEX_4V = getVEXRegisterEncoding(MI, 1);
+      if (HasVEX_4V) {
+        if (HasMemOp4)
+          VEX_4V = getVEXRegisterEncoding(MI, 1);
+        else
+          // FMA3 instructions operands are dst, src1, src2, src3
+          // dst and src1 are the same and not encoded separately
+          VEX_4V = getVEXRegisterEncoding(MI, 2);
+      }
 
       if (X86II::isX86_64ExtendedReg(
             MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
-- 
cgit v1.1


From 5bdf397e25bf0e5a6473f44d49a04e8ff184c442 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 21 Aug 2013 05:57:45 +0000
Subject: Synchronize VEX JIT encoding code with the MCJIT version.

Fix a bug in the MCJIT code where CurOp was being incremented even if the
operand it was pointing at wasn't used. This may only matter if there are
any EVEX_K instructions that aren't VEX_4V.
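A minimal sketch of the corrected operand walk (condensed from the MCJIT
emitter in X86MCCodeEmitter.cpp; not the full function):

    // CurOp must only advance past operands that were actually consumed.
    // It was previously incremented even when HasVEX_4V was false, so the
    // write-mask lookup below could read the wrong operand.
    if (HasVEX_4V) {
      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
      if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
        EVEX_V2 = 0x0;
      CurOp++;
    }
    if (HasEVEX_K)
      EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++);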
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188868 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 2 +- lib/Target/X86/X86CodeEmitter.cpp | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 8515879..7173d51 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -786,8 +786,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_4V = getVEXRegisterEncoding(MI, CurOp); if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) EVEX_V2 = 0x0; + CurOp++; } - CurOp++; if (HasEVEX_K) EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 0f9c562..e786ebb 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -982,16 +982,13 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, // FMA4: // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; + CurOp++; if (HasVEX_4V) { - if (HasMemOp4) - VEX_4V = getVEXRegisterEncoding(MI, 1); - else - // FMA3 instructions operands are dst, src1, src2, src3 - // dst and src1 are the same and not encoded separately - VEX_4V = getVEXRegisterEncoding(MI, 2); + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; } if (X86II::isX86_64ExtendedReg( @@ -1002,7 +999,7 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, VEX_X = 0x0; if (HasVEX_4VOp3) - VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1); + VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands); break; case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: @@ -1012,7 +1009,7 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, // MemAddr // src1(VEX_4V), MemAddr if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, 0); + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); if (X86II::isX86_64ExtendedReg( MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) @@ -1065,8 +1062,10 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, case X86II::MRM6r: case X86II::MRM7r: // MRM0r-MRM7r instructions forms: // dst(VEX_4V), src(ModR/M), imm8 - VEX_4V = getVEXRegisterEncoding(MI, 0); - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; break; default: // RawFrm -- cgit v1.1 From 099e5328fcfae96b406782d636fe02a4ecad4552 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 21 Aug 2013 08:48:25 +0000 Subject: [mips][msa] Define registers using foreach No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188893 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsRegisterInfo.td | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 1125c2e..f2e97b6 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -173,38 +173,9 @@ let Namespace = "Mips" in { /// 
Mips MSA registers /// MSA and FPU cannot both be present unless the FPU has 64-bit registers - def W0 : AFPR128<0, "w0", [D0_64]>, DwarfRegNum<[32]>; - def W1 : AFPR128<1, "w1", [D1_64]>, DwarfRegNum<[33]>; - def W2 : AFPR128<2, "w2", [D2_64]>, DwarfRegNum<[34]>; - def W3 : AFPR128<3, "w3", [D3_64]>, DwarfRegNum<[35]>; - def W4 : AFPR128<4, "w4", [D4_64]>, DwarfRegNum<[36]>; - def W5 : AFPR128<5, "w5", [D5_64]>, DwarfRegNum<[37]>; - def W6 : AFPR128<6, "w6", [D6_64]>, DwarfRegNum<[38]>; - def W7 : AFPR128<7, "w7", [D7_64]>, DwarfRegNum<[39]>; - def W8 : AFPR128<8, "w8", [D8_64]>, DwarfRegNum<[40]>; - def W9 : AFPR128<9, "w9", [D9_64]>, DwarfRegNum<[41]>; - def W10 : AFPR128<10, "w10", [D10_64]>, DwarfRegNum<[42]>; - def W11 : AFPR128<11, "w11", [D11_64]>, DwarfRegNum<[43]>; - def W12 : AFPR128<12, "w12", [D12_64]>, DwarfRegNum<[44]>; - def W13 : AFPR128<13, "w13", [D13_64]>, DwarfRegNum<[45]>; - def W14 : AFPR128<14, "w14", [D14_64]>, DwarfRegNum<[46]>; - def W15 : AFPR128<15, "w15", [D15_64]>, DwarfRegNum<[47]>; - def W16 : AFPR128<16, "w16", [D16_64]>, DwarfRegNum<[48]>; - def W17 : AFPR128<17, "w17", [D17_64]>, DwarfRegNum<[49]>; - def W18 : AFPR128<18, "w18", [D18_64]>, DwarfRegNum<[50]>; - def W19 : AFPR128<19, "w19", [D19_64]>, DwarfRegNum<[51]>; - def W20 : AFPR128<20, "w20", [D20_64]>, DwarfRegNum<[52]>; - def W21 : AFPR128<21, "w21", [D21_64]>, DwarfRegNum<[53]>; - def W22 : AFPR128<22, "w22", [D22_64]>, DwarfRegNum<[54]>; - def W23 : AFPR128<23, "w23", [D23_64]>, DwarfRegNum<[55]>; - def W24 : AFPR128<24, "w24", [D24_64]>, DwarfRegNum<[56]>; - def W25 : AFPR128<25, "w25", [D25_64]>, DwarfRegNum<[57]>; - def W26 : AFPR128<26, "w26", [D26_64]>, DwarfRegNum<[58]>; - def W27 : AFPR128<27, "w27", [D27_64]>, DwarfRegNum<[59]>; - def W28 : AFPR128<28, "w28", [D28_64]>, DwarfRegNum<[60]>; - def W29 : AFPR128<29, "w29", [D29_64]>, DwarfRegNum<[61]>; - def W30 : AFPR128<30, "w30", [D30_64]>, DwarfRegNum<[62]>; - def W31 : AFPR128<31, "w31", [D31_64]>, DwarfRegNum<[63]>; + foreach I = 0-31 in + def W#I : AFPR128<0, "w"#I, [!cast("D"#I#"_64")]>, + DwarfRegNum<[!add(I, 32)]>; // Hi/Lo registers def HI0 : Register<"ac0">, DwarfRegNum<[64]>; -- cgit v1.1 From d954716e7567282ff6f3d25b4f404bae006eed04 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 21 Aug 2013 08:58:08 +0000 Subject: [SystemZ] Add FI[EDX]BRA These are extensions of the existing FI[EDX]BR instructions, but use a spare bit to suppress inexact conditions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188894 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFP.td | 15 ++++++++++----- lib/Target/SystemZ/SystemZInstrFormats.td | 17 +++++++++++++++-- lib/Target/SystemZ/SystemZProcessors.td | 11 +++++++++-- lib/Target/SystemZ/SystemZSubtarget.cpp | 3 ++- lib/Target/SystemZ/SystemZSubtarget.h | 4 ++++ 5 files changed, 40 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 9f5279e..b407b86 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -212,15 +212,20 @@ def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>; def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>; // Round to an integer, with the second operand (modifier M3) specifying -// the rounding mode. -// -// These forms always check for inexact conditions. z196 added versions -// that allow this to suppressed (as for fnearbyint), but we don't yet -// support -march=z196. +// the rounding mode. 
These forms always check for inexact conditions. def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>; def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>; def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>; +// Extended forms of the previous three instructions. M4 can be set to 4 +// to suppress detection of inexact conditions. +def FIEBRA : UnaryRRF4<"fiebra", 0xB357, FP32, FP32>, + Requires<[FeatureFPExtension]>; +def FIDBRA : UnaryRRF4<"fidbra", 0xB35F, FP64, FP64>, + Requires<[FeatureFPExtension]>; +def FIXBRA : UnaryRRF4<"fixbra", 0xB347, FP128, FP128>, + Requires<[FeatureFPExtension]>; + // frint rounds according to the current mode (modifier 0) and detects // inexact conditions. def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 1f80c27..39b7639 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -308,10 +308,11 @@ class InstRRF op, dag outs, dag ins, string asmstr, list pattern> bits<4> R1; bits<4> R2; bits<4> R3; + bits<4> R4; let Inst{31-16} = op; let Inst{15-12} = R3; - let Inst{11-8} = 0; + let Inst{11-8} = R4; let Inst{7-4} = R1; let Inst{3-0} = R2; } @@ -719,8 +720,14 @@ class UnaryRRF opcode, RegisterOperand cls1, mnemonic#"r\t$R1, $R3, $R2", []> { let OpKey = mnemonic ## cls1; let OpType = "reg"; + let R4 = 0; } +class UnaryRRF4 opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF; + // These instructions are generated by if conversion. The old value of R1 // is added as an implicit use. class CondUnaryRRF opcode, RegisterOperand cls1, @@ -729,6 +736,7 @@ class CondUnaryRRF opcode, RegisterOperand cls1, mnemonic#"r$R3\t$R1, $R2", []>, Requires<[FeatureLoadStoreOnCond]> { let CCMaskLast = 1; + let R4 = 0; } // Like CondUnaryRRF, but used for the raw assembly form. The condition-code @@ -740,6 +748,7 @@ class AsmCondUnaryRRF opcode, RegisterOperand cls1, Requires<[FeatureLoadStoreOnCond]> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; + let R4 = 0; } // Like CondUnaryRRF, but with a fixed CC mask. 
@@ -751,6 +760,7 @@ class FixedCondUnaryRRF opcode, RegisterOperand cls1, let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let R3 = ccmask; + let R4 = 0; } class UnaryRI opcode, SDPatternOperator operator, @@ -898,13 +908,16 @@ class BinaryRRF opcode, SDPatternOperator operator, [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> { let OpKey = mnemonic ## cls1; let OpType = "reg"; + let R4 = 0; } class BinaryRRFK opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRF; + [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> { + let R4 = 0; +} multiclass BinaryRRAndK opcode1, bits<16> opcode2, SDPatternOperator operator, RegisterOperand cls1, diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td index 7e14aa7..00d4338 100644 --- a/lib/Target/SystemZ/SystemZProcessors.td +++ b/lib/Target/SystemZ/SystemZProcessors.td @@ -31,8 +31,15 @@ def FeatureHighWord : SystemZFeature< "Assume that the high-word facility is installed" >; +def FeatureFPExtension : SystemZFeature< + "fp-extension", "FPExtension", + "Assume that the floating-point extension facility is installed" +>; + def : Processor<"z10", NoItineraries, []>; def : Processor<"z196", NoItineraries, - [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord]>; + [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, + FeatureFPExtension]>; def : Processor<"zEC12", NoItineraries, - [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord]>; + [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, + FeatureFPExtension]>; diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index 036ec05..b6a6392 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -21,7 +21,8 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), - HasLoadStoreOnCond(false), HasHighWord(false), TargetTriple(TT) { + HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), + TargetTriple(TT) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "z10"; diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 4efb58d..f321cb2 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -30,6 +30,7 @@ protected: bool HasDistinctOps; bool HasLoadStoreOnCond; bool HasHighWord; + bool HasFPExtension; private: Triple TargetTriple; @@ -50,6 +51,9 @@ public: // Return true if the target has the high-word facility. bool hasHighWord() const { return HasHighWord; } + // Return true if the target has the floating-point extension facility. + bool hasFPExtension() const { return HasFPExtension; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. 
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, -- cgit v1.1 From d95865a2a2daeb7dd8b80c18e7409b28e7e4738a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 21 Aug 2013 09:04:20 +0000 Subject: [SystemZ] Use FI[EDX]BRA for codegen git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188895 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 9 +++++++++ lib/Target/SystemZ/SystemZInstrFP.td | 30 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 6710f89..7772b9e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -209,6 +209,15 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // We can use FI for FRINT. setOperationAction(ISD::FRINT, VT, Legal); + // We can use the extended form of FI for other rounding operations. + if (Subtarget.hasFPExtension()) { + setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FROUND, VT, Legal); + } + // No special instructions for these. setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index b407b86..dbe0fb5 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -232,6 +232,36 @@ def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>; def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>; def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; +let Predicates = [FeatureFPExtension] in { + // fnearbyint is like frint but does not detect inexact conditions. + def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + + // floor is no longer allowed to raise an inexact condition, + // so restrict it to the cases where the condition can be suppressed. + // Mode 7 is round towards -inf. + def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>; + def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>; + def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>; + + // Same idea for ceil, where mode 6 is round towards +inf. + def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>; + def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>; + def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>; + + // Same idea for trunc, where mode 5 is round towards zero. + def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>; + def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>; + def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>; + + // Same idea for round, where mode 1 is round towards nearest with + // ties away from zero. 
+  def : Pat<(frnd FP32:$src),  (FIEBRA 1, FP32:$src, 4)>;
+  def : Pat<(frnd FP64:$src),  (FIDBRA 1, FP64:$src, 4)>;
+  def : Pat<(frnd FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+}
 
 //===----------------------------------------------------------------------===//
 // Binary arithmetic
 //===----------------------------------------------------------------------===//
-- 
cgit v1.1


From 187dedf21d268aecac6d95211de0c496299fdd13 Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Wed, 21 Aug 2013 09:09:52 +0000
Subject: [mips][msa] Matheus Almeida pointed out a silly mistake in r188893.
 Fixed it.

I accidentally changed the encoding of the MSA registers to zero instead of
0 to 31. This change restores the encoding the registers had prior to
r188893.

This didn't show up in the existing tests because direct-object emission
isn't implemented yet for MSA.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188896 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsRegisterInfo.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index f2e97b6..0e762f2 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -174,7 +174,7 @@ let Namespace = "Mips" in {
   /// Mips MSA registers
   /// MSA and FPU cannot both be present unless the FPU has 64-bit registers
   foreach I = 0-31 in
-    def W#I : AFPR128<0, "w"#I, [!cast<FPR64>("D"#I#"_64")]>,
+    def W#I : AFPR128<I, "w"#I, [!cast<FPR64>("D"#I#"_64")]>,
               DwarfRegNum<[!add(I, 32)]>;
 
   // Hi/Lo registers
-- 
cgit v1.1


From df40f8e8ad0aa93defa44b8a136e8d871cfd44ea Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Wed, 21 Aug 2013 09:34:56 +0000
Subject: [SystemZ] Define remaining *MUL_LOHI patterns

The initial port used MLG(R) for i64 UMUL_LOHI but left the other three
combinations as not-legal-or-custom. Although 32x32->{32,32}
multiplications exist, they're not as quick as doing a normal 64-bit
multiplication, so it didn't seem like i32 SMUL_LOHI and UMUL_LOHI
would be useful. There's also no direct instruction for i64 SMUL_LOHI,
so it needs to be implemented in terms of UMUL_LOHI.

However, not defining these patterns means that we don't convert
division by a constant into multiplication, so this patch fills in
the other cases. The new i64 SMUL_LOHI sequence is simpler than the
one that we used previously for 64x64->128 multiplication, so
int-mul-08.ll now tests the full sequence.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188898 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/SystemZ/SystemZISelLowering.cpp | 88 ++++++++++++++++++++++++------
 lib/Target/SystemZ/SystemZISelLowering.h   |  1 +
 2 files changed, 73 insertions(+), 16 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 7772b9e..a1eecd7 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -128,9 +128,11 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
       setOperationAction(ISD::ROTR, VT, Expand);
 
-      // Use *MUL_LOHI where possible and a wider multiplication otherwise.
+      // Use *MUL_LOHI where possible instead of MULH*.
setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Custom); + setOperationAction(ISD::UMUL_LOHI, VT, Custom); // We have instructions for signed but not unsigned FP conversion. setOperationAction(ISD::FP_TO_UINT, VT, Expand); @@ -165,14 +167,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); - // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR), - // but they aren't really worth using. There is no 64-bit SMUL_LOHI, - // but there is a 64-bit UMUL_LOHI: MLGR. - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); - // FIXME: Can we support these natively? setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); @@ -1142,6 +1136,20 @@ static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, DL, MVT::Glue, CmpOp0, CmpOp1); } +// Implement a 32-bit *MUL_LOHI operation by extending both operands to +// 64 bits. Extend is the extension type to use. Store the high part +// in Hi and the low part in Lo. +static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, + unsigned Extend, SDValue Op0, SDValue Op1, + SDValue &Hi, SDValue &Lo) { + Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); + Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); + SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); + Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); + Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); +} + // Lower a binary operation that produces two VT results, one in each // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation @@ -1427,18 +1435,64 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, 2, DL); } -SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, +SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); - assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI"); + SDValue Ops[2]; + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. + lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else { + // Do a full 128-bit multiplication based on UMUL_LOHI64: + // + // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) + // + // but using the fact that the upper halves are either all zeros + // or all ones: + // + // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) + // + // and grouping the right terms together since they are quicker than the + // multiplication: + // + // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) + SDValue C63 = DAG.getConstant(63, MVT::i64); + SDValue LL = Op.getOperand(0); + SDValue RL = Op.getOperand(1); + SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); + SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. 
SMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + LL, RL, Ops[1], Ops[0]); + SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); + SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); + SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); + Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); + } + return DAG.getMergeValues(Ops, 2, DL); +} - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. UMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. +SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. + lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. UMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, 2, DL); } @@ -1706,6 +1760,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerVACOPY(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::SMUL_LOHI: + return lowerSMUL_LOHI(Op, DAG); case ISD::UMUL_LOHI: return lowerUMUL_LOHI(Op, DAG); case ISD::SDIVREM: diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 3692e1e..604453d 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -214,6 +214,7 @@ private: SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From 8ba76daba09e79b10c4aad8f4298433c6dafa6d5 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 21 Aug 2013 09:36:02 +0000 Subject: AVX-512: Added SHIFT instructions. 
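As an illustrative example (a hedged sketch; the exact assembly is not
verified here), a splat shift-by-immediate on v16i32 should now select the
new VPSLLDZ pattern as a single instruction under -mattr=+avx512f:

    define <16 x i32> @shl_v16i32(<16 x i32> %a) {
      ; expected to lower to roughly: vpslld $3, %zmm0, %zmm0
      %r = shl <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
                               i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
                               i32 3, i32 3, i32 3, i32 3>
      ret <16 x i32> %r
    }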
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188899 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 31 ++++++- lib/Target/X86/X86ISelLowering.h | 9 +- lib/Target/X86/X86InstrAVX512.td | 141 ++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrFragmentsSIMD.td | 3 + 4 files changed, 178 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3c3f09f..6e9ecef 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11269,6 +11269,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + case Intrinsic::x86_avx512_kortestz: + case Intrinsic::x86_avx512_kortestc: { + unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B; + SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1)); + SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2)); + SDValue CC = DAG.getConstant(X86CC, MVT::i8); + SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } // SSE/AVX shift intrinsics case Intrinsic::x86_sse2_psll_w: @@ -12135,7 +12145,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || (Subtarget->hasInt256() && - (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) { + (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) || + (Subtarget->hasAVX512() && + (VT == MVT::v8i64 || VT == MVT::v16i32))) { if (Op.getOpcode() == ISD::SHL) return DAG.getNode(X86ISD::VSHLI, dl, VT, R, DAG.getConstant(ShiftAmt, MVT::i32)); @@ -12297,7 +12309,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, VT == MVT::v4i32 || VT == MVT::v8i16 || (Subtarget->hasInt256() && ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) || - VT == MVT::v8i32 || VT == MVT::v16i16))) { + VT == MVT::v8i32 || VT == MVT::v16i16)) || + (Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) { SDValue BaseShAmt; EVT EltVT = VT.getVectorElementType(); @@ -12365,6 +12378,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, case MVT::v4i64: case MVT::v8i32: case MVT::v16i16: + case MVT::v16i32: + case MVT::v8i64: return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); } case ISD::SRA: @@ -12374,6 +12389,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, case MVT::v8i16: case MVT::v8i32: case MVT::v16i16: + case MVT::v16i32: + case MVT::v8i64: return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); } case ISD::SRL: @@ -12385,6 +12402,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, case MVT::v4i64: case MVT::v8i32: case MVT::v16i16: + case MVT::v16i32: + case MVT::v8i64: return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG); } } @@ -12393,7 +12412,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, // Special case in 32-bit mode, where i64 is expanded into high and low parts. 
if (!Subtarget->is64Bit() && - (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) || + (Subtarget->hasAVX512() && VT == MVT::v8i64)) && Amt.getOpcode() == ISD::BITCAST && Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { Amt = Amt.getOperand(0); @@ -12442,6 +12462,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, if (V.getNode()) return V; + if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64)) + return Op; // AVX2 has VPSLLV/VPSRAV/VPSRLV. if (Subtarget->hasInt256()) { if (Op.getOpcode() == ISD::SRL && @@ -13350,6 +13372,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; + case X86ISD::TESTM: return "X86ISD::TESTM"; + case X86ISD::KORTEST: return "X86ISD::KORTEST"; + case X86ISD::KTEST: return "X86ISD::KTEST"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index eafe027..40b2a9c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -274,7 +274,7 @@ namespace llvm { // PCMP* - Vector integer comparisons. PCMPEQ, PCMPGT, - // PCMP*M - Vector integer comparisons, the result is in a mask vector + // PCMP*M - Vector integer comparisons, the result is in a mask vector. PCMPEQM, PCMPGTM, /// CMPM, CMPMU - Vector comparison generating mask bits for fp and @@ -295,12 +295,15 @@ namespace llvm { // MUL_IMM - X86 specific multiply by immediate. MUL_IMM, - // PTEST - Vector bitwise comparisons + // PTEST - Vector bitwise comparisons. PTEST, - // TESTP - Vector packed fp sign bitwise comparisons + // TESTP - Vector packed fp sign bitwise comparisons. TESTP, + // TESTM - Vector "test" in AVX-512, the result is in a mask vector. 
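// (Editor's annotation: TESTM models the AVX-512 VPTESTM semantics,
// roughly mask[i] = ((LHS[i] & RHS[i]) != 0), producing one bit per
// element in a k-register, whereas PTEST/TESTP produce a single flags
// result.)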
+ TESTM, + // OR/AND test for masks KORTEST, KTEST, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index ccbd18e..c3fb801 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1691,3 +1691,144 @@ defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem, SSE_ALU_ITINS_P.d, 0>, EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; +//===----------------------------------------------------------------------===// +// AVX-512 VPTESTM instructions +//===----------------------------------------------------------------------===// + +multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, + SDNode OpNode, ValueType vt> { + def rr : AVX5128I, EVEX_4V; + def rm : AVX5128I, EVEX_4V; +} + +defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem, + memopv16i32, X86testm, v16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, memopv8i64, + X86testm, v8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 Shift instructions +//===----------------------------------------------------------------------===// +multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, + SDNode OpNode, RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag> { + def ri : AVX512BIi8, EVEX_4V; + def mi: AVX512BIi8, EVEX_4V; +} + +multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, ValueType SrcVT, + PatFrag bc_frag> { + // src2 is always 128-bit + def rr : AVX512BI, EVEX_4V; + def rm : AVX512BI, EVEX_4V; +} + +defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli, + VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, + VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli, + VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl, + VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + +defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli, + VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl, + VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli, + VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl, + VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + +defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai, + VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra, + VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai, + VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra, + VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + 
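// (Editor's note: the uniform shifts above go through the X86ISD::VSHLI
// family, while the variable shifts defined next match the generic ISD
// shift nodes directly, so plain vector IR such as
//   %r = shl <16 x i32> %a, %b
// should select to vpsllvd with no custom lowering beyond the early
// return added to LowerShift in this patch.)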
+//===-------------------------------------------------------------------===// +// Variable Bit Shifts +//===-------------------------------------------------------------------===// +multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag> { + def rr : AVX5128I, + EVEX_4V; + def rm : AVX5128I, + EVEX_4V; +} + +defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 9f1c999..b23da04 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -149,6 +149,9 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; +def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisVec<1>, + SDTCisSameAs<2, 1>]>>; def X86pmuludq : SDNode<"X86ISD::PMULUDQ", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, -- cgit v1.1 From 1a9f21abac47dcea0c62341b0ee4fd35481350b8 Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Wed, 21 Aug 2013 13:14:58 +0000 Subject: Make "mov" work for all Thumb2 MOV encodings According to the ARM specification, "mov" is a valid mnemonic for all Thumb2 MOV encodings. To achieve this, the patch adds one instruction alias with a special range condition to avoid collision with the Thumb1 MOV. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188901 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 5 +++++ lib/Target/ARM/ARMInstrThumb2.td | 3 +++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 9 +++++++++ 3 files changed, 17 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 280757f..3b83667 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -710,6 +710,11 @@ def imm0_65535_expr : Operand { let ParserMatchClass = Imm0_65535ExprAsmOperand; } +def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; } +def imm256_65535_expr : Operand { + let ParserMatchClass = Imm256_65535ExprAsmOperand; +} + /// imm24b - True if the 32-bit immediate is encodable in 24 bits. 
def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; } def imm24b : Operand, ImmLeaf; + def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index df9306a..7467071 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -867,6 +867,15 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 65536; } + bool isImm256_65535Expr() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + // If it's not a constant expression, it'll generate a fixup and be + // handled later. + if (!CE) return true; + int64_t Value = CE->getValue(); + return Value >= 256 && Value < 65536; + } bool isImm0_65535Expr() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast(getImm()); -- cgit v1.1 From 52d35c24600186790d83a0830ebdc79fb71f878d Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Wed, 21 Aug 2013 17:47:53 +0000 Subject: A minor change for an obvous problem caused by r188451: def imm0_63 : Operand, ImmLeaf= 0 && Imm < 63;}]>{ As it seems Imm <63 should be Imm <= 63. ImmLeaf is used in pattern match, but there is already a function check the shift amount range, so just remove ImmLeaf. Also add a test to check 63. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188911 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrNEON.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 175c3aa..fb6d654 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -1414,7 +1414,7 @@ def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; // Vector Shift (Immediate) -def imm0_63 : Operand, ImmLeaf= 0 && Imm < 63; }]> { +def imm0_63 : Operand { let ParserMatchClass = uimm6_asmoperand; } -- cgit v1.1 From 4a4844a022bee4ff904d4f32bd45f7fdaabb54b7 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 21 Aug 2013 20:36:42 +0000 Subject: Remove use of forbidden 'iostream' header. Also obsessively reorder the headers to be in something closer to alphabetical order. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188928 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Hexagon/HexagonSplitConst32AndConst64.cpp | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index 3bf2f20..5166f8e 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -16,33 +16,31 @@ // scheduled after register allocation. 
// //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "xfer" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" + +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "HexagonTargetMachine.h" -#include "HexagonSubtarget.h" -#include "HexagonMachineFunctionInfo.h" #include -#include - -#include "llvm/Support/CommandLine.h" -#define DEBUG_TYPE "xfer" - using namespace llvm; -- cgit v1.1 From 5464a92861c76f1e091cd219dee71ce9858eb195 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 21 Aug 2013 22:14:17 +0000 Subject: R600: Remove unnecessary casts Spotted by Bill Wendling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188942 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index dd8b73f..692b3b7 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -115,14 +115,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); - static const int IntTypes[] = { - (int)MVT::v2i32, - (int)MVT::v4i32 + static const MVT::SimpleValueType IntTypes[] = { + MVT::v2i32, MVT::v4i32 }; const size_t NumIntTypes = array_lengthof(IntTypes); for (unsigned int x = 0; x < NumIntTypes; ++x) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; + MVT::SimpleValueType VT = IntTypes[x]; //Expand the following operations for the current type by default setOperationAction(ISD::ADD, VT, Expand); setOperationAction(ISD::AND, VT, Expand); @@ -142,14 +141,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::XOR, VT, Expand); } - static const int FloatTypes[] = { - (int)MVT::v2f32, - (int)MVT::v4f32 + static const MVT::SimpleValueType FloatTypes[] = { + MVT::v2f32, MVT::v4f32 }; const size_t NumFloatTypes = array_lengthof(FloatTypes); for (unsigned int x = 0; x < NumFloatTypes; ++x) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; + MVT::SimpleValueType VT = FloatTypes[x]; setOperationAction(ISD::FADD, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); -- cgit v1.1 From e2a48fbd9d1ec8bebb5b10cf8770d716f8bdda28 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 22 Aug 2013 00:14:24 +0000 Subject: ARM: R9 is not safe to 
use for tcGPR. Indirect tail-calls shouldn't use R9 for the branch destination, as it's not reliably a call-clobbered register. rdar://14793425 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188967 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMRegisterInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index bb7d358..90c6a96 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -251,7 +251,7 @@ def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; // to the saved value before the tail call, which would clobber a call address. // Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of // this class and the preceding one(!) This is what we want. -def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> { +def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> { let AltOrders = [(and tcGPR, tGPR)]; let AltOrderSelect = [{ return MF.getTarget().getSubtarget().isThumb1Only(); -- cgit v1.1 From bccc6f89b7a15abda5593a30c101ae85d1dc3b77 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 Aug 2013 06:51:04 +0000 Subject: ARM: respect tied 64-bit inlineasm operands when printing The code for 'Q' and 'R' operand modifiers needs to look through tied operands to discover the register class. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188990 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 13a22b1..485b1c5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -496,6 +496,23 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!FlagsOP.isImm()) return true; unsigned Flags = FlagsOP.getImm(); + + // This operand may not be the one that actually provides the register. If + // it's tied to a previous one then we should refer instead to that one + // for registers and their classes. + unsigned TiedIdx; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedIdx)) { + for (OpNum = InlineAsm::MIOp_FirstOperand; TiedIdx; --TiedIdx) { + unsigned OpFlags = MI->getOperand(OpNum).getImm(); + OpNum += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + Flags = MI->getOperand(OpNum).getImm(); + + // Later code expects OpNum to be pointing at the register rather than + // the flags. + OpNum += 1; + } + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); unsigned RC; InlineAsm::hasRegClassConstraint(Flags, RC); -- cgit v1.1 From f7ab3a84b3e1b5a647ae9456a5edb99d86b35329 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 22 Aug 2013 09:57:11 +0000 Subject: ARM: use TableGen patterns to select CMOV operations. Back in the mists of time (2008), it seems TableGen couldn't handle the patterns necessary to match ARM's CMOV node that we convert select operations to, so we wrote a lot of fairly hairy C++ to do it for us. TableGen can deal with it now: there were a few minor differences to CodeGen (see tests), but nothing obviously worse that I could see, so we should probably address anything that *does* come up in a localised manner. 
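As a small worked example (the editor's, not from the original message),
a source-level select such as:

  int smaller(int a, int b) { return a < b ? a : b; }

was previously matched to MOVCCr by the hand-rolled SelectCMOVOp() code
this patch deletes; with the new two-value cmovpred operand the same
match becomes an ordinary TableGen pattern on ARMcmov, producing roughly
"cmp r0, r1; movge r0, r1" in ARM mode.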
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188995 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 32 ++++- lib/Target/ARM/ARMISelDAGToDAG.cpp | 222 +------------------------------- lib/Target/ARM/ARMInstrFormats.td | 10 ++ lib/Target/ARM/ARMInstrInfo.td | 55 ++++---- lib/Target/ARM/ARMInstrThumb.td | 6 +- lib/Target/ARM/ARMInstrThumb2.td | 118 +++++++---------- lib/Target/ARM/ARMInstrVFP.td | 20 +-- 7 files changed, 137 insertions(+), 326 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index beb843c..cedfc6d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -727,7 +727,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::MOVCCsr: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), (MI.getOperand(1).getReg())) @@ -743,13 +742,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case ARM::t2MOVCCi16: case ARM::MOVCCi16: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16), + unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' .addReg(MI.getOperand(4).getReg()); - MI.eraseFromParent(); return true; } @@ -766,8 +766,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case ARM::t2MVNCCi: case ARM::MVNCCi: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), + unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' @@ -777,6 +779,28 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case ARM::t2MOVCClsl: + case ARM::t2MOVCClsr: + case ARM::t2MOVCCasr: + case ARM::t2MOVCCror: { + unsigned NewOpc; + switch (Opcode) { + case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break; + case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break; + case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break; + case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break; + default: llvm_unreachable("unexpeced conditional move"); + } + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), + MI.getOperand(1).getReg()) + .addReg(MI.getOperand(2).getReg()) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .addReg(MI.getOperand(5).getReg()) + .addReg(0); // 's' bit + MI.eraseFromParent(); + return true; + } case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 3298b73..c2dbcb9 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -130,6 +130,13 @@ public: return true; } + bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { + const ConstantSDNode *CN = cast(N); + Pred = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); + return true; + } + bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, @@ -239,21 +246,6 @@ 
private: /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); - /// SelectCMOVOp - Select CMOV instructions for ARM. - SDNode *SelectCMOVOp(SDNode *N); - SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); @@ -2321,204 +2313,6 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, return NULL; } -SDNode *ARMDAGToDAGISel:: -SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - SDValue CPTmp0; - SDValue CPTmp1; - if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { - unsigned SOVal = cast(CPTmp1)->getZExtValue(); - unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); - unsigned Opc = 0; - switch (SOShOp) { - case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; - case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; - case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; - case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; - default: - llvm_unreachable("Unknown so_reg opcode!"); - } - SDValue SOShImm = - CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6); - } - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6); - } - - if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7); - } - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - ConstantSDNode *T = dyn_cast(TrueVal); - if (!T) - return 0; - - unsigned Opc = 0; - unsigned TrueImm = T->getZExtValue(); - if (is_t2_so_imm(TrueImm)) { - Opc = ARM::t2MOVCCi; - } else if (TrueImm <= 0xffff) { - Opc = ARM::t2MOVCCi16; - } else if (is_t2_so_imm_not(TrueImm)) { - TrueImm = ~TrueImm; - Opc = ARM::t2MVNCCi; - } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) { - // Large immediate. 
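// (Editor's note on the deleted logic: it chose, in priority order, a
// Thumb2 modified-immediate MOVCC, the 16-bit MOVW form, an inverted MVN
// form and, as in this branch, a two-instruction 32-bit immediate
// pseudo; the TableGen patterns added elsewhere in this patch are
// intended to express the same ordering declaratively.)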
- Opc = ARM::t2MOVCCi32imm; - } - - if (Opc) { - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); - } - - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - ConstantSDNode *T = dyn_cast(TrueVal); - if (!T) - return 0; - - unsigned Opc = 0; - unsigned TrueImm = T->getZExtValue(); - bool isSoImm = is_so_imm(TrueImm); - if (isSoImm) { - Opc = ARM::MOVCCi; - } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) { - Opc = ARM::MOVCCi16; - } else if (is_so_imm_not(TrueImm)) { - TrueImm = ~TrueImm; - Opc = ARM::MVNCCi; - } else if (TrueVal.getNode()->hasOneUse() && - (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) { - // Large immediate. - Opc = ARM::MOVCCi32imm; - } - - if (Opc) { - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); - } - - return 0; -} - -SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { - EVT VT = N->getValueType(0); - SDValue FalseVal = N->getOperand(0); - SDValue TrueVal = N->getOperand(1); - SDValue CC = N->getOperand(2); - SDValue CCR = N->getOperand(3); - SDValue InFlag = N->getOperand(4); - assert(CC.getOpcode() == ISD::Constant); - assert(CCR.getOpcode() == ISD::Register); - ARMCC::CondCodes CCVal = - (ARMCC::CondCodes)cast(CC)->getZExtValue(); - - if (!Subtarget->isThumb1Only() && VT == MVT::i32) { - // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Pattern complexity = 18 cost = 1 size = 0 - if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } else { - SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<>:$true, - // (imm:i32):$cc) - // Emits: (MOVCCi:i32 GPR:i32:$false, - // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) - // Pattern complexity = 10 cost = 1 size = 0 - if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } else { - SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 1 size = 0 - // - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 11 size = 0 - // - // Also VMOVScc and VMOVDcc. 
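// (Editor's note: the register-register fallback below picked MOVCCr,
// t2MOVCCr or tMOVCCr_pseudo for i32 and VMOVScc/VMOVDcc for f32/f64;
// each of these now carries an equivalent pattern keyed on cmovpred.)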
- SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Illegal conditional move type!"); - case MVT::i32: - Opc = Subtarget->isThumb() - ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) - : ARM::MOVCCr; - break; - case MVT::f32: - Opc = ARM::VMOVScc; - break; - case MVT::f64: - Opc = ARM::VMOVDcc; - break; - } - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); -} - /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2882,8 +2676,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } - case ARMISD::CMOV: - return SelectCMOVOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 1349476..6d4de3d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -155,6 +155,16 @@ def pred : PredicateOperand, PredicateOp, + ComplexPattern { + let MIOperandInfo = (ops i32imm, i32imm); + let PrintMethod = "printPredicateOperand"; +} + // Conditional code result for instructions whose 's' bit is set, e.g. subs. def CCOutOperand : AsmOperandClass { let Name = "CCOut"; } def cc_out : OptionalDefOperand { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3b83667..a4ea69f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4146,56 +4146,65 @@ def BCCZi64 : PseudoInst<(outs), // Conditional moves -// FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. 
:( let neverHasSideEffects = 1 in { let isCommutable = 1, isSelect = 1 in -def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), +def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, GPR:$Rm, cmovpred:$p), 4, IIC_iCMOVr, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg_imm:$shift, pred:$p), - 4, IIC_iCMOVsr, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift, - imm:$cc, CCR:$ccr))*/]>, + (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p), + 4, IIC_iCMOVsr, + [(set GPR:$Rd, + (ARMcmov GPR:$false, so_reg_imm:$shift, + cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg_reg:$shift, pred:$p), + (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p), 4, IIC_iCMOVsr, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, - imm:$cc, CCR:$ccr))*/]>, + [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, + cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in -def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, imm0_65535_expr:$imm, pred:$p), - 4, IIC_iMOVi, - []>, +def MOVCCi16 + : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p), + 4, IIC_iMOVi, + [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm, + cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]>; let isMoveImm = 1 in def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm, pred:$p), + (ins GPR:$false, so_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, + [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, + cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; // Two instruction predicate mov immediate. let isMoveImm = 1 in -def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm:$src, pred:$p), - 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; +def MOVCCi32imm + : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm:$src, cmovpred:$p), + 8, IIC_iCMOVix2, + [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Requires<[HasV6T2]>; let isMoveImm = 1 in def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm, pred:$p), + (ins GPR:$false, so_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, + [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, + cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; } // neverHasSideEffects diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 7e383d2..291b98a 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1204,9 +1204,9 @@ def tUXTH : // A8.6.264 // Expanded after instruction selection into a branch sequence. let usesCustomInserter = 1 in // Expanded after instruction selection. 
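// (Editor's annotation: Thumb1 has no predicated MOV, so unlike the ARM
// and Thumb2 MOVCC* pseudos this one still expands to a branch sequence
// after selection; only the matching, not the expansion, moves into the
// pattern below.)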
def tMOVCCr_pseudo : - PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), - NoItinerary, - [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; + PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p), + NoItinerary, + [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>; // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 3498e41..f3464cc 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3032,93 +3032,67 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves -// FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { let isCommutable = 1, isSelect = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, pred:$p), + (ins rGPR:$false, rGPR:$Rm, cmovpred:$p), 4, IIC_iCMOVr, - [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, - Sched<[WriteALU]>; + [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in -def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, t2_so_imm:$imm, pred:$p), +def t2MOVCCi + : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, -[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; -// FIXME: Pseudo-ize these. For now, just mark codegen only. let isCodeGenOnly = 1 in { let isMoveImm = 1 in -def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm), - IIC_iCMOVi, - "movw", "\t$Rd, $imm", []>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 1; - let Inst{24-21} = 0b0010; - let Inst{20} = 0; // The S bit. 
- let Inst{15} = 0; - - bits<4> Rd; - bits<16> imm; - - let Inst{11-8} = Rd; - let Inst{19-16} = imm{15-12}; - let Inst{26} = imm{11}; - let Inst{14-12} = imm{10-8}; - let Inst{7-0} = imm{7-0}; -} +def t2MOVCCi16 + : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p), + 4, IIC_iCMOVi, + [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in -def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst), - (ins rGPR:$false, i32imm:$src, pred:$p), - IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">; +def t2MVNCCi + : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p), + 4, IIC_iCMOVi, + [(set rGPR:$Rd, + (ARMcmov rGPR:$false, t2_so_imm_not:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + +class MOVCCShPseudo + : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p), + 4, IIC_iCMOVsi, + [(set rGPR:$Rd, (ARMcmov rGPR:$false, + (opnode rGPR:$Rm, (i32 ty:$imm)), + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + +def t2MOVCClsl : MOVCCShPseudo; +def t2MOVCClsr : MOVCCShPseudo; +def t2MOVCCasr : MOVCCShPseudo; +def t2MOVCCror : MOVCCShPseudo; let isMoveImm = 1 in -def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), - IIC_iCMOVi, "mvn", "\t$Rd, $imm", -[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm, - imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, Sched<[WriteALU]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = 0b0011; - let Inst{20} = 0; // The S bit. - let Inst{19-16} = 0b1111; // Rn - let Inst{15} = 0; -} - -class T2I_movcc_sh opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> - : T2TwoRegShiftImm, Sched<[WriteALU]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = 0b0010; - let Inst{20} = 0; // The S bit. - let Inst{19-16} = 0b1111; // Rn - let Inst{5-4} = opcod; // Shift type. 
-} -def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), - IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>, - RegConstraint<"$false = $Rd">; -def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), - IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>, - RegConstraint<"$false = $Rd">; -def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), - IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>, - RegConstraint<"$false = $Rd">; -def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), - IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, - RegConstraint<"$false = $Rd">; +def t2MOVCCi32imm + : t2PseudoInst<(outs rGPR:$dst), + (ins rGPR:$false, i32imm:$src, cmovpred:$p), + 8, IIC_iCMOVix2, + [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src, + cmovpred:$p))]>, + RegConstraint<"$false = $dst">; } // isCodeGenOnly = 1 } // neverHasSideEffects diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index f9cfa15..c6b8bc3 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1466,15 +1466,17 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), // let neverHasSideEffects = 1 in { -def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), - 4, IIC_fpUNA64, - [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, - RegConstraint<"$Dn = $Dd">; - -def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), - 4, IIC_fpUNA32, - [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, - RegConstraint<"$Sn = $Sd">; +def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p), + IIC_fpUNA64, + [(set (f64 DPR:$Dd), + (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>, + RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2]>; + +def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), + IIC_fpUNA32, + [(set (f32 SPR:$Sd), + (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, + RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>; } // neverHasSideEffects //===----------------------------------------------------------------------===// -- cgit v1.1 From 7ddda4704cdb24163591857e8d08614463cec335 Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Thu, 22 Aug 2013 12:08:04 +0000 Subject: Fix ARM FastISel PIC function call. The function call to external function should come with PLT relocation type if the PIC relocation model is used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189002 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index f4aaf09..628b514 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2460,15 +2460,22 @@ bool ARMFastISel::SelectCall(const Instruction *I, MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); + unsigned char OpFlags = 0; + + // Add MO_PLT for global address or external symbol in the PIC relocation + // model. + if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) + OpFlags = ARMII::MO_PLT; + // ARM calls don't take a predicate, but tBL / tBLX do. 
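// (Editor's note: with ARMII::MO_PLT attached, the call target should be
// emitted with a PLT-indirect relocation rather than a direct one, which
// is what a position-independent call to a possibly-preemptible external
// symbol requires.)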
if(isThumb2) AddDefaultPred(MIB); if (UseReg) MIB.addReg(CalleeReg); else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); + MIB.addGlobalAddress(GV, 0, OpFlags); else - MIB.addExternalSymbol(IntrMemName, 0); + MIB.addExternalSymbol(IntrMemName, OpFlags); // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) -- cgit v1.1 From 1765e74c15c83db437018a3c9efabbeb4ce9cbde Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 22 Aug 2013 12:18:28 +0000 Subject: AVX-512: Added masked SHIFT commands, more encoding tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189005 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 11 ++--- lib/Target/X86/X86InstrAVX512.td | 56 ++++++++++++++++-------- 2 files changed, 44 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 7173d51..0c9fd91 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -868,11 +868,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::MRM6r: case X86II::MRM7r: // MRM0r-MRM7r instructions forms: // dst(VEX_4V), src(ModR/M), imm8 - VEX_4V = getVEXRegisterEncoding(MI, CurOp); - if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) - EVEX_V2 = 0x0; - CurOp++; - + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_V2 = 0x0; + CurOp++; + } if (HasEVEX_K) EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c3fb801..0cb946d 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1720,78 +1720,98 @@ defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, memopv8i // AVX-512 Shift instructions //===----------------------------------------------------------------------===// multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, - string OpcodeStr, - SDNode OpNode, RegisterClass RC, ValueType vt, - X86MemOperand x86memop, PatFrag mem_frag> { + string OpcodeStr, SDNode OpNode, RegisterClass RC, + ValueType vt, X86MemOperand x86memop, PatFrag mem_frag, + RegisterClass KRC> { def ri : AVX512BIi8, EVEX_4V; + def rik : AVX512BIi8, EVEX_4V, EVEX_K; def mi: AVX512BIi8, EVEX_4V; + def mik: AVX512BIi8, EVEX_4V, EVEX_K; } multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, RegisterClass RC, ValueType vt, ValueType SrcVT, - PatFrag bc_frag> { + PatFrag bc_frag, RegisterClass KRC> { // src2 is always 128-bit def rr : AVX512BI, EVEX_4V; + def rrk : AVX512BI, EVEX_4V, EVEX_K; def rm : AVX512BI, EVEX_4V; + def rmk : AVX512BI, EVEX_4V, EVEX_K; } defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli, - VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; + VR512, v16i32, i512mem, memopv16i32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, - VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VQ>; defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli, - VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; defm VPSRLQZ 
: avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl, - VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W; defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli, - VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, + VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl, - VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VQ>; defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli, - VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl, - VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W; defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai, - VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; + VR512, v16i32, i512mem, memopv16i32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra, - VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512, + VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VQ>; defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai, - VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, + VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra, - VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512, + VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W; //===-------------------------------------------------------------------===// -- cgit v1.1 From bad8d4ca599024de8fdc6255a4b73bb294f49239 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 22 Aug 2013 12:19:24 +0000 Subject: [ARM] Constrain some register classes in EmitAtomicBinary64 so that we pass these tests with -verify-machineinstrs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189006 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f22c5b9..ebfa1b1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6360,6 +6360,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); + MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); } MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -6467,6 +6469,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // Store if (isThumb2) { + MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); + MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); } else { -- cgit v1.1 From 5f268555b967fccbfab6e7b69305d74006116927 Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Thu, 22 Aug 2013 13:16:07 +0000 Subject: Fix ARM vcvt encoding when the number of fractional bits is zero. 
The instruction to convert between floating point and fixed point representations takes an immediate operand for the number of fractional bits of the fixed point value. ARMARM specifies that when that number of bits is zero, the assembler should encode floating point/integer conversion instructions. This patch adds the necessary instruction aliases to achieve this behaviour. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189009 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index af4f4d1..59dea48 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5461,6 +5461,25 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; } +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", + (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", + (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", + (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", + (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", + (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", + (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", + (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", + (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; + + // VCVT : Vector Convert Between Half-Precision and Single-Precision. def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, IIC_VUNAQ, "vcvt", "f16.f32", -- cgit v1.1 From 35eab1db2f21aee9678fe946a5d983a67285e7e4 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 22 Aug 2013 15:29:11 +0000 Subject: [ARMv8] Add CodeGen support for VSEL. This uses the ARMcmov pattern that Tim cleaned up in r188995. Thanks to Simon Tatham for his floating point help! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189024 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 94 +++++++++++++++++++++++++++++++++++++- lib/Target/ARM/ARMInstrVFP.td | 20 ++++---- 2 files changed, 105 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ebfa1b1..7021941 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3178,6 +3178,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SelectTrue, SelectFalse, ISD::SETNE); } +static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { + if (CC == ISD::SETNE) + return ISD::SETEQ; + return ISD::getSetCCSwappedOperands(CC); +} + +static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, + bool &swpCmpOps, bool &swpVselOps) { + // Start by selecting the GE condition code for opcodes that return true for + // 'equality' + if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || + CC == ISD::SETULE) + CondCode = ARMCC::GE; + + // and GT for opcodes that return false for 'equality'. + else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || + CC == ISD::SETULT) + CondCode = ARMCC::GT; + + // Since we are constrained to GE/GT, if the opcode contains 'less', we need + // to swap the compare operands. 
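// (Worked examples added by the editor: SETOLT becomes GT with the
// compare operands swapped, since a < b is the same as b > a; SETUGE
// ends up as GT with both the compare and the VSEL operands swapped, so
// the select fires exactly when NOT(b > a), i.e. when a >= b or the
// operands are unordered.)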
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || + CC == ISD::SETULT) + swpCmpOps = true; + + // Both GT and GE are ordered comparisons, and return false for 'unordered'. + // If we have an unordered opcode, we need to swap the operands to the VSEL + // instruction (effectively negating the condition). + // + // This also has the effect of swapping which one of 'less' or 'greater' + // returns true, so we also swap the compare operands. It also switches + // whether we return true for 'equality', so we compensate by picking the + // opposite condition code to our original choice. + if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || + CC == ISD::SETUGT) { + swpCmpOps = !swpCmpOps; + swpVselOps = !swpVselOps; + CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; + } + + // 'ordered' is 'anything but unordered', so use the VS condition code and + // swap the VSEL operands. + if (CC == ISD::SETO) { + CondCode = ARMCC::VS; + swpVselOps = true; + } + + // 'unordered or not equal' is 'anything but equal', so use the EQ condition + // code and swap the VSEL operands. + if (CC == ISD::SETUNE) { + CondCode = ARMCC::EQ; + swpVselOps = true; + } +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3188,15 +3243,52 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (LHS.getValueType() == MVT::i32) { + // Try to generate VSEL on ARMv8. + // The VSEL instruction can't use all the usual ARM condition + // codes: it only has two bits to select the condition code, so it's + // constrained to use only GE, GT, VS and EQ. + // + // To implement all the various ISD::SETXXX opcodes, we sometimes need to + // swap the operands of the previous compare instruction (effectively + // inverting the compare condition, swapping 'less' and 'greater') and + // sometimes need to swap the operands to the VSEL (which inverts the + // condition in the sense of firing whenever the previous condition didn't) + if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); + if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || + CondCode == ARMCC::VC || CondCode == ARMCC::NE) { + CC = getInverseCCForVSEL(CC); + std::swap(TrueVal, FalseVal); + } + } + SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, + Cmp); } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); + // Try to generate VSEL on ARMv8. 
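// (Editor's note: this is the floating-point-compare path; the i32 path
// above inverts conditions VSEL cannot encode before building the CMOV,
// while here checkVSELConstraints() may swap the compare operands, the
// select operands, or both.)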
+ if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { + bool swpCmpOps = false; + bool swpVselOps = false; + checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); + + if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || + CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { + if (swpCmpOps) + std::swap(LHS, RHS); + if (swpVselOps) + std::swap(TrueVal, FalseVal); + } + } + SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index c6b8bc3..b4df4d7 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -333,24 +333,28 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } -multiclass vsel_inst opc> { - let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { +multiclass vsel_inst opc, int CC> { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "", + Uses = [CPSR], AddedComplexity = 4 in { def S : ASbInp<0b11100, opc, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), - []>, Requires<[HasV8FP]>; + [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>, + Requires<[HasV8FP]>; def D : ADbInp<0b11100, opc, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), - []>, Requires<[HasV8FP]>; + [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>, + Requires<[HasV8FP]>; } } -defm VSELGT : vsel_inst<"gt", 0b11>; -defm VSELGE : vsel_inst<"ge", 0b10>; -defm VSELEQ : vsel_inst<"eq", 0b00>; -defm VSELVS : vsel_inst<"vs", 0b01>; +// The CC constants here match ARMCC::CondCodes. +defm VSELGT : vsel_inst<"gt", 0b11, 12>; +defm VSELGE : vsel_inst<"ge", 0b10, 10>; +defm VSELEQ : vsel_inst<"eq", 0b00, 0>; +defm VSELVS : vsel_inst<"vs", 0b01, 6>; multiclass vmaxmin_inst { let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { -- cgit v1.1 From 344cfb4db4b22f3330cbfc5ccb5e1149cd4e17e3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 22 Aug 2013 20:21:02 +0000 Subject: R600/SI: Fix another case of illegal VGPR to SGPR copy This fixes a crash in Unigine Tropics. https://bugs.freedesktop.org/show_bug.cgi?id=68389 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189057 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIFixSGPRCopies.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp index 435172a..7f07b01 100644 --- a/lib/Target/R600/SIFixSGPRCopies.cpp +++ b/lib/Target/R600/SIFixSGPRCopies.cpp @@ -143,7 +143,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } unsigned Reg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg); - if (RC == &AMDGPU::VSrc_32RegClass) { + if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) { MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); } } -- cgit v1.1 From d93969c32a6bbae3326a1f485c4c85be1cb39406 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Fri, 23 Aug 2013 02:25:47 +0000 Subject: Add an OtherPreserved field to the CalleeSaved TableGen class. This field specifies registers that are preserved across function calls, but that should not be included in the generates SaveList array. 
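As a sketch of how such a mask is consumed (the editor's example,
assuming the usual LLVM register-mask convention in which a set bit means
the register survives the call):

  // True if PhysReg is preserved across a call carrying this mask.
  static bool isPreservedAcrossCall(const uint32_t *Mask, unsigned PhysReg) {
    return Mask[PhysReg / 32] & (1u << (PhysReg % 32));
  }

For SPARC the CSR set below deliberately leaves the generated SaveList
empty while OtherPreserved marks %i0-%i7 and %l0-%l7, so the mask
advertises the window registers as preserved without the prologue ever
saving them.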
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189084 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Sparc/SparcCallingConv.td   | 6 ++++++
 lib/Target/Sparc/SparcRegisterInfo.cpp | 8 ++++++--
 lib/Target/Sparc/SparcRegisterInfo.h   | 1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index a181bcf..181165d 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -117,3 +117,9 @@ def CC_Sparc64 : CallingConv<[
   // arguments whether they are passed in registers or not.
   CCCustom<"CC_Sparc64_Full">
 ]>;
+
+// Callee-saved registers are handled by the register window mechanism.
+def CSR : CalleeSavedRegs<(add)> {
+  let OtherPreserved = (add (sequence "I%u", 0, 7),
+                            (sequence "L%u", 0, 7));
+}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index dc97f06..0b0fe2d 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -40,8 +40,12 @@ SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st)

 const uint16_t*
 SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  static const uint16_t CalleeSavedRegs[] = { 0 };
-  return CalleeSavedRegs;
+  return CSR_SaveList;
+}
+
+const uint32_t*
+SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+  return CSR_RegMask;
 }

 BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 6b77d4e..ae056cd 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -32,6 +32,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {

   /// Code Generation virtual methods...
   const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+  const uint32_t* getCallPreservedMask(CallingConv::ID CC) const;

   BitVector getReservedRegs(const MachineFunction &MF) const;
--
cgit v1.1
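[Not part of the patch series: the next commit attaches the TableGen-generated CSR_RegMask to SPARC call nodes as a register-mask operand. A sketch of how such an operand is queried once it reaches a call MachineInstr; the helper name is an assumption, while MachineOperand::isRegMask() and clobbersPhysReg() are the standard accessors.]

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineOperand.h"
    using namespace llvm;

    // Returns true if PhysReg survives the given call according to its
    // register mask operand. clobbersPhysReg() reports a clear bit,
    // i.e. a register the mask does not preserve.
    static bool callPreserves(const MachineInstr &MI, unsigned PhysReg) {
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = MI.getOperand(i);
        if (MO.isRegMask())
          return !MO.clobbersPhysReg(PhysReg);
      }
      return false; // No mask operand: conservatively assume clobbered.
    }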
From b58126124081a9bf8da1368441b00070ed2db232 Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Fri, 23 Aug 2013 02:33:47 +0000
Subject: Use register masks on SPARC call instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189085 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Sparc/SparcISelLowering.cpp | 13 +++++++++++++
 lib/Target/Sparc/SparcInstrInfo.td     |  5 +----
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 4b0fa67..ce9cd94 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -880,6 +880,13 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
     Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
                                   RegsToPass[i].second.getValueType()));
+
+  // Add a register mask operand representing the call-preserved registers.
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
+
   if (InFlag.getNode())
     Ops.push_back(InFlag);

@@ -1112,6 +1119,12 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                   RegsToPass[i].second.getValueType()));

+  // Add a register mask operand representing the call-preserved registers.
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
+
   // Make sure the CopyToReg nodes are glued to the call instruction which
   // consumes the registers.
   if (InGlue.getNode())
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 2d228ea..f5aa581 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -556,10 +556,7 @@ let Uses = [FCC] in
 // Section B.24 - Call and Link Instruction, p. 125
 // This is the only Format 1 instruction
 let Uses = [O6],
-    hasDelaySlot = 1, isCall = 1,
-    Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
-            D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
-            ICC, FCC, Y] in {
+    hasDelaySlot = 1, isCall = 1 in {
   def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
                     "call $dst", []> {
     bits<30> disp;
--
cgit v1.1

From c73488a38ecb26340604706003e84cff7bd48ddf Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Fri, 23 Aug 2013 10:10:13 +0000
Subject: [mips][msa] Split MSA128 regset into size-specific sets containing
 the same registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189095 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 926 ++++++++++++++++----------------- lib/Target/Mips/MipsRegisterInfo.td | 11 +- lib/Target/Mips/MipsSEISelLowering.cpp | 20 +- lib/Target/Mips/MipsSEISelLowering.h | 2 +- 4 files changed, 482 insertions(+), 477 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 541e3ad..24d2c91 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -815,675 +815,675 @@ class MSA_VEC_DESC_BASE, IsCommutable; + MSA128B, MSA128B>, IsCommutable; class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128H, MSA128H>, IsCommutable; class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128W, MSA128W>, IsCommutable; class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128D, MSA128D>, IsCommutable; class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128B, MSA128B>, IsCommutable; class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128H, MSA128H>, IsCommutable; class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128B, MSA128B>, IsCommutable; class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128H, MSA128H>, IsCommutable; class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class ADDS_S_D_DESC : MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b", int_mips_adds_u_b, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128B, MSA128B>, IsCommutable; class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128H, MSA128H>, IsCommutable; class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", int_mips_addv_b, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128B, MSA128B>, IsCommutable; class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", int_mips_addv_h, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128H, MSA128H>, IsCommutable; class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128W, MSA128W>, IsCommutable; class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128D, MSA128D>, IsCommutable; class ADDVI_B_DESC : 
MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128B, MSA128B>, IsCommutable; class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128H, MSA128H>, IsCommutable; class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128W, MSA128W>, IsCommutable; class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128D, MSA128D>, IsCommutable; class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128B, MSA128B>, IsCommutable; class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128H, MSA128H>, IsCommutable; class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128W, MSA128W>, IsCommutable; class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128D, MSA128D>, IsCommutable; class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128B, MSA128B>, IsCommutable; class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128H, MSA128H>, IsCommutable; class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, 
IsCommutable; class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128B, MSA128B>, IsCommutable; class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128H, MSA128H>, IsCommutable; class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", int_mips_bclri_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", int_mips_bclri_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", int_mips_bclri_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", int_mips_bclri_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class BINSLI_B_DESC : MSA_BIT_B_DESC_BASE<"binsli.b", int_mips_binsli_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class BINSLI_H_DESC : MSA_BIT_H_DESC_BASE<"binsli.h", int_mips_binsli_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class BINSLI_W_DESC : MSA_BIT_W_DESC_BASE<"binsli.w", int_mips_binsli_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class BINSLI_D_DESC : MSA_BIT_D_DESC_BASE<"binsli.d", int_mips_binsli_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class BINSRI_B_DESC : MSA_BIT_B_DESC_BASE<"binsri.b", int_mips_binsri_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class BINSRI_H_DESC : MSA_BIT_H_DESC_BASE<"binsri.h", int_mips_binsri_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class BINSRI_W_DESC : MSA_BIT_W_DESC_BASE<"binsri.w", int_mips_binsri_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, 
MSA128W>; class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", int_mips_bnegi_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", int_mips_bnegi_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", int_mips_bnegi_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", int_mips_bseti_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", int_mips_bseti_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", int_mips_bseti_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", int_mips_bseti_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128B, MSA128B>, IsCommutable; class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128H, MSA128H>, IsCommutable; class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128W, MSA128W>, IsCommutable; class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, NoItinerary, - MSA128, MSA128>, IsCommutable; + MSA128D, MSA128D>, IsCommutable; class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, NoItinerary, - MSA128, MSA128>; + 
MSA128B, MSA128B>; class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", int_mips_ceqi_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", int_mips_ceqi_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", int_mips_ceqi_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", int_mips_cle_s_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", int_mips_cle_s_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", int_mips_cle_s_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", int_mips_cle_s_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", int_mips_cle_u_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", int_mips_cle_u_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", int_mips_cle_u_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", int_mips_cle_u_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", int_mips_clei_s_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class CLEI_S_H_DESC : MSA_SI5_DESC_BASE<"clei_s.h", int_mips_clei_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class CLEI_S_W_DESC : MSA_SI5_DESC_BASE<"clei_s.w", int_mips_clei_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class CLEI_S_D_DESC : MSA_SI5_DESC_BASE<"clei_s.d", int_mips_clei_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class CLEI_U_B_DESC : MSA_SI5_DESC_BASE<"clei_u.b", int_mips_clei_u_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class CLEI_U_H_DESC : MSA_SI5_DESC_BASE<"clei_u.h", int_mips_clei_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class CLEI_U_W_DESC : MSA_SI5_DESC_BASE<"clei_u.w", int_mips_clei_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class CLEI_U_D_DESC : MSA_SI5_DESC_BASE<"clei_u.d", int_mips_clei_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", int_mips_clt_s_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", int_mips_clt_s_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", int_mips_clt_s_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", int_mips_clt_s_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", int_mips_clt_u_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", int_mips_clt_u_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", int_mips_clt_u_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", int_mips_clt_u_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", int_mips_clti_s_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class CLTI_S_H_DESC : 
MSA_SI5_DESC_BASE<"clti_s.h", int_mips_clti_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class CLTI_S_W_DESC : MSA_SI5_DESC_BASE<"clti_s.w", int_mips_clti_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class CLTI_S_D_DESC : MSA_SI5_DESC_BASE<"clti_s.d", int_mips_clti_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class CLTI_U_B_DESC : MSA_SI5_DESC_BASE<"clti_u.b", int_mips_clti_u_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class CLTI_U_H_DESC : MSA_SI5_DESC_BASE<"clti_u.h", int_mips_clti_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class CLTI_U_W_DESC : MSA_SI5_DESC_BASE<"clti_u.w", int_mips_clti_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class CLTI_U_D_DESC : MSA_SI5_DESC_BASE<"clti_u.d", int_mips_clti_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", int_mips_copy_s_b, - NoItinerary, GPR32, MSA128>; + NoItinerary, GPR32, MSA128B>; class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", int_mips_copy_s_h, - NoItinerary, GPR32, MSA128>; + NoItinerary, GPR32, MSA128H>; class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", int_mips_copy_s_w, - NoItinerary, GPR32, MSA128>; + NoItinerary, GPR32, MSA128W>; class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", int_mips_copy_u_b, - NoItinerary, GPR32, MSA128>; + NoItinerary, GPR32, MSA128B>; class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", int_mips_copy_u_h, - NoItinerary, GPR32, MSA128>; + NoItinerary, GPR32, MSA128H>; class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", int_mips_copy_u_w, - NoItinerary, GPR32, MSA128>; + NoItinerary, GPR32, MSA128W>; class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", int_mips_div_s_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", int_mips_div_s_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", int_mips_div_s_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", int_mips_div_s_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", int_mips_div_u_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class DOTP_S_B_DESC : MSA_3R_DESC_BASE<"dotp_s.b", int_mips_dotp_s_b, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128B, MSA128B>, IsCommutable; class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128H, MSA128H>, IsCommutable; class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class DOTP_U_B_DESC : MSA_3R_DESC_BASE<"dotp_u.b", int_mips_dotp_u_b, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128B, MSA128B>, IsCommutable; class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, - NoItinerary, MSA128, 
MSA128>, + NoItinerary, MSA128H, MSA128H>, IsCommutable; class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128H, MSA128B>, IsCommutable; class DPADD_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.w", int_mips_dpadd_s_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128H>, IsCommutable; class DPADD_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.d", int_mips_dpadd_s_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128W>, IsCommutable; class DPADD_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.h", int_mips_dpadd_u_h, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128H, MSA128B>, IsCommutable; class DPADD_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.w", int_mips_dpadd_u_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128H>, IsCommutable; class DPADD_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.d", int_mips_dpadd_u_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128W>, IsCommutable; class DPSUB_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.h", int_mips_dpsub_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128B>; class DPSUB_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.w", int_mips_dpsub_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128H>; class DPSUB_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.d", int_mips_dpsub_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128W>; class DPSUB_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.h", int_mips_dpsub_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128B>; class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128H>; class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128W>; class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", int_mips_fadd_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", int_mips_fadd_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", int_mips_fceq_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", int_mips_fcle_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", int_mips_fcle_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", int_mips_fclt_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", int_mips_fclt_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FCNE_W_DESC : 
MSA_3RF_DESC_BASE<"fcne.w", int_mips_fcne_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", int_mips_fcne_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", int_mips_fcun_w, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128W, MSA128W>, IsCommutable; class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", int_mips_fcun_d, - NoItinerary, MSA128, MSA128>, + NoItinerary, MSA128D, MSA128D>, IsCommutable; class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", int_mips_fdiv_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", int_mips_fdiv_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FEXDO_H_DESC : MSA_3RF_DESC_BASE<"fexdo.h", int_mips_fexdo_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128W>; class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128D>; class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", int_mips_fexp2_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", int_mips_fexp2_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128H>; class FEXUPL_D_DESC : MSA_2RF_DESC_BASE<"fexupl.d", int_mips_fexupl_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128W>; class FEXUPR_W_DESC : MSA_2RF_DESC_BASE<"fexupr.w", int_mips_fexupr_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128H>; class FEXUPR_D_DESC : MSA_2RF_DESC_BASE<"fexupr.d", int_mips_fexupr_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128W>; class FFINT_S_W_DESC : MSA_2RF_DESC_BASE<"ffint_s.w", int_mips_ffint_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FFINT_S_D_DESC : MSA_2RF_DESC_BASE<"ffint_s.d", int_mips_ffint_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FFINT_U_W_DESC : MSA_2RF_DESC_BASE<"ffint_u.w", int_mips_ffint_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FFINT_U_D_DESC : MSA_2RF_DESC_BASE<"ffint_u.d", int_mips_ffint_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FFQL_W_DESC : MSA_2RF_DESC_BASE<"ffql.w", int_mips_ffql_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128H>; class FFQL_D_DESC : MSA_2RF_DESC_BASE<"ffql.d", int_mips_ffql_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128W>; class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128H>; class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128W>; class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", int_mips_fill_b, - NoItinerary, MSA128, GPR32>; + NoItinerary, MSA128B, GPR32>; class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", int_mips_fill_h, - NoItinerary, MSA128, GPR32>; + NoItinerary, MSA128H, GPR32>; class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", int_mips_fill_w, - NoItinerary, MSA128, GPR32>; + NoItinerary, MSA128W, GPR32>; class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", int_mips_flog2_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", 
int_mips_flog2_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FMADD_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.d", int_mips_fmadd_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FMAX_A_W_DESC : MSA_3RF_DESC_BASE<"fmax_a.w", int_mips_fmax_a_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FMAX_A_D_DESC : MSA_3RF_DESC_BASE<"fmax_a.d", int_mips_fmax_a_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FMIN_A_W_DESC : MSA_3RF_DESC_BASE<"fmin_a.w", int_mips_fmin_a_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", int_mips_fmsub_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", int_mips_fmsub_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", int_mips_fmul_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", int_mips_fmul_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", int_mips_frint_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", int_mips_frint_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, 
MSA128D>; class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", int_mips_fsqrt_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", int_mips_fsqrt_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", int_mips_fsub_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", int_mips_fsub_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FTINT_S_W_DESC : MSA_2RF_DESC_BASE<"ftint_s.w", int_mips_ftint_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FTINT_S_D_DESC : MSA_2RF_DESC_BASE<"ftint_s.d", int_mips_ftint_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FTINT_U_W_DESC : MSA_2RF_DESC_BASE<"ftint_u.w", int_mips_ftint_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128W>; class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128D>; class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", int_mips_ilvev_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", int_mips_ilvev_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", int_mips_ilvl_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", int_mips_ilvl_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", int_mips_ilvl_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", int_mips_ilvl_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", int_mips_ilvod_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", int_mips_ilvod_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", int_mips_ilvod_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", int_mips_ilvod_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", int_mips_ilvr_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class INSERT_B_DESC : 
MSA_INSERT_DESC_BASE<"insert.b", int_mips_insert_b, - NoItinerary, MSA128, GPR32>; + NoItinerary, MSA128B, GPR32>; class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", int_mips_insert_h, - NoItinerary, MSA128, GPR32>; + NoItinerary, MSA128H, GPR32>; class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", int_mips_insert_w, - NoItinerary, MSA128, GPR32>; + NoItinerary, MSA128W, GPR32>; class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", int_mips_insve_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class LD_DESC_BASE; -class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, NoItinerary, MSA128>; -class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, NoItinerary, MSA128>; -class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, NoItinerary, MSA128>; +class LD_B_DESC : LD_DESC_BASE<"ld.b", load, v16i8, NoItinerary, MSA128B>; +class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, NoItinerary, MSA128H>; +class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, NoItinerary, MSA128W>; +class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, NoItinerary, MSA128D>; class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", int_mips_ldi_b, - NoItinerary, MSA128>; + NoItinerary, MSA128B>; class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", int_mips_ldi_h, - NoItinerary, MSA128>; + NoItinerary, MSA128H>; class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w, - NoItinerary, MSA128>; + NoItinerary, MSA128W>; class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d, - NoItinerary, MSA128>; + NoItinerary, MSA128D>; class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", int_mips_maddv_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", int_mips_maddv_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", int_mips_maddv_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class MAX_S_B_DESC : 
MSA_3R_DESC_BASE<"max_s.b", int_mips_max_s_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", int_mips_max_s_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", int_mips_max_s_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", int_mips_max_s_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", int_mips_max_u_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", int_mips_min_s_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", int_mips_min_s_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", int_mips_min_s_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", int_mips_min_s_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", int_mips_min_u_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", int_mips_mini_s_b, - NoItinerary, 
MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", int_mips_mini_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", int_mips_mini_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", int_mips_mini_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", int_mips_mini_u_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", int_mips_mini_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", int_mips_mini_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", int_mips_mini_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", int_mips_mod_s_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", int_mips_mod_s_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", int_mips_mod_u_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", int_mips_mod_u_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class MSUB_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.h", int_mips_msub_q_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MSUB_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.w", int_mips_msub_q_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w, - 
NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", int_mips_mulv_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", int_mips_mulv_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", int_mips_mulv_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", int_mips_mulv_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class NLOC_W_DESC : MSA_2R_DESC_BASE<"nloc.w", int_mips_nloc_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class NLOC_D_DESC : MSA_2R_DESC_BASE<"nloc.d", int_mips_nloc_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", int_mips_nlzc_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", int_mips_nlzc_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", int_mips_nlzc_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", int_mips_nlzc_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", int_mips_pckev_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", int_mips_pckev_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", int_mips_pckod_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", int_mips_pcnt_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", int_mips_pcnt_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", int_mips_pcnt_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", int_mips_pcnt_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, 
MSA128D>; class SAT_S_B_DESC : MSA_BIT_B_DESC_BASE<"sat_s.b", int_mips_sat_s_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SAT_S_H_DESC : MSA_BIT_H_DESC_BASE<"sat_s.h", int_mips_sat_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SAT_S_W_DESC : MSA_BIT_W_DESC_BASE<"sat_s.w", int_mips_sat_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SAT_S_D_DESC : MSA_BIT_D_DESC_BASE<"sat_s.d", int_mips_sat_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SAT_U_B_DESC : MSA_BIT_B_DESC_BASE<"sat_u.b", int_mips_sat_u_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SHF_B_DESC : MSA_I8_DESC_BASE<"shf.b", int_mips_shf_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class SHF_H_DESC : MSA_I8_DESC_BASE<"shf.h", int_mips_shf_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class SHF_W_DESC : MSA_I8_DESC_BASE<"shf.w", int_mips_shf_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class SLDI_B_DESC : MSA_BIT_B_DESC_BASE<"sldi.b", int_mips_sldi_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SLDI_H_DESC : MSA_BIT_H_DESC_BASE<"sldi.h", int_mips_sldi_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SLDI_W_DESC : MSA_BIT_W_DESC_BASE<"sldi.w", int_mips_sldi_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SLDI_D_DESC : MSA_BIT_D_DESC_BASE<"sldi.d", int_mips_sldi_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", int_mips_sll_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", int_mips_sll_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", int_mips_sll_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", int_mips_sll_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SLLI_H_DESC : MSA_BIT_H_DESC_BASE<"slli.h", int_mips_slli_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SLLI_W_DESC : MSA_BIT_W_DESC_BASE<"slli.w", int_mips_slli_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SLLI_D_DESC : MSA_BIT_D_DESC_BASE<"slli.d", int_mips_slli_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, NoItinerary, - MSA128, MSA128, GPR32>; + MSA128B, MSA128B, GPR32>; 
class SPLAT_H_DESC : MSA_3R_DESC_BASE<"splat.h", int_mips_splat_h, NoItinerary, - MSA128, MSA128, GPR32>; + MSA128H, MSA128H, GPR32>; class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, NoItinerary, - MSA128, MSA128, GPR32>; + MSA128W, MSA128W, GPR32>; class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, NoItinerary, - MSA128, MSA128, GPR32>; + MSA128D, MSA128D, GPR32>; class SPLATI_B_DESC : MSA_BIT_B_DESC_BASE<"splati.b", int_mips_splati_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SPLATI_H_DESC : MSA_BIT_H_DESC_BASE<"splati.h", int_mips_splati_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SPLATI_D_DESC : MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", int_mips_sra_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", int_mips_sra_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", int_mips_sra_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", int_mips_sra_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SRAI_H_DESC : MSA_BIT_H_DESC_BASE<"srai.h", int_mips_srai_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", int_mips_srl_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", int_mips_srl_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SRLI_H_DESC : MSA_BIT_H_DESC_BASE<"srli.h", int_mips_srli_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class ST_DESC_BASE; -class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, NoItinerary, MSA128>; -class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, NoItinerary, MSA128>; -class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, NoItinerary, MSA128>; +class ST_B_DESC : ST_DESC_BASE<"st.b", store, v16i8, NoItinerary, MSA128B>; +class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, NoItinerary, MSA128H>; +class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, NoItinerary, MSA128W>; +class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, NoItinerary, MSA128D>; class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", 
int_mips_subs_s_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SUBS_U_W_DESC : MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SUBSUS_U_B_DESC : MSA_3R_DESC_BASE<"subsus_u.b", int_mips_subsus_u_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SUBSUS_U_H_DESC : MSA_3R_DESC_BASE<"subsus_u.h", int_mips_subsus_u_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SUBSUS_U_W_DESC : MSA_3R_DESC_BASE<"subsus_u.w", int_mips_subsus_u_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SUBSUS_U_D_DESC : MSA_3R_DESC_BASE<"subsus_u.d", int_mips_subsus_u_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SUBSUU_S_B_DESC : MSA_3R_DESC_BASE<"subsuu_s.b", int_mips_subsuu_s_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SUBSUU_S_H_DESC : MSA_3R_DESC_BASE<"subsuu_s.h", int_mips_subsuu_s_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SUBSUU_S_W_DESC : MSA_3R_DESC_BASE<"subsuu_s.w", int_mips_subsuu_s_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SUBSUU_S_D_DESC : MSA_3R_DESC_BASE<"subsuu_s.d", int_mips_subsuu_s_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", int_mips_subv_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", int_mips_subv_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", int_mips_subv_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, MSA128W>; class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", int_mips_subv_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", int_mips_subvi_h, NoItinerary, - MSA128, MSA128>; + MSA128H, MSA128H>; class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", int_mips_subvi_w, NoItinerary, - MSA128, MSA128>; + MSA128W, MSA128W>; class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", int_mips_subvi_d, NoItinerary, - MSA128, MSA128>; + MSA128D, MSA128D>; class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128B, MSA128B>; class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128H, MSA128H>; class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128W, 
MSA128W>; class VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, - NoItinerary, MSA128, MSA128>; + NoItinerary, MSA128D, MSA128D>; class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, NoItinerary, - MSA128, MSA128>; + MSA128B, MSA128B>; // Instruction defs. def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC, Requires<[HasMSA]>; def ADD_A_H : ADD_A_H_ENC, ADD_A_H_DESC, Requires<[HasMSA]>; @@ -2546,9 +2546,9 @@ def LD_FW : MSAPat<(v4f32 (load addr:$addr)), def LD_FD : MSAPat<(v2f64 (load addr:$addr)), (LD_D addr:$addr)>; -def ST_FH : MSAPat<(store (v8f16 MSA128:$ws), addr:$addr), - (ST_H MSA128:$ws, addr:$addr)>; -def ST_FW : MSAPat<(store (v4f32 MSA128:$ws), addr:$addr), - (ST_W MSA128:$ws, addr:$addr)>; -def ST_FD : MSAPat<(store (v2f64 MSA128:$ws), addr:$addr), - (ST_D MSA128:$ws, addr:$addr)>; +def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addr:$addr), + (ST_H MSA128H:$ws, addr:$addr)>; +def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addr:$addr), + (ST_W MSA128W:$ws, addr:$addr)>; +def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addr:$addr), + (ST_D MSA128D:$ws, addr:$addr)>; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 0e762f2..022a9c0 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -318,9 +318,14 @@ def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>, def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>, Unallocatable; -def MSA128: RegisterClass<"Mips", - [v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], - 128, (sequence "W%u", 0, 31)>; +def MSA128B: RegisterClass<"Mips", [v16i8], 128, + (sequence "W%u", 0, 31)>; +def MSA128H: RegisterClass<"Mips", [v8i16, v8f16], 128, + (sequence "W%u", 0, 31)>; +def MSA128W: RegisterClass<"Mips", [v4i32, v4f32], 128, + (sequence "W%u", 0, 31)>; +def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128, + (sequence "W%u", 0, 31)>; // Hi/Lo Registers def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index fb72251..dfa16c0 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -78,13 +78,13 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MUL, MVT::v2i16, Legal); if (Subtarget->hasMSA()) { - addMSAType(MVT::v16i8); - addMSAType(MVT::v8i16); - addMSAType(MVT::v4i32); - addMSAType(MVT::v2i64); - addMSAType(MVT::v8f16); - addMSAType(MVT::v4f32); - addMSAType(MVT::v2f64); + addMSAType(MVT::v16i8, &Mips::MSA128BRegClass); + addMSAType(MVT::v8i16, &Mips::MSA128HRegClass); + addMSAType(MVT::v4i32, &Mips::MSA128WRegClass); + addMSAType(MVT::v2i64, &Mips::MSA128DRegClass); + addMSAType(MVT::v8f16, &Mips::MSA128HRegClass); + addMSAType(MVT::v4f32, &Mips::MSA128WRegClass); + addMSAType(MVT::v2f64, &Mips::MSA128DRegClass); } if (!TM.Options.UseSoftFloat) { @@ -133,9 +133,9 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { return new MipsSETargetLowering(TM); } -void -MipsSETargetLowering::addMSAType(MVT::SimpleValueType Ty) { - addRegisterClass(Ty, &Mips::MSA128RegClass); +void MipsSETargetLowering:: +addMSAType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); // Expand all builtin opcodes. 
for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index b56036d..d672677 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -22,7 +22,7 @@ namespace llvm { public: explicit MipsSETargetLowering(MipsTargetMachine &TM); - void addMSAType(MVT::SimpleValueType Ty); + void addMSAType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; -- cgit v1.1 From 287c84a0b45cc826b1200f4cf4be3547d2fcd69c Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 23 Aug 2013 10:16:39 +0000 Subject: ARM: make sure ARM-mode pseudo-inst requires IsARM I'd forgotten that "Requires" blocks override rather than add to the constraints, so my pseudo-instruction was being selected in Thumb mode leading to nonsense instructions. rdar://problem/14817358 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189096 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index a4ea69f..df64a09 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4197,7 +4197,7 @@ def MOVCCi32imm 8, IIC_iCMOVix2, [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src, cmovpred:$p))]>, - RegConstraint<"$false = $Rd">, Requires<[HasV6T2]>; + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; let isMoveImm = 1 in def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), -- cgit v1.1 From a8a7099c1849fcbb4a68642a292fd0250aa46505 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 23 Aug 2013 10:27:02 +0000 Subject: Turn MipsOptimizeMathLibCalls into a target-independent scalar transform ...so that it can be used for z too. Most of the code is the same. The only real change is to use TargetTransformInfo to test when a sqrt instruction is available. The pass is opt-in because at the moment it only handles sqrt. 
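A backend opts in by adding the new pass from its addIRPasses() hook. A
minimal sketch, assuming the usual TargetPassConfig boilerplate
("MyTargetPassConfig" is a placeholder name; the SystemZ hunk below does
exactly this):

  void MyTargetPassConfig::addIRPasses() {
    // Run the common IR passes first, then opt in to partial inlining of
    // library calls (currently sqrt/sqrtf only).
    // Requires llvm/Transforms/Scalar.h for the create function.
    TargetPassConfig::addIRPasses();
    addPass(createPartiallyInlineLibCallsPass());
  }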
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189097 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/CMakeLists.txt               |   1 -
 lib/Target/Mips/Mips.h                       |   1 -
 lib/Target/Mips/MipsOptimizeMathLibCalls.cpp | 175 ---------------------------
 lib/Target/Mips/MipsTargetMachine.cpp        |   3 +-
 lib/Target/SystemZ/SystemZTargetMachine.cpp  |   7 ++
 5 files changed, 9 insertions(+), 178 deletions(-)
 delete mode 100644 lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
(limited to 'lib/Target')

diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index aedb78b..6acc9a8 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -35,7 +35,6 @@ add_llvm_target(MipsCodeGen
   MipsMachineFunction.cpp
   MipsModuleISelDAGToDAG.cpp
   MipsOs16.cpp
-  MipsOptimizeMathLibCalls.cpp
   MipsRegisterInfo.cpp
   MipsSEFrameLowering.cpp
   MipsSEInstrInfo.cpp
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index b88c0d2..e796deb 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -28,7 +28,6 @@ namespace llvm {
   FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
                                              JITCodeEmitter &JCE);
   FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
-  FunctionPass *createMipsOptimizeMathLibCalls(MipsTargetMachine &TM);
 } // end namespace llvm;

 #endif
diff --git a/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp b/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
deleted file mode 100644
index de3f09c..0000000
--- a/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-//===---- MipsOptimizeMathLibCalls.cpp - Optimize math lib calls. ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass does an IR transformation which enables the backend to emit native
-// math instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MipsTargetMachine.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-
-static cl::opt<bool> DisableOpt("disable-mips-math-optimization",
-                                cl::init(false),
-                                cl::desc("MIPS: Disable math lib call "
-                                         "optimization."), cl::Hidden);
-
-namespace {
-  class MipsOptimizeMathLibCalls : public FunctionPass {
-  public:
-    static char ID;
-
-    MipsOptimizeMathLibCalls(MipsTargetMachine &TM_) :
-      FunctionPass(ID), TM(TM_) {}
-
-    virtual const char *getPassName() const {
-      return "MIPS: Optimize calls to math library functions.";
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
-    virtual bool runOnFunction(Function &F);
-
-  private:
-    /// Optimize calls to sqrt.
-    bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
-                      BasicBlock &CurrBB,
-                      Function::iterator &BB);
-
-    const TargetMachine &TM;
-  };
-
-  char MipsOptimizeMathLibCalls::ID = 0;
-}
-
-FunctionPass *llvm::createMipsOptimizeMathLibCalls(MipsTargetMachine &TM) {
-  return new MipsOptimizeMathLibCalls(TM);
-}
-
-void MipsOptimizeMathLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<TargetLibraryInfo>();
-  FunctionPass::getAnalysisUsage(AU);
-}
-
-bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
-  if (DisableOpt)
-    return false;
-
-  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
-
-  if (Subtarget.inMips16Mode())
-    return false;
-
-  bool Changed = false;
-  Function::iterator CurrBB;
-  const TargetLibraryInfo *LibInfo = &getAnalysis<TargetLibraryInfo>();
-
-  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
-    CurrBB = BB++;
-
-    for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
-         II != IE; ++II) {
-      CallInst *Call = dyn_cast<CallInst>(&*II);
-      Function *CalledFunc;
-
-      if (!Call || !(CalledFunc = Call->getCalledFunction()))
-        continue;
-
-      LibFunc::Func LibFunc;
-      Attribute A = CalledFunc->getAttributes()
-        .getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
-
-      // Skip if function has "use-soft-float" attribute.
-      if ((A.isStringAttribute() && (A.getValueAsString() == "true")) ||
-          TM.Options.UseSoftFloat)
-        continue;
-
-      // Skip if function either has local linkage or is not a known library
-      // function.
-      if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
-          !LibInfo->getLibFunc(CalledFunc->getName(), LibFunc))
-        continue;
-
-      switch (LibFunc) {
-      case LibFunc::sqrtf:
-      case LibFunc::sqrt:
-        if (optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
-          break;
-        continue;
-      default:
-        continue;
-      }
-
-      Changed = true;
-      break;
-    }
-  }
-
-  return Changed;
-}
-
-bool MipsOptimizeMathLibCalls::optimizeSQRT(CallInst *Call,
-                                            Function *CalledFunc,
-                                            BasicBlock &CurrBB,
-                                            Function::iterator &BB) {
-  // There is no need to change the IR, since backend will emit sqrt
-  // instruction if the call has already been marked read-only.
-  if (Call->onlyReadsMemory())
-    return false;
-
-  // Do the following transformation:
-  //
-  // (before)
-  // dst = sqrt(src)
-  //
-  // (after)
-  // v0 = sqrt_noreadmem(src) # native sqrt instruction.
-  // if (v0 is a NaN)
-  //   v1 = sqrt(src)         # library call.
-  // dst = phi(v0, v1)
-  //
-
-  // Move all instructions following Call to newly created block JoinBB.
-  // Create phi and replace all uses.
-  BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
-  IRBuilder<> Builder(JoinBB, JoinBB->begin());
-  PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
-  Call->replaceAllUsesWith(Phi);
-
-  // Create basic block LibCallBB and insert a call to library function sqrt.
-  BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
-                                             CurrBB.getParent(), JoinBB);
-  Builder.SetInsertPoint(LibCallBB);
-  Instruction *LibCall = Call->clone();
-  Builder.Insert(LibCall);
-  Builder.CreateBr(JoinBB);
-
-  // Add attribute "readnone" so that backend can use a native sqrt instruction
-  // for this call. Insert a FP compare instruction and a conditional branch
-  // at the end of CurrBB.
-  Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
-  CurrBB.getTerminator()->eraseFromParent();
-  Builder.SetInsertPoint(&CurrBB);
-  Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
-  Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
-
-  // Add phi operands.
- Phi->addIncoming(Call, &CurrBB); - Phi->addIncoming(LibCall, LibCallBB); - - BB = JoinBB; - return true; -} diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index ced6a09..f25afe3 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -160,7 +161,7 @@ void MipsPassConfig::addIRPasses() { addPass(createMipsOs16(getMipsTargetMachine())); if (getMipsSubtarget().inMips16HardFloat()) addPass(createMips16HardFloat(getMipsTargetMachine())); - addPass(createMipsOptimizeMathLibCalls(getMipsTargetMachine())); + addPass(createPartiallyInlineLibCallsPass()); } // Install an instruction selector pass using // the ISelDag to gen Mips code. diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 856183c..f276152 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -10,6 +10,7 @@ #include "SystemZTargetMachine.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -47,12 +48,18 @@ public: return getTM(); } + virtual void addIRPasses() LLVM_OVERRIDE; virtual bool addInstSelector() LLVM_OVERRIDE; virtual bool addPreSched2() LLVM_OVERRIDE; virtual bool addPreEmitPass() LLVM_OVERRIDE; }; } // end anonymous namespace +void SystemZPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + addPass(createPartiallyInlineLibCallsPass()); +} + bool SystemZPassConfig::addInstSelector() { addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); return false; -- cgit v1.1 From 65ddcfa8c1c05aeecd9d4fb062bb121e376aaceb Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 23 Aug 2013 11:18:53 +0000 Subject: [SystemZ] Prefer LHI;ST... over LAY;MV... If we had a store of an integer to memory, and the integer and store size were suitable for a form of MV..., we used MV... no matter what. We could then have sequences like: lay %r2, 0(%r3,%r4) mvi 0(%r2), 4 In these cases it seems better to force the constant into a register and use a normal store: lhi %r2, 4 stc %r2, 0(%r3, %r4) since %r2 is more likely to be hoisted and is easier to rematerialize. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189098 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 25 +++++++++++++++++++++++++ lib/Target/SystemZ/SystemZInstrFormats.td | 26 ++++++++++++++++---------- lib/Target/SystemZ/SystemZOperands.td | 3 +++ 3 files changed, 44 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index d9794b1..e0d5437 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -159,6 +159,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, SDValue &Base, SDValue &Disp); + // Try to match Addr as a FormBDX address with displacement type DR. + // Return true on success and if the result had no index. Store the + // base and displacement in Base and Disp respectively. 
+ bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp); + // Try to match Addr as a FormBDX* address of form Form with // displacement type DR. Return true on success, storing the base, // displacement and index in Base, Disp and Index respectively. @@ -189,6 +195,14 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); } + // MVI matching routines used by SystemZOperands.td. + bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); + } + bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); + } + // BDX matching routines used by SystemZOperands.td. bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { @@ -575,6 +589,17 @@ bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, return true; } +bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp) { + SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR); + if (!selectAddress(Addr, AM) || AM.Index.getNode()) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp); + return true; +} + bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, SystemZAddressingMode::DispRange DR, SDValue Addr, SDValue &Base, diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 39b7639..f538332 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -627,27 +627,33 @@ class StoreMultipleRSY opcode, RegisterOperand cls> let mayStore = 1; } +// StoreSI* instructions are used to store an integer to memory, but the +// addresses are more restricted than for normal stores. If we are in the +// situation of having to force either the address into a register or the +// constant into a register, it's usually better to do the latter. +// We therefore match the address in the same way as a normal store and +// only use the StoreSI* instruction if the matched address is suitable. 
class StoreSI opcode, SDPatternOperator operator, - Immediate imm, AddressingMode mode = bdaddr12only> - : InstSI + : InstSI { + [(operator imm:$I2, mviaddr12pair:$BD1)]> { let mayStore = 1; } class StoreSIY opcode, SDPatternOperator operator, - Immediate imm, AddressingMode mode = bdaddr20only> - : InstSIY + : InstSIY { + [(operator imm:$I2, mviaddr20pair:$BD1)]> { let mayStore = 1; } class StoreSIL opcode, SDPatternOperator operator, Immediate imm> - : InstSIL { + [(operator imm:$I2, mviaddr12pair:$BD1)]> { let mayStore = 1; } @@ -655,9 +661,9 @@ multiclass StoreSIPair siOpcode, bits<16> siyOpcode, SDPatternOperator operator, Immediate imm> { let DispKey = mnemonic in { let DispSize = "12" in - def "" : StoreSI; + def "" : StoreSI; let DispSize = "20" in - def Y : StoreSIY; + def Y : StoreSIY; } } diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 9d79439..eb96dba 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -435,6 +435,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; // is one of: // shift : base + displacement (32-bit) // bdaddr : base + displacement +// mviaddr : like bdaddr, but reject cases with a natural index // bdxaddr : base + displacement + index // laaddr : like bdxaddr, but used for Load Address operations // dynalloc : base + displacement + index + ADJDYNALLOC @@ -460,6 +461,8 @@ def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; +def mviaddr12pair : BDMode <"MVIAddr", "64", "12", "Pair">; +def mviaddr20pair : BDMode <"MVIAddr", "64", "20", "Pair">; def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; -- cgit v1.1 From 35c93e4e42d7a35a90e89211fa62f478e25ba0a4 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 23 Aug 2013 11:27:19 +0000 Subject: [SystemZ] Try reversing comparisons whose first operand is in memory This allows us to make more use of the many compare reg,mem instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189099 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 71 ++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index a1eecd7..65b2ad2 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1110,6 +1110,69 @@ static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0, return false; } +// Return true if Op is either an unextended load, or a load with the +// extension type given by IsUnsigned. +static bool isNaturalMemoryOperand(SDValue Op, bool IsUnsigned) { + LoadSDNode *Load = dyn_cast(Op.getNode()); + if (Load) + switch (Load->getExtensionType()) { + case ISD::NON_EXTLOAD: + case ISD::EXTLOAD: + return true; + case ISD::SEXTLOAD: + return !IsUnsigned; + case ISD::ZEXTLOAD: + return IsUnsigned; + default: + break; + } + return false; +} + +// Return true if it is better to swap comparison operands Op0 and Op1. +// IsUnsigned says whether an integer comparison is signed or unsigned. 
+static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1, + bool IsUnsigned) { + // Leave f128 comparisons alone, since they have no memory forms. + if (Op0.getValueType() == MVT::f128) + return false; + + // Always keep a floating-point constant second, since comparisons with + // zero can use LOAD TEST and comparisons with other constants make a + // natural memory operand. + if (isa(Op1)) + return false; + + // Never swap comparisons with zero since there are many ways to optimize + // those later. + ConstantSDNode *COp1 = dyn_cast(Op1); + if (COp1 && COp1->getZExtValue() == 0) + return false; + + // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. + // In that case we generally prefer the memory to be second. + if ((isNaturalMemoryOperand(Op0, IsUnsigned) && Op0.hasOneUse()) && + !(isNaturalMemoryOperand(Op1, IsUnsigned) && Op1.hasOneUse())) { + // The only exceptions are when the second operand is a constant and + // we can use things like CHHSI. + if (!COp1) + return true; + if (IsUnsigned) { + // The memory-immediate instructions require 16-bit unsigned integers. + if (isUInt<16>(COp1->getZExtValue())) + return false; + } else { + // There are no comparisons between integers and signed memory bytes. + // The others require 16-bit signed integers. + if (cast(Op0.getNode())->getMemoryVT() == MVT::i8 || + isInt<16>(COp1->getSExtValue())) + return false; + } + return true; + } + return false; +} + // Return a target node that compares CmpOp0 with CmpOp1 and stores a // 2-bit result in CC. Set CCValid to the CCMASK_* of all possible // 2-bit results and CCMask to the subset of those results that are @@ -1131,6 +1194,14 @@ static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, IsUnsigned = true; } + if (shouldSwapCmpOperands(CmpOp0, CmpOp1, IsUnsigned)) { + std::swap(CmpOp0, CmpOp1); + CCMask = ((CCMask & SystemZ::CCMASK_CMP_EQ) | + (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) | + (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) | + (CCMask & SystemZ::CCMASK_CMP_UO)); + } + SDLoc DL(CmpOp0); return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP), DL, MVT::Glue, CmpOp0, CmpOp1); -- cgit v1.1 From a550b51bac50493db75a7b5788a3f2c3b62fd913 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 23 Aug 2013 11:36:42 +0000 Subject: [SystemZ] Add basic prefetch support Just the instructions and intrinsics for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189100 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/README.txt | 4 ---- lib/Target/SystemZ/SystemZ.h | 4 ++++ lib/Target/SystemZ/SystemZISelLowering.cpp | 26 ++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZISelLowering.h | 8 +++++++- lib/Target/SystemZ/SystemZInstrFormats.td | 20 ++++++++++++++++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 7 +++++++ lib/Target/SystemZ/SystemZOperators.td | 6 ++++++ 7 files changed, 70 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 4107685..d5361fb 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -35,10 +35,6 @@ performance measurements. -- -We don't support prefetching yet. - --- - There is no scheduling support. 
 --
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index bb6ceac..ea6c7d1 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -57,6 +57,10 @@ namespace llvm {
   const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
   const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;

+  // Mask assignments for PFD.
+  const unsigned PFD_READ  = 1;
+  const unsigned PFD_WRITE = 2;
+
   // Return true if Val fits an LLILL operand.
   static inline bool isImmLL(uint64_t Val) {
     return (Val & ~0x000000000000ffffULL) == 0;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 65b2ad2..20afab7 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
   setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

+  // Handle prefetches with PFD or PFDRL.
+  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
   // Handle floating-point types.
   for (unsigned I = MVT::FIRST_FP_VALUETYPE;
        I <= MVT::LAST_FP_VALUETYPE;
@@ -1806,6 +1809,26 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                             SystemZ::R15D, Op.getOperand(1));
 }

+SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+  if (!IsData)
+    // Just preserve the chain.
+    return Op.getOperand(0);
+
+  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
+  MemIntrinsicSDNode *Node = cast<MemIntrinsicSDNode>(Op.getNode());
+  SDValue Ops[] = {
+    Op.getOperand(0),
+    DAG.getConstant(Code, MVT::i32),
+    Op.getOperand(1)
+  };
+  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
+                                 Node->getVTList(), Ops, array_lengthof(Ops),
+                                 Node->getMemoryVT(), Node->getMemOperand());
+}
+
 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                               SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
@@ -1869,6 +1892,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
     return lowerSTACKSAVE(Op, DAG);
   case ISD::STACKRESTORE:
     return lowerSTACKRESTORE(Op, DAG);
+  case ISD::PREFETCH:
+    return lowerPREFETCH(Op, DAG);
   default:
     llvm_unreachable("Unexpected node to lower");
   }
@@ -1909,6 +1934,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(ATOMIC_LOADW_UMIN);
     OPCODE(ATOMIC_LOADW_UMAX);
     OPCODE(ATOMIC_CMP_SWAPW);
+    OPCODE(PREFETCH);
   }
   return NULL;
 #undef OPCODE
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 604453d..f6a2ce0 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -132,7 +132,12 @@ namespace SystemZISD {
   //             operand into the high bits
   // Operand 4: the negative of operand 2, for rotating the other way
   // Operand 5: the width of the field in bits (8 or 16)
-  ATOMIC_CMP_SWAPW
+  ATOMIC_CMP_SWAPW,
+
+  // Prefetch from the second operand using the 4-bit control code in
+  // the first operand.  The code is 1 for a load prefetch and 2 for
+  // a store prefetch.
+ PREFETCH }; } @@ -225,6 +230,7 @@ private: SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index f538332..a7e18ec 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -540,6 +540,10 @@ class InstSS op, dag outs, dag ins, string asmstr, list pattern> // One output operand and five input operands. The first two operands // are registers and the other three are immediates. // +// Prefetch: +// One 4-bit immediate operand and one address operand. The immediate +// operand is 1 for a load prefetch and 2 for a store prefetch. +// // The format determines which input operands are tied to output operands, // and also determines the shape of any address operand. // @@ -1304,6 +1308,22 @@ class RotateSelectRIEf opcode, RegisterOperand cls1, let DisableEncoding = "$R1src"; } +class PrefetchRXY opcode, SDPatternOperator operator> + : InstRXY; + +class PrefetchRILPC opcode, + SDPatternOperator operator> + : InstRIL { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + // A floating-point load-and test operation. Create both a normal unary // operation and one that acts as a comparison against zero. multiclass LoadAndTestRRE opcode, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index d857a57..8e1f5ac 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1035,6 +1035,13 @@ let mayLoad = 1, Defs = [CC], Uses = [R0W] in defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; //===----------------------------------------------------------------------===// +// Prefetch +//===----------------------------------------------------------------------===// + +def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; +def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; + +//===----------------------------------------------------------------------===// // Atomic operations //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 5745e29..e2c43d6 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -64,6 +64,9 @@ def SDT_ZString : SDTypeProfile<1, 3, SDTCisPtrTy<2>, SDTCisVT<3, i32>]>; def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_ZPrefetch : SDTypeProfile<0, 2, + [SDTCisVT<0, i8>, + SDTCisPtrTy<1>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -130,6 +133,9 @@ def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, [SDNPInGlue]>; +def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + SDNPMemOperand]>; //===----------------------------------------------------------------------===// // 
Pattern fragments -- cgit v1.1 From 5768bb8d77892926dff0d078b1fb08c14ea791f3 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 23 Aug 2013 11:53:55 +0000 Subject: Add function attribute 'optnone'. This function attribute indicates that the function is not optimized by any optimization or code generator passes with the exception of interprocedural optimization passes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189101 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CppBackend/CPPBackend.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 0ddcad2..ace4b74 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -497,6 +497,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL, HANDLE_ATTR(ReadOnly); HANDLE_ATTR(NoInline); HANDLE_ATTR(AlwaysInline); + HANDLE_ATTR(OptimizeNone); HANDLE_ATTR(OptimizeForSize); HANDLE_ATTR(StackProtect); HANDLE_ATTR(StackProtectReq); -- cgit v1.1 From a0b2d332c114571716746ba90c815cfb6f68d4ab Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 23 Aug 2013 12:01:13 +0000 Subject: [ARMv8] Add CodeGen for VMAXNM/VMINNM. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189103 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 16 ++++++++++++++++ lib/Target/ARM/ARMISelLowering.h | 2 ++ lib/Target/ARM/ARMInstrInfo.td | 7 ++++++- lib/Target/ARM/ARMInstrVFP.td | 12 +++++++----- 4 files changed, 31 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 7021941..74353c1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1069,6 +1069,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; + case ARMISD::VMAXNM: return "ARMISD::VMAX"; + case ARMISD::VMINNM: return "ARMISD::VMIN"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; @@ -3276,6 +3278,20 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Try to generate VSEL on ARMv8. if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { + // We can select VMAXNM/VMINNM from a compare followed by a select with the + // same operands, as follows: + // c = fcmp [ogt, olt, ugt, ult] a, b + // select c, a, b + // We only do this in unsafe-fp-math, because signed zeros and NaNs are + // handled differently than the original code sequence. 
+ if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && + RHS == FalseVal) { + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } + bool swpCmpOps = false; bool swpVselOps = false; checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 44c769f..be7811f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -186,6 +186,8 @@ namespace llvm { // Floating-point max and min: FMAX, FMIN, + VMAXNM, + VMINNM, // Bit-field insert BFI, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index df64a09..dc9a6d2 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -71,6 +71,9 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>; +def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>; + def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, @@ -174,9 +177,11 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; +def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>; +def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index b4df4d7..3bb4d6f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -356,22 +356,24 @@ defm VSELGE : vsel_inst<"ge", 0b10, 10>; defm VSELEQ : vsel_inst<"eq", 0b00, 0>; defm VSELVS : vsel_inst<"vs", 0b01, 6>; -multiclass vmaxmin_inst { +multiclass vmaxmin_inst { let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { def S : ASbInp<0b11101, 0b00, opc, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), - []>, Requires<[HasV8FP]>; + [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>, + Requires<[HasV8FP]>; def D : ADbInp<0b11101, 0b00, opc, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"), - []>, Requires<[HasV8FP]>; + [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>, + Requires<[HasV8FP]>; } } -defm VMAXNM : vmaxmin_inst<"vmaxnm", 0>; -defm VMINNM : vmaxmin_inst<"vminnm", 1>; +defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>; +defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>; // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), -- cgit v1.1 From 6cbb39e556c94ef47a8e02e3a4bb87eb91df5aa3 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 23 Aug 2013 15:20:56 +0000 Subject: [ARM] Fix another ARM FastISel -verify-machineinstrs issue. 
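The fix follows the same pattern as the earlier FastISel fixes of this kind:
constrain each virtual register to the register class the instruction
description expects before building the MachineInstr. For example, from the
first hunk below:

  // Constrain SrcReg to the class required by operand 1 of Opc so that
  // the machine verifier does not flag a register-class mismatch.
  SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));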
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189109 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 628b514..0927e9a 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1182,6 +1182,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, (const TargetRegisterClass*)&ARM::tGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass); unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; + SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), Res) .addReg(SrcReg).addImm(1)); @@ -1253,6 +1254,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, ARMSimplifyAddress(Addr, VT, useAM3); // Create the base instruction, then add the operands. + SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(StrOpc)) .addReg(SrcReg); @@ -1856,6 +1858,8 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { if (SrcReg2 == 0) return false; unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass); + SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1); + SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2)); -- cgit v1.1 From f89c7498706185ef7cd974fa3459f2cbdcfba73e Mon Sep 17 00:00:00 2001 From: Jim Cownie Date: Fri, 23 Aug 2013 15:51:37 +0000 Subject: Checking commit access; added one space git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189111 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/README.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index b4285a0..4fbad88 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -2039,5 +2039,5 @@ GCC: sbbl %eax, %eax andl $-10, %eax addl $5, %eax - + //===---------------------------------------------------------------------===// -- cgit v1.1 From 4aa8bdaa46b455d446c10325533dcbdc65eeecdd Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 23 Aug 2013 20:21:34 +0000 Subject: Rename features to match what gcc and clang use. There is no advantage in being different and using the same names simplifies clang a bit. 
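For example, the X86 subtarget features are now spelled "sse4.1", "sse4.2"
and "rdrnd" rather than "sse41", "sse42" and "rdrand", so an illustrative
invocation such as llc -march=x86-64 -mattr=+sse4.1,+sse4.2,+rdrnd lines up
with the clang/gcc spellings -msse4.1, -msse4.2 and -mrdrnd.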
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189141 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index ca49482..2c3643d 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -50,10 +50,10 @@ def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3", def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3", "Enable SSSE3 instructions", [FeatureSSE3]>; -def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41", +def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41", "Enable SSE 4.1 instructions", [FeatureSSSE3]>; -def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", +def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", [FeatureSSE41]>; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", @@ -119,7 +119,7 @@ def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", [FeatureSSE2]>; def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; -def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true", +def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", "Support RDRAND instruction">; def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions">; -- cgit v1.1 From 30632d2fbb3c46e552a68aa7961b12eb44cece01 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Sat, 24 Aug 2013 01:17:23 +0000 Subject: [NVPTX] Re-enable assembly printing support for inline assembly This support was removed by accident during the MC conversion git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189160 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 111 +++++++++++++++++++++++++++++++++++ lib/Target/NVPTX/NVPTXAsmPrinter.h | 9 ++- 2 files changed, 119 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index a2b9bec..12f18c4 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "NVPTXRegisterInfo.h" #include "NVPTXTargetMachine.h" #include "NVPTXUtilities.h" +#include "InstPrinter/NVPTXInstPrinter.h" #include "cl_common_defines.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ConstantFolding.h" @@ -1993,6 +1994,116 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { return false; } +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. 
+ + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + case 'r': + break; + } + } + + printOperand(MI, OpNo, O); + + return false; +} + +bool NVPTXAsmPrinter::PrintAsmMemoryOperand( + const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier + + O << '['; + printMemOperand(MI, OpNo, O); + O << ']'; + + return false; +} + +void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O, const char *Modifier) { + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MO.getReg() == NVPTX::VRDepot) + O << DEPOTNAME << getFunctionNumber(); + else + O << NVPTXInstPrinter::getRegisterName(MO.getReg()); + } else { + emitVirtualRegister(MO.getReg(), false, O); + } + return; + + case MachineOperand::MO_Immediate: + if (!Modifier) + O << MO.getImm(); + else if (strstr(Modifier, "vec") == Modifier) + printVecModifiedImmediate(MO, Modifier, O); + else + llvm_unreachable( + "Don't know how to handle modifier on immediate operand"); + return; + + case MachineOperand::MO_FPImmediate: + printFPConstant(MO.getFPImm(), O); + break; + + case MachineOperand::MO_GlobalAddress: + O << *Mang->getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: { + const char *symbname = MO.getSymbolName(); + if (strstr(symbname, ".PARAM") == symbname) { + unsigned index; + sscanf(symbname + 6, "%u[];", &index); + printParamName(index, O); + } else if (strstr(symbname, ".HLPPARAM") == symbname) { + unsigned index; + sscanf(symbname + 9, "%u[];", &index); + O << *CurrentFnSym << "_param_" << index << "_offset"; + } else + O << symbname; + break; + } + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + + default: + llvm_unreachable("Operand type not supported."); + } +} + +void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, + raw_ostream &O, const char *Modifier) { + printOperand(MI, opNum, O); + + if (Modifier && !strcmp(Modifier, "add")) { + O << ", "; + printOperand(MI, opNum + 1, O); + } else { + if (MI->getOperand(opNum + 1).isImm() && + MI->getOperand(opNum + 1).getImm() == 0) + return; // don't print ',0' or '+0' + O << "+"; + printOperand(MI, opNum + 1, O); + } +} + + // Force static initialization. 
extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { RegisterAsmPrinter X(TheNVPTXTarget32); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 27bfa54..06ea102 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -222,7 +222,14 @@ private: bool isImageType(const Type *Ty); void printReturnValStr(const Function *, raw_ostream &O); void printReturnValStr(const MachineFunction &MF, raw_ostream &O); - + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &); + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier = 0); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &); protected: bool doInitialization(Module &M); bool doFinalization(Module &M); -- cgit v1.1 From cbbd1eeecb2bf49915c4bd0e20b1e9bad8696ada Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sat, 24 Aug 2013 01:24:44 +0000 Subject: Start to add the builtind to the mips16 exclusion lists for fp. I need to add the rest of these to the list or else to delay putting out the actual stub until later in code generation when I know if the external function ever got emitted. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189161 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16HardFloat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 46b04c3..617c178 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -325,7 +325,7 @@ static void assureFPCallStub(Function &F, Module *M, // Functions that are inline intrinsics don't need helpers. // static const char *IntrinsicInline[] = - {"fabs"}; + {"fabs", "llvm.powi.f64"}; static bool isIntrinsicInline(Function *F) { return std::binary_search( -- cgit v1.1 From 0570be84048b4c2a979923c583054de147590016 Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Sat, 24 Aug 2013 17:53:16 +0000 Subject: Revert 189161 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189176 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16HardFloat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 617c178..46b04c3 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -325,7 +325,7 @@ static void assureFPCallStub(Function &F, Module *M, // Functions that are inline intrinsics don't need helpers. // static const char *IntrinsicInline[] = - {"fabs", "llvm.powi.f64"}; + {"fabs"}; static bool isIntrinsicInline(Function *F) { return std::binary_search( -- cgit v1.1 From f86778a84898b773c54e249a015be183acf13c30 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 24 Aug 2013 19:50:11 +0000 Subject: Remove trailing whitespace. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189178 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/Disassembler/X86Disassembler.cpp | 2 +- lib/Target/X86/X86InstrInfo.td | 42 ++++++++++++------------- 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 82af6fa..9cda49c 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -315,7 +315,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, // operand is 64 bits wide. Do nothing. break; } - + if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, insn.immediateOffset, insn.immediateSize, mcInst, Dis)) diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0960a2a..4c97576 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -278,28 +278,28 @@ def ptr_rc_nosp : PointerLikeRegClass<1>; // *mem - Operand definitions for the funky X86 addressing mode operands. // -def X86MemAsmOperand : AsmOperandClass { - let Name = "Mem"; let PredicateMethod = "isMem"; +def X86MemAsmOperand : AsmOperandClass { + let Name = "Mem"; let PredicateMethod = "isMem"; } -def X86Mem8AsmOperand : AsmOperandClass { +def X86Mem8AsmOperand : AsmOperandClass { let Name = "Mem8"; let PredicateMethod = "isMem8"; } -def X86Mem16AsmOperand : AsmOperandClass { +def X86Mem16AsmOperand : AsmOperandClass { let Name = "Mem16"; let PredicateMethod = "isMem16"; } -def X86Mem32AsmOperand : AsmOperandClass { +def X86Mem32AsmOperand : AsmOperandClass { let Name = "Mem32"; let PredicateMethod = "isMem32"; } -def X86Mem64AsmOperand : AsmOperandClass { +def X86Mem64AsmOperand : AsmOperandClass { let Name = "Mem64"; let PredicateMethod = "isMem64"; } -def X86Mem80AsmOperand : AsmOperandClass { +def X86Mem80AsmOperand : AsmOperandClass { let Name = "Mem80"; let PredicateMethod = "isMem80"; } -def X86Mem128AsmOperand : AsmOperandClass { +def X86Mem128AsmOperand : AsmOperandClass { let Name = "Mem128"; let PredicateMethod = "isMem128"; } -def X86Mem256AsmOperand : AsmOperandClass { +def X86Mem256AsmOperand : AsmOperandClass { let Name = "Mem256"; let PredicateMethod = "isMem256"; } @@ -343,29 +343,29 @@ def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; def opaque512mem : X86MemOperand<"printopaquemem">; -def i8mem : X86MemOperand<"printi8mem"> { +def i8mem : X86MemOperand<"printi8mem"> { let ParserMatchClass = X86Mem8AsmOperand; } -def i16mem : X86MemOperand<"printi16mem"> { +def i16mem : X86MemOperand<"printi16mem"> { let ParserMatchClass = X86Mem16AsmOperand; } -def i32mem : X86MemOperand<"printi32mem"> { +def i32mem : X86MemOperand<"printi32mem"> { let ParserMatchClass = X86Mem32AsmOperand; } -def i64mem : X86MemOperand<"printi64mem"> { +def i64mem : X86MemOperand<"printi64mem"> { let ParserMatchClass = X86Mem64AsmOperand; } -def i128mem : X86MemOperand<"printi128mem"> { +def i128mem : X86MemOperand<"printi128mem"> { let ParserMatchClass = X86Mem128AsmOperand; } -def i256mem : X86MemOperand<"printi256mem"> { +def i256mem : X86MemOperand<"printi256mem"> { let ParserMatchClass = X86Mem256AsmOperand; } -def i512mem : X86MemOperand<"printi512mem"> { +def i512mem : X86MemOperand<"printi512mem"> { let ParserMatchClass = X86Mem512AsmOperand; } -def f32mem : X86MemOperand<"printf32mem"> { +def f32mem : 
X86MemOperand<"printf32mem"> { let ParserMatchClass = X86Mem32AsmOperand; } -def f64mem : X86MemOperand<"printf64mem"> { +def f64mem : X86MemOperand<"printf64mem"> { let ParserMatchClass = X86Mem64AsmOperand; } -def f80mem : X86MemOperand<"printf80mem"> { +def f80mem : X86MemOperand<"printf80mem"> { let ParserMatchClass = X86Mem80AsmOperand; } -def f128mem : X86MemOperand<"printf128mem"> { +def f128mem : X86MemOperand<"printf128mem"> { let ParserMatchClass = X86Mem128AsmOperand; } -def f256mem : X86MemOperand<"printf256mem">{ +def f256mem : X86MemOperand<"printf256mem">{ let ParserMatchClass = X86Mem256AsmOperand; } def f512mem : X86MemOperand<"printf512mem">{ let ParserMatchClass = X86Mem512AsmOperand; } -- cgit v1.1 From 916f1a1470a9bc79c6361c333650b4f16061c611 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 24 Aug 2013 20:31:14 +0000 Subject: Add hasSideEffects/mayLoad/mayStore flags to the X86 moffs8/moffs16/moffs32/moffs64 versions of move. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189182 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4c97576..f4118da 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1072,9 +1072,12 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; } // SchedRW +let hasSideEffects = 0 in { + /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. let SchedRW = [WriteALU] in { +let mayLoad = 1 in { def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; @@ -1084,6 +1087,8 @@ def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; +} +let mayStore = 1 in { def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; @@ -1094,9 +1099,11 @@ def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; } +} // These forms all have full 64-bit absolute addresses in their instructions // and use the movabs mnemonic to indicate this specific form. 
+let mayLoad = 1 in { def MOV64o8a : RIi64_NOREX<0xA0, RawFrm, (outs), (ins offset64:$src), "movabs{b}\t{$src, %al|al, $src}", []>, Requires<[In64BitMode]>; @@ -1109,7 +1116,9 @@ def MOV64o32a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset64:$src), def MOV64o64a : RIi64<0xA1, RawFrm, (outs), (ins offset64:$src), "movabs{q}\t{$src, %rax|rax, $src}", []>, Requires<[In64BitMode]>; +} +let mayStore = 1 in { def MOV64ao8 : RIi64_NOREX<0xA2, RawFrm, (outs offset64:$dst), (ins), "movabs{b}\t{%al, $dst|$dst, al}", []>, Requires<[In64BitMode]>; @@ -1122,6 +1131,8 @@ def MOV64ao32 : RIi64_NOREX<0xA3, RawFrm, (outs offset64:$dst), (ins), def MOV64ao64 : RIi64<0xA3, RawFrm, (outs offset64:$dst), (ins), "movabs{q}\t{%rax, $dst|$dst, rax}", []>, Requires<[In64BitMode]>; +} +} // hasSideEffects = 0 let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), -- cgit v1.1 From 5e5c3069cd92741852f10a7395133d6d79c5bce7 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sun, 25 Aug 2013 02:40:25 +0000 Subject: Start to add the LLVM builtins to the mips16 exclusion lists for fp. I need to add the rest of these to the list or else to delay putting out the actual stub until later in code generation when I know if the external function ever got emitted Resubmit this patch. The target triple needs to be added to the test so that clang does not tell the backend the wrong target when the host is BSD. There is a clang bug in here somewhere that I need to track down. At Mips this has been filed internally as a bug. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189186 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16HardFloat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 46b04c3..617c178 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -325,7 +325,7 @@ static void assureFPCallStub(Function &F, Module *M, // Functions that are inline intrinsics don't need helpers. // static const char *IntrinsicInline[] = - {"fabs"}; + {"fabs", "llvm.powi.f64"}; static bool isIntrinsicInline(Function *F) { return std::binary_search( -- cgit v1.1 From 41f7baf181ef55fb6935ded8ced3797701a681ca Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 25 Aug 2013 12:54:30 +0000 Subject: AVX-512: added UNPACK instructions and tests for all-zero/all-ones vectors git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 129 ++++++++++++++++++++++--------------- lib/Target/X86/X86InstrAVX512.td | 62 ++++++++++++++++++ lib/Target/X86/X86InstrInfo.cpp | 6 +- 3 files changed, 145 insertions(+), 52 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6e9ecef..a00f848 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3866,37 +3866,46 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, static bool isUNPCKLMask(ArrayRef Mask, MVT VT, bool HasInt256, bool V2IsSplat = false) { - if (VT.is512BitVector()) - return false; - assert((VT.is128BitVector() || VT.is256BitVector()) && - "Unsupported vector type for unpckh"); + assert(VT.getSizeInBits() >= 128 && + "Unsupported vector type for unpckl"); + // AVX defines UNPCK* to operate independently on 128-bit lanes. 
+ unsigned NumLanes; + unsigned NumOf256BitLanes; unsigned NumElts = VT.getVectorNumElements(); - if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && - (!HasInt256 || (NumElts != 16 && NumElts != 32))) + if (VT.is256BitVector()) { + if (NumElts != 4 && NumElts != 8 && + (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; + NumLanes = 2; + NumOf256BitLanes = 1; + } else if (VT.is512BitVector()) { + assert(VT.getScalarType().getSizeInBits() >= 32 && + "Unsupported vector type for unpckh"); + NumLanes = 2; + NumOf256BitLanes = 2; + } else { + NumLanes = 1; + NumOf256BitLanes = 1; + } - // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate - // independently on 128-bit lanes. - unsigned NumLanes = VT.getSizeInBits()/128; - unsigned NumLaneElts = NumElts/NumLanes; + unsigned NumEltsInStride = NumElts/NumOf256BitLanes; + unsigned NumLaneElts = NumEltsInStride/NumLanes; - for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) { - int BitI = Mask[l+i]; - int BitI1 = Mask[l+i+1]; - if (!isUndefOrEqual(BitI, j)) - return false; - if (V2IsSplat) { - if (!isUndefOrEqual(BitI1, NumElts)) + for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) { + for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) { + for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) { + int BitI = Mask[l256*NumEltsInStride+l+i]; + int BitI1 = Mask[l256*NumEltsInStride+l+i+1]; + if (!isUndefOrEqual(BitI, j+l256*NumElts)) return false; - } else { - if (!isUndefOrEqual(BitI1, j + NumElts)) + if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts)) + return false; + if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride)) return false; } } } - return true; } @@ -3904,33 +3913,42 @@ static bool isUNPCKLMask(ArrayRef Mask, MVT VT, /// specifies a shuffle of elements that is suitable for input to UNPCKH. static bool isUNPCKHMask(ArrayRef Mask, MVT VT, bool HasInt256, bool V2IsSplat = false) { - unsigned NumElts = VT.getVectorNumElements(); - - if (VT.is512BitVector()) - return false; - assert((VT.is128BitVector() || VT.is256BitVector()) && + assert(VT.getSizeInBits() >= 128 && "Unsupported vector type for unpckh"); - if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && - (!HasInt256 || (NumElts != 16 && NumElts != 32))) + // AVX defines UNPCK* to operate independently on 128-bit lanes. + unsigned NumLanes; + unsigned NumOf256BitLanes; + unsigned NumElts = VT.getVectorNumElements(); + if (VT.is256BitVector()) { + if (NumElts != 4 && NumElts != 8 && + (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; + NumLanes = 2; + NumOf256BitLanes = 1; + } else if (VT.is512BitVector()) { + assert(VT.getScalarType().getSizeInBits() >= 32 && + "Unsupported vector type for unpckh"); + NumLanes = 2; + NumOf256BitLanes = 2; + } else { + NumLanes = 1; + NumOf256BitLanes = 1; + } - // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate - // independently on 128-bit lanes. 
- unsigned NumLanes = VT.getSizeInBits()/128; - unsigned NumLaneElts = NumElts/NumLanes; + unsigned NumEltsInStride = NumElts/NumOf256BitLanes; + unsigned NumLaneElts = NumEltsInStride/NumLanes; - for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) { - int BitI = Mask[l+i]; - int BitI1 = Mask[l+i+1]; - if (!isUndefOrEqual(BitI, j)) - return false; - if (V2IsSplat) { - if (isUndefOrEqual(BitI1, NumElts)) + for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) { + for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) { + for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) { + int BitI = Mask[l256*NumEltsInStride+l+i]; + int BitI1 = Mask[l256*NumEltsInStride+l+i+1]; + if (!isUndefOrEqual(BitI, j+l256*NumElts)) return false; - } else { - if (!isUndefOrEqual(BitI1, j+NumElts)) + if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts)) + return false; + if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride)) return false; } } @@ -4336,7 +4354,7 @@ bool X86::isVEXTRACT256Index(SDNode *N) { static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { MVT VT = N->getSimpleValueType(0); - assert((VT.is128BitVector() || VT.is256BitVector()) && + assert((VT.getSizeInBits() >= 128) && "Unsupported vector type for PSHUF/SHUFP"); // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate @@ -4345,10 +4363,10 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { unsigned NumLanes = VT.getSizeInBits()/128; unsigned NumLaneElts = NumElts/NumLanes; - assert((NumLaneElts == 2 || NumLaneElts == 4) && - "Only supports 2 or 4 elements per lane"); + assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) && + "Only supports 2, 4 or 8 elements per lane"); - unsigned Shift = (NumLaneElts == 4) ? 1 : 0; + unsigned Shift = (NumLaneElts >= 4) ? 
1 : 0; unsigned Mask = 0; for (unsigned i = 0; i != NumElts; ++i) { int Elt = N->getMaskElt(i); @@ -4680,6 +4698,11 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, array_lengthof(Ops)); } + } else if (VT.is512BitVector()) { // AVX-512 + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, + Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16); } else llvm_unreachable("Unexpected vector type"); @@ -5674,10 +5697,13 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { DAG.getIntPtrConstant(0)); } - if (!isSplatVector(Op.getNode())) - llvm_unreachable("Unsupported predicate operation"); - + // Splat vector (with undefs) SDValue In = Op.getOperand(0); + for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) { + if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF) + llvm_unreachable("Unsupported predicate operation"); + } + SDValue EFLAGS, X86CC; if (In.getOpcode() == ISD::SETCC) { SDValue Op0 = In.getOperand(0); @@ -5759,7 +5785,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256())) return Op; - return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl); + if (!VT.is512BitVector()) + return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl); } SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG); @@ -5841,7 +5868,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { - if (VT.is256BitVector()) { + if (VT.is256BitVector() || VT.is512BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item, DAG.getIntPtrConstant(0)); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 0cb946d..17be5df 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1561,6 +1561,68 @@ def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), (VPMULUDQZrr VR512:$src1, VR512:$src2)>; //===----------------------------------------------------------------------===// +// AVX-512 - Unpack Instructions +//===----------------------------------------------------------------------===// + +multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, + PatFrag mem_frag, RegisterClass RC, + X86MemOperand x86memop, string asm, + Domain d> { + def rr : AVX512PI, EVEX_4V, TB; + def rm : AVX512PI, EVEX_4V, TB; +} + +defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, + VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, + VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, + VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, + VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, 
EVEX_CD8<64, CD8VF>; + +multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop> { + def rr : AVX512BI, EVEX_4V; + def rm : AVX512BI, EVEX_4V; +} +defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, + VR512, memopv16i32, i512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, + VR512, memopv8i64, i512mem>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; +defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, + VR512, memopv16i32, i512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, + VR512, memopv8i64, i512mem>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 71df2bb..c4c090b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2939,7 +2939,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, if (X86::FR32XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) // Copy from a GR32 register to a FR32 register. return HasAVX512 ? X86::VMOVDI2SSZrr : (HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr); - return 0; } @@ -3781,6 +3780,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::AVX_SET0: assert(HasAVX && "AVX not supported"); return Expand2AddrUndef(MIB, get(X86::VXORPSYrr)); + case X86::AVX512_512_SET0: + return Expand2AddrUndef(MIB, get(X86::VPXORDZrr)); case X86::V_SETALLONES: return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); case X86::AVX2_SETALLONES: @@ -3788,6 +3789,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; + case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr)); + case X86::KSET1B: + case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr)); } return false; } -- cgit v1.1 From 5ec8afa7cf9ae11def585fff043b0eabd735ac28 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 25 Aug 2013 17:03:02 +0000 Subject: [Sparc] Added V9's extra floating point registers and their aliases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189195 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcRegisterInfo.cpp | 9 ++++++ lib/Target/Sparc/SparcRegisterInfo.td | 50 +++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 0b0fe2d..d940ae6 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -69,6 +69,15 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(SP::G0); Reserved.set(SP::G6); Reserved.set(SP::G7); + + // Unaliased double registers are not available in non-V9 targets. 
+ if (!Subtarget.isV9()) { + for (unsigned n = 0; n != 16; ++n) { + for (MCRegAliasIterator AI(SP::D16 + n, this, true); AI.isValid(); ++AI) + Reserved.set(*AI); + } + } + return Reserved; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index b239f80..2a575c0 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -23,6 +23,8 @@ class SparcCtrlReg: Register { let Namespace = "SP" in { def sub_even : SubRegIndex<32>; def sub_odd : SubRegIndex<32, 32>; +def sub_even64 : SubRegIndex<64>; +def sub_odd64 : SubRegIndex<64, 64>; } // Registers are identified with 5-bit ID numbers. @@ -39,6 +41,13 @@ class Rd Enc, string n, list subregs> : SparcReg { let CoveredBySubRegs = 1; } +// Rq - Slots in the FP register file for 128-bit floating-point values. +class Rq Enc, string n, list subregs> : SparcReg { + let SubRegs = subregs; + let SubRegIndices = [sub_even64, sub_odd64]; + let CoveredBySubRegs = 1; +} + // Control Registers def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code. def FCC : SparcCtrlReg<"FCC">; @@ -132,6 +141,43 @@ def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[85]>; def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[86]>; def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>; +// Unaliased double precision floating point registers. +// FIXME: Define DwarfRegNum for these registers. +def D16 : SparcReg< 1, "F32">; +def D17 : SparcReg< 3, "F34">; +def D18 : SparcReg< 5, "F36">; +def D19 : SparcReg< 7, "F38">; +def D20 : SparcReg< 9, "F40">; +def D21 : SparcReg<11, "F42">; +def D22 : SparcReg<13, "F44">; +def D23 : SparcReg<15, "F46">; +def D24 : SparcReg<17, "F48">; +def D25 : SparcReg<19, "F50">; +def D26 : SparcReg<21, "F52">; +def D27 : SparcReg<23, "F54">; +def D28 : SparcReg<25, "F56">; +def D29 : SparcReg<27, "F58">; +def D30 : SparcReg<29, "F60">; +def D31 : SparcReg<31, "F62">; + +// Aliases of the F* registers used to hold 128-bit for values (long doubles). +def Q0 : Rq< 0, "F0", [D0, D1]>; +def Q1 : Rq< 4, "F4", [D2, D3]>; +def Q2 : Rq< 8, "F8", [D4, D5]>; +def Q3 : Rq<12, "F12", [D6, D7]>; +def Q4 : Rq<16, "F16", [D8, D9]>; +def Q5 : Rq<20, "F20", [D10, D11]>; +def Q6 : Rq<24, "F24", [D12, D13]>; +def Q7 : Rq<28, "F28", [D14, D15]>; +def Q8 : Rq< 1, "F32", [D16, D17]>; +def Q9 : Rq< 5, "F36", [D18, D19]>; +def Q10 : Rq< 9, "F40", [D20, D21]>; +def Q11 : Rq<13, "F44", [D22, D23]>; +def Q12 : Rq<17, "F48", [D24, D25]>; +def Q13 : Rq<21, "F52", [D26, D27]>; +def Q14 : Rq<25, "F56", [D28, D29]>; +def Q15 : Rq<29, "F60", [D30, D31]>; + // Register classes. // // FIXME: the register order should be defined in terms of the preferred @@ -155,4 +201,6 @@ def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>; // Floating point register classes. def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>; -def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>; +def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 31)>; + +def QFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 15)>; -- cgit v1.1 From 2f17d0facf6a489a051c86c015453c2b102e5c37 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 25 Aug 2013 18:30:06 +0000 Subject: [Sparc] Add long double (f128) instructions to sparc backend. 
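The register encodings these instructions rely on come from r189195 above: SPARC V9 fits the doubled floating-point register file into the same 5-bit field by folding bit 5 of the register number into bit 0, which is why D16 (%f32) and Q8 (which also starts at %f32) both carry encoding 1. A hypothetical helper reproducing the rule (not part of either patch):

    #include <cassert>

    // Encode an even-numbered double-precision register %fN (N = 0..62)
    // into the 5-bit field used by the defs above: bits 4..1 come from N
    // directly, and bit 5 of N is folded into bit 0.
    static unsigned encodeV9DoubleReg(unsigned N) {
      assert(N % 2 == 0 && N < 64 && "double registers sit at even numbers");
      return (N & 0x1e) | (N >> 5);
    }

    int main() {
      assert(encodeV9DoubleReg(0)  == 0);   // D0  : %f0
      assert(encodeV9DoubleReg(26) == 26);  // D13 : %f26
      assert(encodeV9DoubleReg(32) == 1);   // D16 : %f32, likewise Q8
      assert(encodeV9DoubleReg(62) == 31);  // D31 : %f62
      return 0;
    }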
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189198 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/Sparc.td | 4 + lib/Target/Sparc/SparcISelLowering.cpp | 134 +++++++++++++++++++++++++++++++++ lib/Target/Sparc/SparcInstrInfo.td | 108 ++++++++++++++++++++++++++ lib/Target/Sparc/SparcSubtarget.cpp | 3 +- lib/Target/Sparc/SparcSubtarget.h | 2 + 5 files changed, 250 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index d42c40f..0df48f6 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -30,6 +30,10 @@ def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true", "Enable UltraSPARC Visual Instruction Set extensions">; +def FeatureHardQuad + : SubtargetFeature<"hard-quad-float", "HasHardQuad", "true", + "Enable quad-word floating point instructions">; + //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index ce9cd94..654a03a 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1257,15 +1257,21 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, &SP::IntRegsRegClass); addRegisterClass(MVT::f32, &SP::FPRegsRegClass); addRegisterClass(MVT::f64, &SP::DFPRegsRegClass); + addRegisterClass(MVT::f128, &SP::QFPRegsRegClass); if (Subtarget->is64Bit()) addRegisterClass(MVT::i64, &SP::I64RegsRegClass); // Turn FP extload into load/fextend setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + // Sparc doesn't have i1 sign extending load setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + // Turn FP truncstore into trunc + store. setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); // Custom legalize GlobalAddress nodes into LO/HI parts. setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom); @@ -1299,9 +1305,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::f128, Expand); + setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); + setOperationAction(ISD::SETCC, MVT::f128, Expand); // Sparc doesn't have BRCOND either, it has BR_CC. 
setOperationAction(ISD::BRCOND, MVT::Other, Expand); @@ -1310,10 +1319,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); + setOperationAction(ISD::BR_CC, MVT::f128, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::BITCAST, MVT::f64, Expand); @@ -1334,6 +1345,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FABS, MVT::f64, Custom); } + setOperationAction(ISD::FSIN , MVT::f128, Expand); + setOperationAction(ISD::FCOS , MVT::f128, Expand); + setOperationAction(ISD::FSINCOS, MVT::f128, Expand); + setOperationAction(ISD::FREM , MVT::f128, Expand); + setOperationAction(ISD::FMA , MVT::f128, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); @@ -1352,8 +1368,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FPOW , MVT::f128, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); @@ -1387,6 +1405,31 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) if (Subtarget->isV9()) setOperationAction(ISD::CTPOP, MVT::i32, Legal); + if (Subtarget->isV9() && Subtarget->hasHardQuad()) { + setOperationAction(ISD::LOAD, MVT::f128, Legal); + setOperationAction(ISD::STORE, MVT::f128, Legal); + } else { + setOperationAction(ISD::LOAD, MVT::f128, Custom); + setOperationAction(ISD::STORE, MVT::f128, Custom); + } + + if (Subtarget->hasHardQuad()) { + setOperationAction(ISD::FADD, MVT::f128, Legal); + setOperationAction(ISD::FSUB, MVT::f128, Legal); + setOperationAction(ISD::FMUL, MVT::f128, Legal); + setOperationAction(ISD::FDIV, MVT::f128, Legal); + setOperationAction(ISD::FSQRT, MVT::f128, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); + setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); + if (Subtarget->isV9()) { + setOperationAction(ISD::FNEG, MVT::f128, Legal); + setOperationAction(ISD::FABS, MVT::f128, Legal); + } else { + setOperationAction(ISD::FNEG, MVT::f128, Custom); + setOperationAction(ISD::FABS, MVT::f128, Custom); + } + } + setMinFunctionAlignment(2); computeRegisterProperties(); @@ -1800,6 +1843,94 @@ static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG) return DstReg64; } +// Lower a f128 load into two f64 loads. 
+static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG) +{ + SDLoc dl(Op); + LoadSDNode *LdNode = dyn_cast(Op.getNode()); + assert(LdNode && LdNode->getOffset().getOpcode() == ISD::UNDEF + && "Unexpected node type"); + + SDValue Hi64 = DAG.getLoad(MVT::f64, + dl, + LdNode->getChain(), + LdNode->getBasePtr(), + LdNode->getPointerInfo(), + false, false, false, 8); + EVT addrVT = LdNode->getBasePtr().getValueType(); + SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT, + LdNode->getBasePtr(), + DAG.getConstant(8, addrVT)); + SDValue Lo64 = DAG.getLoad(MVT::f64, + dl, + LdNode->getChain(), + LoPtr, + LdNode->getPointerInfo(), + false, false, false, 8); + + SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, MVT::i32); + + SDNode *InFP128 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, MVT::f128); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, + MVT::f128, + SDValue(InFP128, 0), + Hi64, + SubRegEven); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, + MVT::f128, + SDValue(InFP128, 0), + Lo64, + SubRegOdd); + SDValue OutChains[2] = { SDValue(Hi64.getNode(), 1), + SDValue(Lo64.getNode(), 1) }; + SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], 2); + SDValue Ops[2] = {SDValue(InFP128,0), OutChain}; + return DAG.getMergeValues(Ops, 2, dl); +} + +// Lower a f128 store into two f64 stores. +static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + StoreSDNode *StNode = dyn_cast(Op.getNode()); + assert(StNode && StNode->getOffset().getOpcode() == ISD::UNDEF + && "Unexpected node type"); + SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, MVT::i32); + + SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, + dl, + MVT::f64, + StNode->getValue(), + SubRegEven); + SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, + dl, + MVT::f64, + StNode->getValue(), + SubRegOdd); + SDValue OutChains[2]; + OutChains[0] = DAG.getStore(StNode->getChain(), + dl, + SDValue(Hi64, 0), + StNode->getBasePtr(), + MachinePointerInfo(), + false, false, 8); + EVT addrVT = StNode->getBasePtr().getValueType(); + SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT, + StNode->getBasePtr(), + DAG.getConstant(8, addrVT)); + OutChains[1] = DAG.getStore(StNode->getChain(), + dl, + SDValue(Lo64, 0), + LoPtr, + MachinePointerInfo(), + false, false, 8); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], 2); +} + SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -1822,6 +1953,9 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VASTART: return LowerVASTART(Op, DAG, *this); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + + case ISD::LOAD: return LowerF128Load(Op, DAG); + case ISD::STORE: return LowerF128Store(Op, DAG); } } diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index f5aa581..a08d1cb 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -39,6 +39,10 @@ def HasNoV9 : Predicate<"!Subtarget.isV9()">; // HasVIS - This is true when the target processor has VIS extensions. def HasVIS : Predicate<"Subtarget.isVIS()">; +// HasHardQuad - This is true when the target processor supports quad floating +// point instructions. 
+def HasHardQuad : Predicate<"Subtarget.hasHardQuad()">; + // UseDeprecatedInsts - This predicate is true when the target processor is a // V8, or when it is V9 but the V8 deprecated instructions are efficient enough // to use when appropriate. In either of these cases, the instruction selector @@ -354,6 +358,16 @@ def LDDFri : F3_2<3, 0b100011, (outs DFPRegs:$dst), (ins MEMri:$addr), "ldd [$addr], $dst", [(set f64:$dst, (load ADDRri:$addr))]>; +def LDQFrr : F3_1<3, 0b100010, + (outs QFPRegs:$dst), (ins MEMrr:$addr), + "ldq [$addr], $dst", + [(set f128:$dst, (load ADDRrr:$addr))]>, + Requires<[HasV9, HasHardQuad]>; +def LDQFri : F3_2<3, 0b100010, + (outs QFPRegs:$dst), (ins MEMri:$addr), + "ldq [$addr], $dst", + [(set f128:$dst, (load ADDRri:$addr))]>, + Requires<[HasV9, HasHardQuad]>; // Section B.4 - Store Integer Instructions, p. 95 def STBrr : F3_1<3, 0b000101, @@ -398,6 +412,16 @@ def STDFri : F3_2<3, 0b100111, (outs), (ins MEMri:$addr, DFPRegs:$src), "std $src, [$addr]", [(store f64:$src, ADDRri:$addr)]>; +def STQFrr : F3_1<3, 0b100110, + (outs), (ins MEMrr:$addr, QFPRegs:$src), + "stq $src, [$addr]", + [(store f128:$src, ADDRrr:$addr)]>, + Requires<[HasV9, HasHardQuad]>; +def STQFri : F3_2<3, 0b100110, + (outs), (ins MEMri:$addr, QFPRegs:$src), + "stq $src, [$addr]", + [(store f128:$src, ADDRri:$addr)]>, + Requires<[HasV9, HasHardQuad]>; // Section B.9 - SETHI Instruction, p. 104 def SETHIi: F2_1<0b100, @@ -599,6 +623,11 @@ def FITOD : F3_3<2, 0b110100, 0b011001000, (outs DFPRegs:$dst), (ins FPRegs:$src), "fitod $src, $dst", [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>; +def FITOQ : F3_3<2, 0b110100, 0b011001100, + (outs QFPRegs:$dst), (ins FPRegs:$src), + "fitoq $src, $dst", + [(set QFPRegs:$dst, (SPitof FPRegs:$src))]>, + Requires<[HasHardQuad]>; // Convert Floating-point to Integer Instructions, p. 142 def FSTOI : F3_3<2, 0b110100, 0b011010001, @@ -609,16 +638,41 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010, (outs FPRegs:$dst), (ins DFPRegs:$src), "fdtoi $src, $dst", [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>; +def FQTOI : F3_3<2, 0b110100, 0b011010011, + (outs FPRegs:$dst), (ins QFPRegs:$src), + "fqtoi $src, $dst", + [(set FPRegs:$dst, (SPftoi QFPRegs:$src))]>, + Requires<[HasHardQuad]>; // Convert between Floating-point Formats Instructions, p. 143 def FSTOD : F3_3<2, 0b110100, 0b011001001, (outs DFPRegs:$dst), (ins FPRegs:$src), "fstod $src, $dst", [(set f64:$dst, (fextend f32:$src))]>; +def FSTOQ : F3_3<2, 0b110100, 0b011001101, + (outs QFPRegs:$dst), (ins FPRegs:$src), + "fstoq $src, $dst", + [(set f128:$dst, (fextend f32:$src))]>, + Requires<[HasHardQuad]>; def FDTOS : F3_3<2, 0b110100, 0b011000110, (outs FPRegs:$dst), (ins DFPRegs:$src), "fdtos $src, $dst", [(set f32:$dst, (fround f64:$src))]>; +def FDTOQ : F3_3<2, 0b110100, 0b01101110, + (outs QFPRegs:$dst), (ins DFPRegs:$src), + "fdtoq $src, $dst", + [(set f128:$dst, (fextend f64:$src))]>, + Requires<[HasHardQuad]>; +def FQTOS : F3_3<2, 0b110100, 0b011000111, + (outs FPRegs:$dst), (ins QFPRegs:$src), + "fqtos $src, $dst", + [(set f32:$dst, (fround f128:$src))]>, + Requires<[HasHardQuad]>; +def FQTOD : F3_3<2, 0b110100, 0b011001011, + (outs DFPRegs:$dst), (ins QFPRegs:$src), + "fqtod $src, $dst", + [(set f64:$dst, (fround f128:$src))]>, + Requires<[HasHardQuad]>; // Floating-point Move Instructions, p. 
144 def FMOVS : F3_3<2, 0b110100, 0b000000001, @@ -643,6 +697,11 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fsqrtd $src, $dst", [(set f64:$dst, (fsqrt f64:$src))]>; +def FSQRTQ : F3_3<2, 0b110100, 0b000101011, + (outs QFPRegs:$dst), (ins QFPRegs:$src), + "fsqrtq $src, $dst", + [(set f128:$dst, (fsqrt f128:$src))]>, + Requires<[HasHardQuad]>; @@ -655,6 +714,12 @@ def FADDD : F3_3<2, 0b110100, 0b001000010, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "faddd $src1, $src2, $dst", [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>; +def FADDQ : F3_3<2, 0b110100, 0b001000011, + (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2), + "faddq $src1, $src2, $dst", + [(set f128:$dst, (fadd f128:$src1, f128:$src2))]>, + Requires<[HasHardQuad]>; + def FSUBS : F3_3<2, 0b110100, 0b001000101, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fsubs $src1, $src2, $dst", @@ -663,6 +728,12 @@ def FSUBD : F3_3<2, 0b110100, 0b001000110, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fsubd $src1, $src2, $dst", [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>; +def FSUBQ : F3_3<2, 0b110100, 0b001000111, + (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2), + "fsubq $src1, $src2, $dst", + [(set f128:$dst, (fsub f128:$src1, f128:$src2))]>, + Requires<[HasHardQuad]>; + // Floating-point Multiply and Divide Instructions, p. 147 def FMULS : F3_3<2, 0b110100, 0b001001001, @@ -673,11 +744,24 @@ def FMULD : F3_3<2, 0b110100, 0b001001010, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fmuld $src1, $src2, $dst", [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>; +def FMULQ : F3_3<2, 0b110100, 0b001001011, + (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2), + "fmulq $src1, $src2, $dst", + [(set f128:$dst, (fmul f128:$src1, f128:$src2))]>, + Requires<[HasHardQuad]>; + def FSMULD : F3_3<2, 0b110100, 0b001101001, (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fsmuld $src1, $src2, $dst", [(set f64:$dst, (fmul (fextend f32:$src1), (fextend f32:$src2)))]>; +def FDMULQ : F3_3<2, 0b110100, 0b001101110, + (outs QFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), + "fdmulq $src1, $src2, $dst", + [(set f128:$dst, (fmul (fextend f64:$src1), + (fextend f64:$src2)))]>, + Requires<[HasHardQuad]>; + def FDIVS : F3_3<2, 0b110100, 0b001001101, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fdivs $src1, $src2, $dst", @@ -686,6 +770,11 @@ def FDIVD : F3_3<2, 0b110100, 0b001001110, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fdivd $src1, $src2, $dst", [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>; +def FDIVQ : F3_3<2, 0b110100, 0b001001111, + (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2), + "fdivq $src1, $src2, $dst", + [(set f128:$dst, (fdiv f128:$src1, f128:$src2))]>, + Requires<[HasHardQuad]>; // Floating-point Compare Instructions, p. 148 // Note: the 2nd template arg is different for these guys. 
@@ -701,6 +790,11 @@ let Defs = [FCC] in { (outs), (ins DFPRegs:$src1, DFPRegs:$src2), "fcmpd $src1, $src2\n\tnop", [(SPcmpfcc f64:$src1, f64:$src2)]>; + def FCMPQ : F3_3<2, 0b110101, 0b001010011, + (outs), (ins QFPRegs:$src1, QFPRegs:$src2), + "fcmpq $src1, $src2\n\tnop", + [(SPcmpfcc f128:$src1, f128:$src2)]>, + Requires<[HasHardQuad]>; } //===----------------------------------------------------------------------===// @@ -762,14 +856,28 @@ let Predicates = [HasV9] in { def FMOVD : F3_3<2, 0b110100, 0b000000010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fmovd $src, $dst", []>; + def FMOVQ : F3_3<2, 0b110100, 0b000000011, + (outs QFPRegs:$dst), (ins QFPRegs:$src), + "fmovq $src, $dst", []>, + Requires<[HasHardQuad]>; def FNEGD : F3_3<2, 0b110100, 0b000000110, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fnegd $src, $dst", [(set f64:$dst, (fneg f64:$src))]>; + def FNEGQ : F3_3<2, 0b110100, 0b000000111, + (outs QFPRegs:$dst), (ins QFPRegs:$src), + "fnegq $src, $dst", + [(set f128:$dst, (fneg f128:$src))]>, + Requires<[HasHardQuad]>; def FABSD : F3_3<2, 0b110100, 0b000001010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fabsd $src, $dst", [(set f64:$dst, (fabs f64:$src))]>; + def FABSQ : F3_3<2, 0b110100, 0b000001011, + (outs QFPRegs:$dst), (ins QFPRegs:$src), + "fabsq $src, $dst", + [(set f128:$dst, (fabs f128:$src))]>, + Requires<[HasHardQuad]>; } // POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index f9ce098..7d09d0e 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -30,7 +30,8 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, IsV9(false), V8DeprecatedInsts(false), IsVIS(false), - Is64Bit(is64Bit) { + Is64Bit(is64Bit), + HasHardQuad(false) { // Determine default and user specified characteristics std::string CPUName = CPU; diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 2bf599d..0f81cc9 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -29,6 +29,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { bool V8DeprecatedInsts; bool IsVIS; bool Is64Bit; + bool HasHardQuad; public: SparcSubtarget(const std::string &TT, const std::string &CPU, @@ -37,6 +38,7 @@ public: bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; } + bool hasHardQuad() const { return HasHardQuad; } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. -- cgit v1.1 From a4959f3f6eb9b6ab3cbbe085a2797208682e96c6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 25 Aug 2013 22:23:38 +0000 Subject: First round of fixes for the x86 fixes for the x86 move accumulator from/to memory offset instructions. -Assembly parser now properly check the size of the memory operation specified in intel syntax. So 'mov word ptr [5], al' is no longer accepted. -x86-32 disassembly of these instructions no longer sign extends the 32-bit address immediate based on size. -Intel syntax printing prints the ptr size and places brackets around the address immediate. Known remaining issues with these instructions: -Segment override prefix is not supported. PR16962 and PR16961. -Immediate size should be changed by address size prefix. 
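To make the first fix concrete, the new matching reduces to the check below, shown as a simplified standalone sketch (the real predicates are the isMemOffs8/16/32/64 methods added to X86AsmParser.cpp in this patch):

    #include <cassert>

    // Condensed form of the new Intel-syntax size check: an operand with
    // an explicit "ptr" size matches only the moffs form of the same
    // width; an operand with no stated size (0 here, mirroring the
    // !Mem.Size case) matches any width.
    static bool memOffsMatches(unsigned ExplicitBits, unsigned FormBits) {
      return ExplicitBits == 0 || ExplicitBits == FormBits;
    }

    int main() {
      assert(memOffsMatches(8, 8));    // mov byte ptr [5], al : accepted
      assert(memOffsMatches(0, 8));    // mov [5], al          : accepted
      assert(!memOffsMatches(16, 8));  // mov word ptr [5], al : rejected
      return 0;
    }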
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189201 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 39 +++++++++++++++++ lib/Target/X86/Disassembler/X86Disassembler.cpp | 29 ++++++------- lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 16 +++++++ lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 16 ++++++- lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 16 +++++++ lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 20 ++++++++- lib/Target/X86/X86InstrInfo.td | 50 +++++++++++++++++----- 7 files changed, 156 insertions(+), 30 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ced12fc..a090b33 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -849,6 +849,23 @@ struct X86Operand : public MCParsedAsmOperand { !getMemIndexReg() && getMemScale() == 1; } + bool isMemOffs8() const { + return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && + !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8); + } + bool isMemOffs16() const { + return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && + !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16); + } + bool isMemOffs32() const { + return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && + !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32); + } + bool isMemOffs64() const { + return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && + !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64); + } + bool isReg() const { return Kind == Register; } void addExpr(MCInst &Inst, const MCExpr *Expr) const { @@ -931,6 +948,28 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); } + void addMemOffs8Operands(MCInst &Inst, unsigned N) const { + addMemOffsOperands(Inst, N); + } + void addMemOffs16Operands(MCInst &Inst, unsigned N) const { + addMemOffsOperands(Inst, N); + } + void addMemOffs32Operands(MCInst &Inst, unsigned N) const { + addMemOffsOperands(Inst, N); + } + void addMemOffs64Operands(MCInst &Inst, unsigned N) const { + addMemOffsOperands(Inst, N); + } + + void addMemOffsOperands(MCInst &Inst, unsigned N) const { + assert((N == 1) && "Invalid number of operands!"); + // Add as immediates when possible. 
+ if (const MCConstantExpr *CE = dyn_cast(getMemDisp())) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + } + static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size()); X86Operand *Res = new X86Operand(Token, Loc, EndLoc); diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 9cda49c..903e36c 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -231,16 +231,18 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, default: break; case 1: - type = TYPE_MOFFS8; + if(immediate & 0x80) + immediate |= ~(0xffull); break; case 2: - type = TYPE_MOFFS16; + if(immediate & 0x8000) + immediate |= ~(0xffffull); break; case 4: - type = TYPE_MOFFS32; + if(immediate & 0x80000000) + immediate |= ~(0xffffffffull); break; case 8: - type = TYPE_MOFFS64; break; } } @@ -263,16 +265,18 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && Opcode != X86::VINSERTPSrr) - type = TYPE_MOFFS8; + if(immediate & 0x80) + immediate |= ~(0xffull); break; case ENCODING_IW: - type = TYPE_MOFFS16; + if(immediate & 0x8000) + immediate |= ~(0xffffull); break; case ENCODING_ID: - type = TYPE_MOFFS32; + if(immediate & 0x80000000) + immediate |= ~(0xffffffffull); break; case ENCODING_IO: - type = TYPE_MOFFS64; break; } } @@ -292,25 +296,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, case TYPE_REL8: isBranch = true; pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; - // fall through to sign extend the immediate if needed. - case TYPE_MOFFS8: if(immediate & 0x80) immediate |= ~(0xffull); break; - case TYPE_MOFFS16: - if(immediate & 0x8000) - immediate |= ~(0xffffull); - break; case TYPE_REL32: case TYPE_REL64: isBranch = true; pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; - // fall through to sign extend the immediate if needed. - case TYPE_MOFFS32: if(immediate & 0x80000000) immediate |= ~(0xffffffffull); break; - case TYPE_MOFFS64: default: // operand is 64 bits wide. Do nothing. 
break; diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index b9d0082..4439311 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -215,3 +215,19 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, O << markup(">"); } + +void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &DispSpec = MI->getOperand(Op); + + O << markup(""); +} diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 8d05256..a8fab72 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -42,7 +42,8 @@ public: void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS); void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS); void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); - + void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS); + void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } @@ -86,6 +87,19 @@ public: void printf512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } + + void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemOffset(MI, OpNo, O); + } + void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemOffset(MI, OpNo, O); + } + void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemOffset(MI, OpNo, O); + } + void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemOffset(MI, OpNo, O); + } }; } diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 9dfc9a9..e7e7b15 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -200,3 +200,19 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, O << ']'; } + +void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &DispSpec = MI->getOperand(Op); + + O << '['; + + if (DispSpec.isImm()) { + O << formatImm(DispSpec.getImm()); + } else { + assert(DispSpec.isExpr() && "non-immediate displacement?"); + O << *DispSpec.getExpr(); + } + + O << ']'; +} diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 45beeda..590bf68 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -39,7 +39,8 @@ public: void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O); void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); - + void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << "opaque ptr "; printMemReference(MI, OpNo, O); @@ -97,6 +98,23 @@ public: O << "zmmword ptr "; printMemReference(MI, OpNo, O); } + + void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "byte ptr "; + printMemOffset(MI, OpNo, O); + } + void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "word ptr "; + printMemOffset(MI, OpNo, O); + } + void printMemOffs32(const MCInst *MI, unsigned OpNo, 
raw_ostream &O) { + O << "dword ptr "; + printMemOffset(MI, OpNo, O); + } + void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "qword ptr "; + printMemOffset(MI, OpNo, O); + } }; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f4118da..adad17a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -439,17 +439,45 @@ let OperandType = "OPERAND_PCREL", def i32imm_pcrel : Operand; def i16imm_pcrel : Operand; -def offset8 : Operand; -def offset16 : Operand; -def offset32 : Operand; -def offset64 : Operand; - // Branch targets have OtherVT type and print as pc-relative values. def brtarget : Operand; def brtarget8 : Operand; } +def X86MemOffs8AsmOperand : AsmOperandClass { + let Name = "MemOffs8"; + let SuperClasses = [X86Mem8AsmOperand]; +} +def X86MemOffs16AsmOperand : AsmOperandClass { + let Name = "MemOffs16"; + let SuperClasses = [X86Mem16AsmOperand]; +} +def X86MemOffs32AsmOperand : AsmOperandClass { + let Name = "MemOffs32"; + let SuperClasses = [X86Mem32AsmOperand]; +} +def X86MemOffs64AsmOperand : AsmOperandClass { + let Name = "MemOffs64"; + let SuperClasses = [X86Mem64AsmOperand]; +} + +let OperandType = "OPERAND_MEMORY" in { +def offset8 : Operand { + let ParserMatchClass = X86MemOffs8AsmOperand; + let PrintMethod = "printMemOffs8"; } +def offset16 : Operand { + let ParserMatchClass = X86MemOffs16AsmOperand; + let PrintMethod = "printMemOffs16"; } +def offset32 : Operand { + let ParserMatchClass = X86MemOffs32AsmOperand; + let PrintMethod = "printMemOffs32"; } +def offset64 : Operand { + let ParserMatchClass = X86MemOffs64AsmOperand; + let PrintMethod = "printMemOffs64"; } +} + + def SSECC : Operand { let PrintMethod = "printSSECC"; let OperandType = "OPERAND_IMMEDIATE"; @@ -1104,13 +1132,13 @@ def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), // These forms all have full 64-bit absolute addresses in their instructions // and use the movabs mnemonic to indicate this specific form. 
let mayLoad = 1 in { -def MOV64o8a : RIi64_NOREX<0xA0, RawFrm, (outs), (ins offset64:$src), +def MOV64o8a : RIi64_NOREX<0xA0, RawFrm, (outs), (ins offset8:$src), "movabs{b}\t{$src, %al|al, $src}", []>, Requires<[In64BitMode]>; -def MOV64o16a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset64:$src), +def MOV64o16a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset16:$src), "movabs{w}\t{$src, %ax|ax, $src}", []>, OpSize, Requires<[In64BitMode]>; -def MOV64o32a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset64:$src), +def MOV64o32a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset32:$src), "movabs{l}\t{$src, %eax|eax, $src}", []>, Requires<[In64BitMode]>; def MOV64o64a : RIi64<0xA1, RawFrm, (outs), (ins offset64:$src), @@ -1119,13 +1147,13 @@ def MOV64o64a : RIi64<0xA1, RawFrm, (outs), (ins offset64:$src), } let mayStore = 1 in { -def MOV64ao8 : RIi64_NOREX<0xA2, RawFrm, (outs offset64:$dst), (ins), +def MOV64ao8 : RIi64_NOREX<0xA2, RawFrm, (outs offset8:$dst), (ins), "movabs{b}\t{%al, $dst|$dst, al}", []>, Requires<[In64BitMode]>; -def MOV64ao16 : RIi64_NOREX<0xA3, RawFrm, (outs offset64:$dst), (ins), +def MOV64ao16 : RIi64_NOREX<0xA3, RawFrm, (outs offset16:$dst), (ins), "movabs{w}\t{%ax, $dst|$dst, ax}", []>, OpSize, Requires<[In64BitMode]>; -def MOV64ao32 : RIi64_NOREX<0xA3, RawFrm, (outs offset64:$dst), (ins), +def MOV64ao32 : RIi64_NOREX<0xA3, RawFrm, (outs offset32:$dst), (ins), "movabs{l}\t{%eax, $dst|$dst, eax}", []>, Requires<[In64BitMode]>; def MOV64ao64 : RIi64<0xA3, RawFrm, (outs offset64:$dst), (ins), -- cgit v1.1 From 3fad2bcd25d5f46d98ea7e41c6654833f197b960 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Sun, 25 Aug 2013 22:33:42 +0000 Subject: [PowerPC] Add fast-isel branch and compare selection. First chunk of actual fast-isel selection code. This handles direct and indirect branches, as well as feeding compares for direct branches. PPCFastISel::PPCEmitIntExt() is just roughed in and will be expanded in a future patch. This also corrects a problem with selection for constant pool entries in JIT mode or with small code model. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189202 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 281 +++++++++++++++++++++++++++++++++++-- 1 file changed, 272 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 8cbf1fb..ebc7057 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -89,9 +89,19 @@ class PPCFastISel : public FastISel { virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); virtual bool FastLowerArguments(); + virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm); + + // Instruction selection routines. + private: + bool SelectBranch(const Instruction *I); + bool SelectIndirectBr(const Instruction *I); // Utility routines. private: + bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, + bool isZExt, unsigned DestReg); + bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + unsigned DestReg, bool IsZExt); unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT); unsigned PPCMaterializeInt(const Constant *C, MVT VT); unsigned PPCMaterialize32BitInt(int64_t Imm, @@ -106,10 +116,241 @@ class PPCFastISel : public FastISel { } // end anonymous namespace +static Optional getComparePred(CmpInst::Predicate Pred) { + switch (Pred) { + // These are not representable with any single compare. 
+ case CmpInst::FCMP_FALSE: + case CmpInst::FCMP_UEQ: + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_UGE: + case CmpInst::FCMP_ULT: + case CmpInst::FCMP_ULE: + case CmpInst::FCMP_UNE: + case CmpInst::FCMP_TRUE: + default: + return Optional(); + + case CmpInst::FCMP_OEQ: + case CmpInst::ICMP_EQ: + return PPC::PRED_EQ; + + case CmpInst::FCMP_OGT: + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_SGT: + return PPC::PRED_GT; + + case CmpInst::FCMP_OGE: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SGE: + return PPC::PRED_GE; + + case CmpInst::FCMP_OLT: + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_SLT: + return PPC::PRED_LT; + + case CmpInst::FCMP_OLE: + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SLE: + return PPC::PRED_LE; + + case CmpInst::FCMP_ONE: + case CmpInst::ICMP_NE: + return PPC::PRED_NE; + + case CmpInst::FCMP_ORD: + return PPC::PRED_NU; + + case CmpInst::FCMP_UNO: + return PPC::PRED_UN; + } +} + +// Attempt to fast-select a branch instruction. +bool PPCFastISel::SelectBranch(const Instruction *I) { + const BranchInst *BI = cast(I); + MachineBasicBlock *BrBB = FuncInfo.MBB; + MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; + + // For now, just try the simplest case where it's fed by a compare. + if (const CmpInst *CI = dyn_cast(BI->getCondition())) { + Optional OptPPCPred = getComparePred(CI->getPredicate()); + if (!OptPPCPred) + return false; + + PPC::Predicate PPCPred = OptPPCPred.getValue(); + + // Take advantage of fall-through opportunities. + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + PPCPred = PPC::InvertPredicate(PPCPred); + } + + unsigned CondReg = createResultReg(&PPC::CRRCRegClass); + + if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), + CondReg)) + return false; + + BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC)) + .addImm(PPCPred).addReg(CondReg).addMBB(TBB); + FastEmitBranch(FBB, DL); + FuncInfo.MBB->addSuccessor(TBB); + return true; + + } else if (const ConstantInt *CI = + dyn_cast(BI->getCondition())) { + uint64_t Imm = CI->getZExtValue(); + MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; + FastEmitBranch(Target, DL); + return true; + } + + // FIXME: ARM looks for a case where the block containing the compare + // has been split from the block containing the branch. If this happens, + // there is a vreg available containing the result of the compare. I'm + // not sure we can do much, as we've lost the predicate information with + // the compare instruction -- we have a 4-bit CR but don't know which bit + // to test here. + return false; +} + +// Attempt to emit a compare of the two source values. Signed and unsigned +// comparisons are supported. Return false if we can't handle it. +bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, + bool IsZExt, unsigned DestReg) { + Type *Ty = SrcValue1->getType(); + EVT SrcEVT = TLI.getValueType(Ty, true); + if (!SrcEVT.isSimple()) + return false; + MVT SrcVT = SrcEVT.getSimpleVT(); + + // See if operand 2 is an immediate encodeable in the compare. + // FIXME: Operands are not in canonical order at -O0, so an immediate + // operand in position 1 is a lost opportunity for now. We are + // similar to ARM in this regard. + long Imm = 0; + bool UseImm = false; + + // Only 16-bit integer constants can be represented in compares for + // PowerPC. Others will be materialized into a register. 
+ if (const ConstantInt *ConstInt = dyn_cast(SrcValue2)) { + if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || + SrcVT == MVT::i8 || SrcVT == MVT::i1) { + const APInt &CIVal = ConstInt->getValue(); + Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue(); + if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm))) + UseImm = true; + } + } + + unsigned CmpOpc; + bool NeedsExt = false; + switch (SrcVT.SimpleTy) { + default: return false; + case MVT::f32: + CmpOpc = PPC::FCMPUS; + break; + case MVT::f64: + CmpOpc = PPC::FCMPUD; + break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + NeedsExt = true; + // Intentional fall-through. + case MVT::i32: + if (!UseImm) + CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW; + else + CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI; + break; + case MVT::i64: + if (!UseImm) + CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD; + else + CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI; + break; + } + + unsigned SrcReg1 = getRegForValue(SrcValue1); + if (SrcReg1 == 0) + return false; + + unsigned SrcReg2 = 0; + if (!UseImm) { + SrcReg2 = getRegForValue(SrcValue2); + if (SrcReg2 == 0) + return false; + } + + if (NeedsExt) { + unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt)) + return false; + SrcReg1 = ExtReg; + + if (!UseImm) { + unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt)) + return false; + SrcReg2 = ExtReg; + } + } + + if (!UseImm) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + .addReg(SrcReg1).addReg(SrcReg2); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + .addReg(SrcReg1).addImm(Imm); + + return true; +} + +// Attempt to emit an integer extend of SrcReg into DestReg. Both +// signed and zero extensions are supported. Return false if we +// can't handle it. Not yet implemented. +bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + unsigned DestReg, bool IsZExt) { + return (SrcVT == MVT::i8 && SrcReg && DestVT == MVT::i8 && DestReg + && IsZExt && false); +} + +// Attempt to fast-select an indirect branch instruction. +bool PPCFastISel::SelectIndirectBr(const Instruction *I) { + unsigned AddrReg = getRegForValue(I->getOperand(0)); + if (AddrReg == 0) + return false; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8)) + .addReg(AddrReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8)); + + const IndirectBrInst *IB = cast(I); + for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); + + return true; +} + // Attempt to fast-select an instruction that wasn't handled by -// the table-generated machinery. TBD. +// the table-generated machinery. bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { - return I && false; + + switch (I->getOpcode()) { + case Instruction::Br: + return SelectBranch(I); + case Instruction::IndirectBr: + return SelectIndirectBr(I); + // Here add other flavors of Instruction::XXX that automated + // cases don't catch. For example, switches are terminators + // that aren't yet handled. + default: + break; + } + return false; } // Materialize a floating-point constant into a register, and return @@ -131,15 +372,18 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, (VT == MVT::f32) ? 
4 : 8, Align); - // For small code model, generate a LDtocCPT. - if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) + unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; + unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + + // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). + if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT), - DestReg) - .addConstantPoolIndex(Idx).addReg(PPC::X2).addMemOperand(MMO); - else { + TmpReg) + .addConstantPoolIndex(Idx).addReg(PPC::X2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addImm(0).addReg(TmpReg).addMemOperand(MMO); + } else { // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). - unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; - unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) @@ -312,6 +556,25 @@ bool PPCFastISel::FastLowerArguments() { return false; } +// Handle materializing integer constants into a register. This is not +// automatically generated for PowerPC, so must be explicitly created here. +unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { + + if (Opc != ISD::Constant) + return 0; + + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && + VT != MVT::i8 && VT != MVT::i1) + return 0; + + const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass : + &PPC::GPRCRegClass); + if (VT == MVT::i64) + return PPCMaterialize64BitInt(Imm, RC); + else + return PPCMaterialize32BitInt(Imm, RC); +} + namespace llvm { // Create the fast instruction selector for PowerPC64 ELF. FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo, -- cgit v1.1 From c648f85f7b4c8ffd429832bc7c602b2ae5c2bbc7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 25 Aug 2013 23:18:05 +0000 Subject: Put some of the AVX-512 parsing stuff in a more consistent place with the existing functions. 
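For reference, the blocks being moved all follow the same predicate plus
render pairing, one per memory width; the 512-bit case added here has the
shape (copied from the hunks below):

    bool isMem512() const {
      return Kind == Memory && (!Mem.Size || Mem.Size == 512);
    }
    void addMem512Operands(MCInst &Inst, unsigned N) const {
      addMemOperands(Inst, N);
    }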
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189204 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 20 +++++++++----------- lib/Target/X86/X86InstrInfo.td | 13 ++++++------- 2 files changed, 15 insertions(+), 18 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index a090b33..aa9524b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -814,6 +814,9 @@ struct X86Operand : public MCParsedAsmOperand { bool isMem256() const { return Kind == Memory && (!Mem.Size || Mem.Size == 256); } + bool isMem512() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 512); + } bool isMemVX32() const { return Kind == Memory && (!Mem.Size || Mem.Size == 32) && @@ -840,10 +843,6 @@ struct X86Operand : public MCParsedAsmOperand { getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31; } - bool isMem512() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 512); - } - bool isAbsMem() const { return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && !getMemIndexReg() && getMemScale() == 1; @@ -907,28 +906,27 @@ struct X86Operand : public MCParsedAsmOperand { void addMem256Operands(MCInst &Inst, unsigned N) const { addMemOperands(Inst, N); } + void addMem512Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } void addMemVX32Operands(MCInst &Inst, unsigned N) const { addMemOperands(Inst, N); } void addMemVY32Operands(MCInst &Inst, unsigned N) const { addMemOperands(Inst, N); } - void addMemVX64Operands(MCInst &Inst, unsigned N) const { + void addMemVZ32Operands(MCInst &Inst, unsigned N) const { addMemOperands(Inst, N); } - void addMemVY64Operands(MCInst &Inst, unsigned N) const { + void addMemVX64Operands(MCInst &Inst, unsigned N) const { addMemOperands(Inst, N); } - - void addMemVZ32Operands(MCInst &Inst, unsigned N) const { + void addMemVY64Operands(MCInst &Inst, unsigned N) const { addMemOperands(Inst, N); } void addMemVZ64Operands(MCInst &Inst, unsigned N) const { addMemOperands(Inst, N); } - void addMem512Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } void addMemOperands(MCInst &Inst, unsigned N) const { assert((N == 5) && "Invalid number of operands!"); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index adad17a..f6a115f 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -302,6 +302,9 @@ def X86Mem128AsmOperand : AsmOperandClass { def X86Mem256AsmOperand : AsmOperandClass { let Name = "Mem256"; let PredicateMethod = "isMem256"; } +def X86Mem512AsmOperand : AsmOperandClass { + let Name = "Mem512"; let PredicateMethod = "isMem512"; +} // Gather mem operands def X86MemVX32Operand : AsmOperandClass { @@ -310,22 +313,18 @@ def X86MemVX32Operand : AsmOperandClass { def X86MemVY32Operand : AsmOperandClass { let Name = "MemVY32"; let PredicateMethod = "isMemVY32"; } +def X86MemVZ32Operand : AsmOperandClass { + let Name = "MemVZ32"; let PredicateMethod = "isMemVZ32"; +} def X86MemVX64Operand : AsmOperandClass { let Name = "MemVX64"; let PredicateMethod = "isMemVX64"; } def X86MemVY64Operand : AsmOperandClass { let Name = "MemVY64"; let PredicateMethod = "isMemVY64"; } - def X86MemVZ64Operand : AsmOperandClass { let Name = "MemVZ64"; let PredicateMethod = "isMemVZ64"; } -def X86MemVZ32Operand : AsmOperandClass { - let Name = "MemVZ32"; let PredicateMethod = "isMemVZ32"; -} -def 
X86Mem512AsmOperand : AsmOperandClass { - let Name = "Mem512"; let PredicateMethod = "isMem512"; -} def X86AbsMemAsmOperand : AsmOperandClass { let Name = "AbsMem"; -- cgit v1.1 From f0509f3637dcd6c9e915f0ad112088a4f75ffa51 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 26 Aug 2013 00:13:09 +0000 Subject: Remove some unnecessary PredicateMethod overrides. Add RenderMethod overrides to remove forwarding in the X86AsmParser code itself. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189205 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 56 ------------------------------- lib/Target/X86/X86InstrInfo.td | 34 ++++++++++--------- 2 files changed, 19 insertions(+), 71 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index aa9524b..d5d9c52 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -885,49 +885,6 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } - void addMem8Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem16Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem80Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem128Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem256Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem512Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVX32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVY32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVZ32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVX64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVY64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVZ64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemOperands(MCInst &Inst, unsigned N) const { assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); @@ -946,19 +903,6 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); } - void addMemOffs8Operands(MCInst &Inst, unsigned N) const { - addMemOffsOperands(Inst, N); - } - void addMemOffs16Operands(MCInst &Inst, unsigned N) const { - addMemOffsOperands(Inst, N); - } - void addMemOffs32Operands(MCInst &Inst, unsigned N) const { - addMemOffsOperands(Inst, N); - } - void addMemOffs64Operands(MCInst &Inst, unsigned N) const { - addMemOffsOperands(Inst, N); - } - void addMemOffsOperands(MCInst &Inst, unsigned N) const { assert((N == 1) && "Invalid number of operands!"); // Add as immediates when possible. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f6a115f..c2d1807 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -279,51 +279,51 @@ def ptr_rc_nosp : PointerLikeRegClass<1>; // *mem - Operand definitions for the funky X86 addressing mode operands. 
// def X86MemAsmOperand : AsmOperandClass { - let Name = "Mem"; let PredicateMethod = "isMem"; + let Name = "Mem"; } def X86Mem8AsmOperand : AsmOperandClass { - let Name = "Mem8"; let PredicateMethod = "isMem8"; + let Name = "Mem8"; let RenderMethod = "addMemOperands"; } def X86Mem16AsmOperand : AsmOperandClass { - let Name = "Mem16"; let PredicateMethod = "isMem16"; + let Name = "Mem16"; let RenderMethod = "addMemOperands"; } def X86Mem32AsmOperand : AsmOperandClass { - let Name = "Mem32"; let PredicateMethod = "isMem32"; + let Name = "Mem32"; let RenderMethod = "addMemOperands"; } def X86Mem64AsmOperand : AsmOperandClass { - let Name = "Mem64"; let PredicateMethod = "isMem64"; + let Name = "Mem64"; let RenderMethod = "addMemOperands"; } def X86Mem80AsmOperand : AsmOperandClass { - let Name = "Mem80"; let PredicateMethod = "isMem80"; + let Name = "Mem80"; let RenderMethod = "addMemOperands"; } def X86Mem128AsmOperand : AsmOperandClass { - let Name = "Mem128"; let PredicateMethod = "isMem128"; + let Name = "Mem128"; let RenderMethod = "addMemOperands"; } def X86Mem256AsmOperand : AsmOperandClass { - let Name = "Mem256"; let PredicateMethod = "isMem256"; + let Name = "Mem256"; let RenderMethod = "addMemOperands"; } def X86Mem512AsmOperand : AsmOperandClass { - let Name = "Mem512"; let PredicateMethod = "isMem512"; + let Name = "Mem512"; let RenderMethod = "addMemOperands"; } // Gather mem operands def X86MemVX32Operand : AsmOperandClass { - let Name = "MemVX32"; let PredicateMethod = "isMemVX32"; + let Name = "MemVX32"; let RenderMethod = "addMemOperands"; } def X86MemVY32Operand : AsmOperandClass { - let Name = "MemVY32"; let PredicateMethod = "isMemVY32"; + let Name = "MemVY32"; let RenderMethod = "addMemOperands"; } def X86MemVZ32Operand : AsmOperandClass { - let Name = "MemVZ32"; let PredicateMethod = "isMemVZ32"; + let Name = "MemVZ32"; let RenderMethod = "addMemOperands"; } def X86MemVX64Operand : AsmOperandClass { - let Name = "MemVX64"; let PredicateMethod = "isMemVX64"; + let Name = "MemVX64"; let RenderMethod = "addMemOperands"; } def X86MemVY64Operand : AsmOperandClass { - let Name = "MemVY64"; let PredicateMethod = "isMemVY64"; + let Name = "MemVY64"; let RenderMethod = "addMemOperands"; } def X86MemVZ64Operand : AsmOperandClass { - let Name = "MemVZ64"; let PredicateMethod = "isMemVZ64"; + let Name = "MemVZ64"; let RenderMethod = "addMemOperands"; } def X86AbsMemAsmOperand : AsmOperandClass { @@ -446,18 +446,22 @@ def brtarget8 : Operand; def X86MemOffs8AsmOperand : AsmOperandClass { let Name = "MemOffs8"; + let RenderMethod = "addMemOffsOperands"; let SuperClasses = [X86Mem8AsmOperand]; } def X86MemOffs16AsmOperand : AsmOperandClass { let Name = "MemOffs16"; + let RenderMethod = "addMemOffsOperands"; let SuperClasses = [X86Mem16AsmOperand]; } def X86MemOffs32AsmOperand : AsmOperandClass { let Name = "MemOffs32"; + let RenderMethod = "addMemOffsOperands"; let SuperClasses = [X86Mem32AsmOperand]; } def X86MemOffs64AsmOperand : AsmOperandClass { let Name = "MemOffs64"; + let RenderMethod = "addMemOffsOperands"; let SuperClasses = [X86Mem64AsmOperand]; } -- cgit v1.1 From 318f4e679b6293bf28eb288126846c4c650528be Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 26 Aug 2013 00:39:04 +0000 Subject: Make sure x86 instructions using ssmem/sdmem operand types are only able to parse memory operands of the proper size in Intel syntax. Primarily affects some of sse cvt instructions. 
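As an illustrative example (assumed, not taken from the testsuite), in
Intel syntax

    cvtss2si eax, dword ptr [rax]    ; matches ssmem's 32-bit size
    cvtss2si eax, qword ptr [rax]    ; now rejected as a size mismatch

both forms previously matched because ssmem and sdmem used the unsized
X86MemAsmOperand class.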
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189206 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFragmentsSIMD.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index b23da04..ab7c602 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -251,13 +251,13 @@ def sse_load_f64 : ComplexPattern { let PrintMethod = "printf32mem"; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; + let ParserMatchClass = X86Mem32AsmOperand; let OperandType = "OPERAND_MEMORY"; } def sdmem : Operand { let PrintMethod = "printf64mem"; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; + let ParserMatchClass = X86Mem64AsmOperand; let OperandType = "OPERAND_MEMORY"; } -- cgit v1.1 From e4bf77a1282bfdacb61bae192fdf79a696be780a Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Mon, 26 Aug 2013 10:02:40 +0000 Subject: This patch implements trap instructions for mips. The test cases are added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189213 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFormats.td | 11 +++++++++++ lib/Target/Mips/MipsInstrInfo.td | 22 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 28abb7e..b05363d 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -479,6 +479,17 @@ class TEQ_FM funct> { let Inst{5-0} = funct; } +class TEQI_FM funct> { + bits<5> rs; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = 1; + let Inst{25-21} = rs; + let Inst{20-16} = funct; + let Inst{15-0} = imm16; +} //===----------------------------------------------------------------------===// // System calls format //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3c90e75..94f9ee6 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -630,6 +630,9 @@ class TEQ_FT : InstSE<(outs), (ins RO:$rs, RO:$rt, uimm16:$code_), !strconcat(opstr, "\t$rs, $rt, $code_"), [], NoItinerary, FrmI>; +class TEQI_FT : + InstSE<(outs), (ins RO:$rs, uimm16:$imm16), + !strconcat(opstr, "\t$rs, $imm16"), [], NoItinerary, FrmOther>; // Mul, Div class Mult DefRegs> : @@ -914,6 +917,18 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd>, LW_FM<0x2e>; def SYNC : SYNC_FT, SYNC_FM; def TEQ : TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>; +def TGE : TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>; +def TGEU : TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>; +def TLT : TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>; +def TLTU : TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>; +def TNE : TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>; + +def TEQI : TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>; +def TGEI : TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>; +def TGEIU : TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>; +def TLTI : TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>; +def TTLTIU : TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>; +def TNEI : TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>; def BREAK : BRK_FT<"break">, BRK_FM<0xd>; def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>; @@ -1093,6 +1108,13 @@ def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; def 
: InstAlias<"break", (BREAK 0, 0), 1>; def : InstAlias<"ei", (EI ZERO), 1>; def : InstAlias<"di", (DI ZERO), 1>; + +def : InstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : InstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : InstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : InstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : InstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : InstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -- cgit v1.1 From 92bfb547700550fcdb668862533e4952a8d74969 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 26 Aug 2013 12:45:35 +0000 Subject: AVX-512: Added shuffle instructions - VPSHUFD, VPERMILPS, VMOVDDUP, VMOVLHPS, VMOVHLPS, VSHUFPS, VALIGN single and double forms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189215 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 68 ++++++++++-------- lib/Target/X86/X86InstrAVX512.td | 139 ++++++++++++++++++++++++++++++++++++- lib/Target/X86/X86InstrSSE.td | 4 +- 3 files changed, 177 insertions(+), 34 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a00f848..6a7ca7d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3600,7 +3600,7 @@ static bool isPALIGNRMask(ArrayRef Mask, MVT VT, return false; unsigned NumElts = VT.getVectorNumElements(); - unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLanes = VT.is512BitVector() ? 1: VT.getSizeInBits()/128; unsigned NumLaneElts = NumElts/NumLanes; // Do not handle 64-bit element shuffles with palignr. @@ -3683,10 +3683,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl &Mask, /// specifies a shuffle of elements that is suitable for input to 128/256-bit /// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be /// reverse of what x86 shuffles want. -static bool isSHUFPMask(ArrayRef Mask, MVT VT, bool HasFp256, - bool Commuted = false) { - if (!HasFp256 && VT.is256BitVector()) - return false; +static bool isSHUFPMask(ArrayRef Mask, MVT VT, bool Commuted = false) { unsigned NumElems = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits()/128; @@ -3695,6 +3692,10 @@ static bool isSHUFPMask(ArrayRef Mask, MVT VT, bool HasFp256, if (NumLaneElems != 2 && NumLaneElems != 4) return false; + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + bool symetricMaskRequired = + (VT.getSizeInBits() >= 256) && (EltSize == 32); + // VSHUFPSY divides the resulting vector into 4 chunks. // The sources are also splitted into 4 chunks, and each destination // chunk must come from a different source chunk. @@ -3714,6 +3715,7 @@ static bool isSHUFPMask(ArrayRef Mask, MVT VT, bool HasFp256, // // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0 // + SmallVector MaskVal(NumLaneElems, -1); unsigned HalfLaneElems = NumLaneElems/2; for (unsigned l = 0; l != NumElems; l += NumLaneElems) { for (unsigned i = 0; i != NumLaneElems; ++i) { @@ -3724,9 +3726,13 @@ static bool isSHUFPMask(ArrayRef Mask, MVT VT, bool HasFp256, // For VSHUFPSY, the mask of the second half must be the same as the // first but with the appropriate offsets. 
This works in the same way as // VPERMILPS works with masks. - if (NumElems != 8 || l == 0 || Mask[i] < 0) + if (!symetricMaskRequired || Idx < 0) + continue; + if (MaskVal[i] < 0) { + MaskVal[i] = Idx - l; continue; - if (!isUndefOrEqual(Idx, Mask[i]+l)) + } + if ((signed)(Idx - l) != MaskVal[i]) return false; } } @@ -4158,31 +4164,32 @@ static bool isPermImmMask(ArrayRef Mask, MVT VT, unsigned& Imm8) { /// to the same elements of the low, but to the higher half of the source. /// In VPERMILPD the two lanes could be shuffled independently of each other /// with the same restriction that lanes can't be crossed. Also handles PSHUFDY. -static bool isVPERMILPMask(ArrayRef Mask, MVT VT, bool HasFp256) { - if (!HasFp256) +static bool isVPERMILPMask(ArrayRef Mask, MVT VT) { + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (VT.getSizeInBits() < 256 || EltSize < 32) return false; - + bool symetricMaskRequired = (EltSize == 32); unsigned NumElts = VT.getVectorNumElements(); - // Only match 256-bit with 32/64-bit types - if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8)) - return false; unsigned NumLanes = VT.getSizeInBits()/128; unsigned LaneSize = NumElts/NumLanes; + // 2 or 4 elements in one lane + + SmallVector ExpectedMaskVal(LaneSize, -1); for (unsigned l = 0; l != NumElts; l += LaneSize) { for (unsigned i = 0; i != LaneSize; ++i) { if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize)) return false; - if (NumElts != 8 || l == 0) - continue; - // VPERMILPS handling - if (Mask[i] < 0) - continue; - if (!isUndefOrEqual(Mask[i+l], Mask[i]+l)) - return false; + if (symetricMaskRequired) { + if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) { + ExpectedMaskVal[i] = Mask[i+l] - l; + continue; + } + if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l)) + return false; + } } } - return true; } @@ -4431,10 +4438,11 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { MVT VT = SVOp->getSimpleValueType(0); - unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; + unsigned EltSize = VT.is512BitVector() ? 1 : + VT.getVectorElementType().getSizeInBits() >> 3; unsigned NumElts = VT.getVectorNumElements(); - unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLanes = VT.is512BitVector() ? 
1 : VT.getSizeInBits()/128; unsigned NumLaneElts = NumElts/NumLanes; int Val = 0; @@ -7407,7 +7415,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // Normalize the node to match x86 shuffle ops if needed - if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true))) + if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true))) return CommuteVectorShuffle(SVOp, DAG); // The checks below are all present in isShuffleMaskLegal, but they are @@ -7430,7 +7438,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { getShufflePSHUFLWImmediate(SVOp), DAG); - if (isSHUFPMask(M, VT, HasFp256)) + if (isSHUFPMask(M, VT)) return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2, getShuffleSHUFImmediate(SVOp), DAG); @@ -7449,8 +7457,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG); // Handle VPERMILPS/D* permutations - if (isVPERMILPMask(M, VT, HasFp256)) { - if (HasInt256 && VT == MVT::v8i32) + if (isVPERMILPMask(M, VT)) { + if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32) return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, getShuffleSHUFImmediate(SVOp), DAG); return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, @@ -13621,7 +13629,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, return (SVT.getVectorNumElements() == 2 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isMOVLMask(M, SVT) || - isSHUFPMask(M, SVT, Subtarget->hasFp256()) || + isSHUFPMask(M, SVT) || isPSHUFDMask(M, SVT) || isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) || isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) || @@ -13646,8 +13654,8 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl &Mask, if (NumElts == 4 && SVT.is128BitVector()) { return (isMOVLMask(Mask, SVT) || isCommutedMOVLMask(Mask, SVT, true) || - isSHUFPMask(Mask, SVT, Subtarget->hasFp256()) || - isSHUFPMask(Mask, SVT, Subtarget->hasFp256(), /* Commuted */ true)); + isSHUFPMask(Mask, SVT) || + isSHUFPMask(Mask, SVT, /* Commuted */ true)); } return false; } diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 17be5df..cf4a0f5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1621,6 +1621,45 @@ defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, VR512, memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +//===----------------------------------------------------------------------===// +// AVX-512 - PSHUFD +// + +multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, + SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT> { + def ri : AVX512Ii8, + EVEX; + def mi : AVX512Ii8, EVEX; +} + +defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32, + i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>; + +let ExeDomain = SSEPackedSingle in +defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp, + memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512, + EVEX_CD8<32, CD8VF>; +let ExeDomain = SSEPackedDouble in +defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp, + memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512, + VEX_W, EVEX_CD8<32, CD8VF>; + +def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))), + (VPERMILPSZri VR512:$src1, imm:$imm)>; +def : Pat<(v8i64 (X86VPermilp 
VR512:$src1, (i8 imm:$imm))), + (VPERMILPDZri VR512:$src1, imm:$imm)>; //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions @@ -1774,8 +1813,8 @@ multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem, memopv16i32, X86testm, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, memopv8i64, - X86testm, v8i64>, EVEX_V512, VEX_W, +defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, + memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// @@ -1914,3 +1953,99 @@ defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32, defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 - MOVDDUP +//===----------------------------------------------------------------------===// + +multiclass avx512_movddup { +def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX; +def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, + (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX; +} + +defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))), + (VMOVDDUPZrm addr:$src)>; + +def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2), + "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))], + IIC_SSE_MOV_LH>, EVEX_4V; +def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2), + "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))], + IIC_SSE_MOV_LH>, EVEX_4V; + +// MOVLHPS patterns +def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)), + (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>; +def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)), + (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>; + +// MOVHLPS patterns +def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)), + (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>; +//===----------------------------------------------------------------------===// +// VSHUFPS - VSHUFPD Operations + +multiclass avx512_shufp { + def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), + (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, + EVEX_4V, TB, Sched<[WriteShuffleLd, ReadAfterLd]>; + def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, + (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, + EVEX_4V, TB, Sched<[WriteShuffle]>; +} + +defm VSHUFPSZ : avx512_shufp, EVEX_V512, EVEX_CD8<32, 
CD8VF>; +defm VSHUFPDZ : avx512_shufp, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + + +multiclass avx512_alignr { + def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, EVEX_4V; + def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, EVEX_4V; +} +defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; + diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 4eaba38..9b27e27 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1327,7 +1327,7 @@ let Predicates = [UseSSE2] in { // SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions //===----------------------------------------------------------------------===// -let AddedComplexity = 20 in { +let AddedComplexity = 20, Predicates = [UseAVX] in { def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1358,7 +1358,7 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { // MOVLHPS patterns def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), (VMOVLHPSrr VR128:$src1, VR128:$src2)>; -- cgit v1.1 From da25cd3e6de8f21005590c2de49868f883cf2410 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 26 Aug 2013 15:05:36 +0000 Subject: SelectionDAG: Use correct pointer size when lowering function arguments v2 This adds minimal support to the SelectionDAG for handling address spaces with different pointer sizes. The SelectionDAG should now correctly lower pointer function arguments to the correct size as well as generate the correct code when lowering getelementptr. This patch also updates the R600 DataLayout to use 32-bit pointers for the local address space. 
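Concretely, the SI data layout now carries a per-address-space pointer
specification along the lines of (a sketch; the real string contains more
components)

    -p:64:64:64-p3:32:32:32

so getPointerTy() for the local address space (address space 3) returns a
32-bit type while pointers in other address spaces keep their native
width.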
v2: - Add more helper functions to TargetLoweringBase - Use CHECK-LABEL for tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189221 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 4 +++- lib/Target/R600/AMDGPUSubtarget.cpp | 4 ++++ lib/Target/R600/SIISelLowering.cpp | 2 +- lib/Target/R600/SIInstructions.td | 9 ++++----- 4 files changed, 12 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 692b3b7..56a9a2b 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -233,6 +233,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, const DataLayout *TD = getTargetMachine().getDataLayout(); GlobalAddressSDNode *G = cast(Op); + + assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS); // XXX: What does the value of G->getOffset() mean? assert(G->getOffset() == 0 && "Do not know what to do with an non-zero offset"); @@ -244,7 +246,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, // XXX: Account for alignment? MFI->LDSSize += Size; - return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32); + return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace())); } void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 8b2e445..53cfe84 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -97,6 +97,10 @@ AMDGPUSubtarget::getDataLayout() const { DataLayout.append("-p:32:32:32"); } + if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + DataLayout.append("-p3:32:32:32"); + } + return DataLayout; } diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index dc721e9..9cbba6c 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -87,7 +87,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setTargetDAGCombine(ISD::SELECT_CC); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 436a2cd..8c52a2e 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1746,14 +1746,13 @@ def : Pat < /********** ======================= **********/ def : Pat < - (local_load i64:$src0), - (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0), - (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0)) + (local_load i32:$src0), + (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0)) >; def : Pat < - (local_store i32:$src1, i64:$src0), - (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0) + (local_store i32:$src1, i32:$src0), + (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0) >; /********** ================== **********/ -- cgit v1.1 From 7a0282daeb214f14d75249cc2d90302c44586c4e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 26 Aug 2013 15:05:44 +0000 Subject: R600: Add support for v4i32 and v2i32 local stores git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189222 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 152 +++++++++++++++++++++------------ lib/Target/R600/AMDGPUISelLowering.h 
| 12 +-- lib/Target/R600/R600ISelLowering.cpp | 2 +- 3 files changed, 107 insertions(+), 59 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 56a9a2b..9df835f 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -67,6 +67,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::f64, Promote); AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + // Custom lowering of vector stores is required for local address space + // stores. + setOperationAction(ISD::STORE, MVT::v4i32, Custom); + // XXX: Native v2i32 local address space stores are possible, but not + // currently implemented. + setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); @@ -221,7 +228,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::STORE: return LowerVectorStore(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); } return Op; @@ -417,7 +424,98 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, return Op; } +SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, + SelectionDAG &DAG) const { + StoreSDNode *Store = dyn_cast(Op); + EVT MemVT = Store->getMemoryVT(); + unsigned MemBits = MemVT.getSizeInBits(); + + // Byte stores are really expensive, so if possible, try to pack + // 32-bit vector truncatating store into an i32 store. 
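+  // For example, a <4 x i8> store becomes a single i32 store of
+  // (e0 | (e1 << 8) | (e2 << 16) | (e3 << 24)).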
+ // XXX: We could also handle optimize other vector bitwidths + if (!MemVT.isVector() || MemBits > 32) { + return SDValue(); + } + + SDLoc DL(Op); + const SDValue &Value = Store->getValue(); + EVT VT = Value.getValueType(); + const SDValue &Ptr = Store->getBasePtr(); + EVT MemEltVT = MemVT.getVectorElementType(); + unsigned MemEltBits = MemEltVT.getSizeInBits(); + unsigned MemNumElements = MemVT.getVectorNumElements(); + EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); + SDValue Mask; + switch(MemEltBits) { + case 8: + Mask = DAG.getConstant(0xFF, PackedVT); + break; + case 16: + Mask = DAG.getConstant(0xFFFF, PackedVT); + break; + default: + llvm_unreachable("Cannot lower this vector store"); + } + SDValue PackedValue; + for (unsigned i = 0; i < MemNumElements; ++i) { + EVT ElemVT = VT.getVectorElementType(); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, + DAG.getConstant(i, MVT::i32)); + Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); + Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); + SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); + Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); + if (i == 0) { + PackedValue = Elt; + } else { + PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); + } + } + return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, + MachinePointerInfo(Store->getMemOperand()->getValue()), + Store->isVolatile(), Store->isNonTemporal(), + Store->getAlignment()); +} + +SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, + SelectionDAG &DAG) const { + StoreSDNode *Store = cast(Op); + EVT MemEltVT = Store->getMemoryVT().getVectorElementType(); + EVT EltVT = Store->getValue().getValueType().getVectorElementType(); + EVT PtrVT = Store->getBasePtr().getValueType(); + unsigned NumElts = Store->getMemoryVT().getVectorNumElements(); + SDLoc SL(Op); + + SmallVector Chains; + + for (unsigned i = 0, e = NumElts; i != e; ++i) { + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + Store->getValue(), DAG.getConstant(i, MVT::i32)); + SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, + Store->getBasePtr(), + DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), + PtrVT)); + Chains.push_back(DAG.getStore(Store->getChain(), SL, Val, Ptr, + MachinePointerInfo(Store->getMemOperand()->getValue()), + Store->isVolatile(), Store->isNonTemporal(), + Store->getAlignment())); + } + return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); +} + +SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG); + if (Result.getNode()) { + return Result; + } + StoreSDNode *Store = cast(Op); + if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + Store->getValue().getValueType().isVector()) { + return SplitVectorStore(Op, DAG); + } + return SDValue(); +} SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const { @@ -524,58 +622,6 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, return DAG.getMergeValues(Ops, 2, DL); } -SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op, - SelectionDAG &DAG) const { - StoreSDNode *Store = dyn_cast(Op); - EVT MemVT = Store->getMemoryVT(); - unsigned MemBits = MemVT.getSizeInBits(); - - // Byte stores are really expensive, so if possible, try to pack - // 32-bit vector truncatating store into an i32 store. 
- // XXX: We could also handle optimize other vector bitwidths - if (!MemVT.isVector() || MemBits > 32) { - return SDValue(); - } - - SDLoc DL(Op); - const SDValue &Value = Store->getValue(); - EVT VT = Value.getValueType(); - const SDValue &Ptr = Store->getBasePtr(); - EVT MemEltVT = MemVT.getVectorElementType(); - unsigned MemEltBits = MemEltVT.getSizeInBits(); - unsigned MemNumElements = MemVT.getVectorNumElements(); - EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); - SDValue Mask; - switch(MemEltBits) { - case 8: - Mask = DAG.getConstant(0xFF, PackedVT); - break; - case 16: - Mask = DAG.getConstant(0xFFFF, PackedVT); - break; - default: - llvm_unreachable("Cannot lower this vector store"); - } - SDValue PackedValue; - for (unsigned i = 0; i < MemNumElements; ++i) { - EVT ElemVT = VT.getVectorElementType(); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, - DAG.getConstant(i, MVT::i32)); - Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); - Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); - SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); - Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); - if (i == 0) { - PackedValue = Elt; - } else { - PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); - } - } - return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, - MachinePointerInfo(Store->getMemOperand()->getValue()), - Store->isVolatile(), Store->isNonTemporal(), - Store->getAlignment()); -} //===----------------------------------------------------------------------===// // Helper functions diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index e3a0dcc..f739aed 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -31,6 +31,12 @@ private: SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + /// \brief Lower vector stores by merging the vector elements into an integer + /// of the same bitwidth. + SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const; + /// \brief Split a vector store into multiple scalar stores. + /// \returns The resulting chain. + SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; protected: @@ -44,17 +50,13 @@ protected: unsigned Reg, EVT VT) const; SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl &Ins) const; - /// \brief Lower vector stores by merging the vector elements into an integer - /// of the same bitwidth. 
- SDValue LowerVectorStore(const SDValue &Op, SelectionDAG &DAG) const; - public: AMDGPUTargetLowering(TargetMachine &TM); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index b822431..e846ff4 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1002,7 +1002,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = Op.getOperand(1); SDValue Ptr = Op.getOperand(2); - SDValue Result = AMDGPUTargetLowering::LowerVectorStore(Op, DAG); + SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG); if (Result.getNode()) { return Result; } -- cgit v1.1 From 8e78012457682d335ee97cf2859dfe03b7e2ae93 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 26 Aug 2013 15:05:49 +0000 Subject: R600: Add support for i8 and i16 local memory stores git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189223 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 4 ++-- lib/Target/R600/AMDGPUInstrInfo.td | 7 +++++++ lib/Target/R600/AMDGPUInstructions.td | 20 +++++++++++++++----- lib/Target/R600/R600Defines.h | 3 ++- lib/Target/R600/R600InstrFormats.td | 2 ++ lib/Target/R600/R600InstrInfo.cpp | 3 ++- lib/Target/R600/R600Instructions.td | 24 +++++++++++++++++++++--- lib/Target/R600/SIInstructions.td | 12 +++++++++--- 8 files changed, 60 insertions(+), 15 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 9df835f..88867b6 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -495,9 +495,9 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, Store->getBasePtr(), DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT)); - Chains.push_back(DAG.getStore(Store->getChain(), SL, Val, Ptr, + Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr, MachinePointerInfo(Store->getMemOperand()->getValue()), - Store->isVolatile(), Store->isNonTemporal(), + MemEltVT, Store->isVolatile(), Store->isNonTemporal(), Store->getAlignment())); } return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index c61993a..c0d757e 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -73,6 +73,13 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE", SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, [SDNPHasChain, SDNPMayStore]>; +// MSKOR instructions are atomic memory instructions used mainly for storing +// 8-bit and 16-bit values. 
The definition is: +// +// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src) +// +// src0: vec4(src, 0, 0, mask) +// src1: dst - rat offset (aka pointer) in dwords def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR", SDTypeProfile<0, 2, []>, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index df0bade..3227f94 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -156,13 +156,23 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr), return isGlobalStore(dyn_cast(N)); }]>; -def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isLocalLoad(dyn_cast(N)); -}]>; - def local_store : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ - return isLocalStore(dyn_cast(N)); + return isLocalStore(dyn_cast(N)); +}]>; + +def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; + +def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; + +def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); }]>; def mskor_global : PatFrag<(ops node:$val, node:$ptr), diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index 8dc9ebb..1781f2a 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -45,7 +45,8 @@ namespace R600_InstFlag { ALU_INST = (1 << 14), LDS_1A = (1 << 15), LDS_1A1D = (1 << 16), - IS_EXPORT = (1 << 17) + IS_EXPORT = (1 << 17), + LDS_1A2D = (1 << 18) }; } diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td index 2ae3311..ae3046d 100644 --- a/lib/Target/R600/R600InstrFormats.td +++ b/lib/Target/R600/R600InstrFormats.td @@ -30,6 +30,7 @@ class InstR600 pattern, bit TEXInst = 0; bit ALUInst = 0; bit IsExport = 0; + bit LDS_1A2D = 0; let Namespace = "AMDGPU"; let OutOperandList = outs; @@ -55,6 +56,7 @@ class InstR600 pattern, let TSFlags{15} = LDS_1A; let TSFlags{16} = LDS_1A1D; let TSFlags{17} = IsExport; + let TSFlags{18} = LDS_1A2D; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 9548a34..4e0607f 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -149,7 +149,8 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { unsigned TargetFlags = get(Opcode).TSFlags; return ((TargetFlags & R600_InstFlag::LDS_1A) | - (TargetFlags & R600_InstFlag::LDS_1A1D)); + (TargetFlags & R600_InstFlag::LDS_1A1D) | + (TargetFlags & R600_InstFlag::LDS_1A2D)); } bool R600InstrInfo::isTransOnly(unsigned Opcode) const { diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index b059a81..3d92278 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1657,13 +1657,31 @@ class R600_LDS_1A1D lds_op, string name, list pattern> : let LDS_1A1D = 1; } -def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", - [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] ->; +class R600_LDS_1A2D lds_op, string name, list pattern> : + R600_LDS < + lds_op, + (outs), + (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, + R600_Reg32:$src2, REL:$src2_rel, 
SEL:$src2_sel, + LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle), + " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel", + pattern> { + let LDS_1A2D = 1; +} def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE", [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] >; +def LDS_BYTE_WRITE : R600_LDS_1A1D<0x12, "LDS_BYTE_WRITE", + [(truncstorei8_local i32:$src1, i32:$src0)] +>; +def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE", + [(truncstorei16_local i32:$src1, i32:$src0)] +>; +def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", + [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] +>; // TRUNC is used for the FLT_TO_INT instructions to work around a // perceived problem where the rounding modes are applied differently diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 8c52a2e..785dbf1 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -392,6 +392,8 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1 def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; +def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; +def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; @@ -1750,11 +1752,15 @@ def : Pat < (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0)) >; -def : Pat < - (local_store i32:$src1, i32:$src0), - (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0) +class DSWritePat : Pat < + (frag i32:$src1, i32:$src0), + (inst 0, $src0, $src1, $src1, 0, 0) >; +def : DSWritePat ; +def : DSWritePat ; +def : DSWritePat ; + /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ -- cgit v1.1 From a01cdea9c660dc8b295782a4ab560d0039ff7571 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 26 Aug 2013 15:05:59 +0000 Subject: R600: Add support for i8 and i16 local memory loads git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189225 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstructions.td | 20 ++++++++++++++++++-- lib/Target/R600/R600ISelLowering.cpp | 29 +++++++++++++++-------------- lib/Target/R600/R600Instructions.td | 12 ++++++++++++ lib/Target/R600/SIInstructions.td | 14 ++++++++++++++ 4 files changed, 59 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 3227f94..dec6082 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -96,6 +96,10 @@ def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; +def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast(N)); }]>; @@ -108,8 +112,12 @@ def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isConstantLoad(dyn_cast(N), -1); }]>; -def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ - return isGlobalLoad(dyn_cast(N)); +def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + +def sextloadi8_local : 
PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ + return isLocalLoad(dyn_cast<LoadSDNode>(N)); }]>; def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; }]>; @@ -132,6 +140,14 @@ def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); }]>; +def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ + return isLocalLoad(dyn_cast<LoadSDNode>(N)); +}]>; + +def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return isLocalLoad(dyn_cast<LoadSDNode>(N)); +}]>; + def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; }]>; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index e846ff4..9bc8e8a 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -104,7 +104,21 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( static_cast<const R600InstrInfo *>(MF->getTarget().getInstrInfo()); switch (MI->getOpcode()) { - default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + default: + if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) { + MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), + TII->get(MI->getOpcode()), + AMDGPU::OQAP); + for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { + NewMI.addOperand(MI->getOperand(i)); + } + TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, + MI->getOperand(0).getReg(), + AMDGPU::OQAP); + } else { + return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + } + break; case AMDGPU::CLAMP_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, @@ -140,19 +154,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::LDS_READ_RET: { - MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), - TII->get(MI->getOpcode()), - AMDGPU::OQAP); - for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { - NewMI.addOperand(MI->getOperand(i)); - } - TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, - MI->getOperand(0).getReg(), - AMDGPU::OQAP); - break; - } - case AMDGPU::MOV_IMM_F32: TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), MI->getOperand(1).getFPImm()->getValueAPF() diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 3d92278..f5c0266 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1682,6 +1682,18 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE", def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] >; +def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET", + [(set i32:$dst, (sextloadi8_local i32:$src0))] +>; +def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET", + [(set i32:$dst, (az_extloadi8_local i32:$src0))] +>; +def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET", + [(set i32:$dst, (sextloadi16_local i32:$src0))] +>; +def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", + [(set i32:$dst, (az_extloadi16_local i32:$src0))] +>; // TRUNC is used for the FLT_TO_INT instructions to work around a // perceived problem where the rounding modes are applied differently diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 785dbf1..136f69c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -395,6 +395,10 @@ def DS_WRITE_B32 :
DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; +def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>; +def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>; +def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>; +def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>; //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; @@ -1747,6 +1751,16 @@ def : Pat < /********** Load/Store Patterns **********/ /********** ======================= **********/ +class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat < + (frag i32:$src0), + (vt (inst 0, $src0, $src0, $src0, 0, 0)) +>; + +def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>; +def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>; +def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>; +def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>; +def : DSReadPat <DS_READ_B32, i32, local_load>; def : Pat < (local_load i32:$src0), (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0)) -- cgit v1.1 From d08a9303614355cfdcac5f2c27c09ce809565423 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 26 Aug 2013 15:06:04 +0000 Subject: R600: Add support for vector local memory loads git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189226 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 23 +++++++++++++++++++++++ lib/Target/R600/AMDGPUISelLowering.h | 2 ++ lib/Target/R600/R600ISelLowering.cpp | 8 ++++++++ lib/Target/R600/SIISelLowering.cpp | 17 +++++++++++++++++ 4 files changed, 50 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 88867b6..1237323 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -424,6 +424,29 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, return Op; } +SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, + SelectionDAG &DAG) const { + LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); + EVT MemEltVT = Load->getMemoryVT().getVectorElementType(); + EVT EltVT = Op.getValueType().getVectorElementType(); + EVT PtrVT = Load->getBasePtr().getValueType(); + unsigned NumElts = Load->getMemoryVT().getVectorNumElements(); + SmallVector<SDValue, 8> Loads; + SDLoc SL(Op); + + for (unsigned i = 0, e = NumElts; i != e; ++i) { + SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(), + DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT)); + Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT, + Load->getChain(), Ptr, + MachinePointerInfo(Load->getMemOperand()->getValue()), + MemEltVT, Load->isVolatile(), Load->isNonTemporal(), + Load->getAlignment())); + } + return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0], + Loads.size()); +} + SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const { StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index f739aed..75ac4c2 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -50,6 +50,8 @@ protected: unsigned Reg, EVT VT) const; SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const; + /// \brief Split a vector load into multiple scalar loads.
+ SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 9bc8e8a..f0242b8 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1155,6 +1155,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue Ptr = Op.getOperand(1); SDValue LoweredLoad; + if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { + SDValue MergedValues[2] = { + SplitVectorLoad(Op, DAG), + Chain + }; + return DAG.getMergeValues(MergedValues, 2, DL); + } + int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); if (ConstantBlock > -1) { SDValue Result; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9cbba6c..f196059 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -66,6 +66,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::BITCAST, MVT::i128, Legal); + // We need to custom lower vector loads from local memory + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -368,6 +372,19 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); + case ISD::LOAD: { + LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); + if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + Op.getValueType().isVector()) { + SDValue MergedValues[2] = { + SplitVectorLoad(Op, DAG), + Load->getChain() + }; + return DAG.getMergeValues(MergedValues, 2, SDLoc(Op)); + } else { + return SDValue(); + } + } case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); -- cgit v1.1 From 055d20742642a7392d5931b61f2ea09c60c204dd Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 26 Aug 2013 19:42:51 +0000 Subject: [PowerPC] More fast-isel chunks (returns and integer extends) Incremental improvement to fast-isel for PPC64. This allows us to select on ret, sext, and zext. Filling in sext/zext improves some of the existing logic in handling compare-immediates that needed extends. A simplified return convention for fast-isel is also added to the PPC64 calling conventions. All call/return processing for DAG selection is handled with custom code, so there isn't an existing CC to rely on here. The include of PPCGenCallingConv.inc causes compiler warnings due to the 32-bit calling conventions that are not used, so the dummy function "usePPC32CCs()" is added here to silence those. Test cases for the return and extend logic are added.
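As a rough illustration of the simplified convention (a sketch written for this archive, not code from the patch itself): RetCC_PPC64_ELF_FIS widens any i8/i16/i32 return value to i64 before assigning it to X3/X4, so a fast-isel return of a small integer always materializes a full 64-bit value. The standalone C++ model below encodes just that promotion rule; the helper name is invented for the illustration.

#include <cassert>

// Width, in bits, that a scalar integer return value occupies under the
// fast-isel return convention sketched here: anything narrower than 64 bits
// is promoted to i64 (the CCPromoteToType<i64> entries in the diff below).
static unsigned promotedReturnWidth(unsigned Bits) {
  return (Bits == 8 || Bits == 16 || Bits == 32) ? 64 : Bits;
}

int main() {
  assert(promotedReturnWidth(8) == 64);   // i8  -> i64 in X3
  assert(promotedReturnWidth(16) == 64);  // i16 -> i64 in X3
  assert(promotedReturnWidth(64) == 64);  // i64 passes through unchanged
  return 0;
}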
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189266 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCallingConv.td | 20 ++++ lib/Target/PowerPC/PPCFastISel.cpp | 222 ++++++++++++++++++++++++++++++++++- lib/Target/PowerPC/PPCInstr64Bit.td | 16 +++ 3 files changed, 255 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index a584188..9c937ed 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -37,6 +37,26 @@ def RetCC_PPC : CallingConv<[ ]>; +// Note that we don't currently have calling conventions for 64-bit +// PowerPC, but handle all the complexities of the ABI in the lowering +// logic. FIXME: See if the logic can be simplified with use of CCs. +// This may require some extensions to current table generation. + +// Simple return-value convention for 64-bit ELF PowerPC fast isel. +// All small ints are promoted to i64. Vector types, quadword ints, +// and multiple register returns are "supported" to avoid compile +// errors, but none are handled by the fast selector. +def RetCC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i8], CCPromoteToType<i64>>, + CCIfType<[i16], CCPromoteToType<i64>>, + CCIfType<[i32], CCPromoteToType<i64>>, + CCIfType<[i64], CCAssignToReg<[X3, X4]>>, + CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[f32], CCAssignToReg<[F1, F2]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> +]>; + //===----------------------------------------------------------------------===// // PowerPC System V Release 4 32-bit ABI //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index ebc7057..8db4432 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -95,6 +95,8 @@ class PPCFastISel : public FastISel { private: bool SelectBranch(const Instruction *I); bool SelectIndirectBr(const Instruction *I); + bool SelectRet(const Instruction *I); + bool SelectIntExt(const Instruction *I); // Utility routines. private: @@ -109,6 +111,10 @@ class PPCFastISel : public FastISel { unsigned PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC); + // Call handling routines. + private: + CCAssignFn *usePPC32CCs(unsigned Flag); + private: #include "PPCGenFastISel.inc" }; } // end anonymous namespace +#include "PPCGenCallingConv.inc" + +// Function whose sole purpose is to kill compiler warnings +// stemming from unused functions included from PPCGenCallingConv.inc. +CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) { + if (Flag == 1) + return CC_PPC32_SVR4; + else if (Flag == 2) + return CC_PPC32_SVR4_ByVal; + else if (Flag == 3) + return CC_PPC32_SVR4_VarArg; + else + return RetCC_PPC; +} + static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { switch (Pred) { // These are not representable with any single compare. @@ -309,13 +330,164 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return true; } +// Attempt to fast-select a return instruction. +bool PPCFastISel::SelectRet(const Instruction *I) { + + if (!FuncInfo.CanLowerReturn) + return false; + + const ReturnInst *Ret = cast<ReturnInst>(I); + const Function &F = *I->getParent()->getParent(); + + // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs; + CallingConv::ID CC = F.getCallingConv(); + + if (Ret->getNumOperands() > 0) { + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ValLocs; + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context); + CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS); + const Value *RV = Ret->getOperand(0); + + // FIXME: Only one output register for now. + if (ValLocs.size() > 1) + return false; + + // Special case for returning a constant integer of any size. + // Materialize the constant as an i64 and copy it to the return + // register. This avoids an unnecessary extend or truncate. + if (isa<ConstantInt>(*RV)) { + const Constant *C = cast<Constant>(RV); + unsigned SrcReg = PPCMaterializeInt(C, MVT::i64); + unsigned RetReg = ValLocs[0].getLocReg(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + RetReg).addReg(SrcReg); + RetRegs.push_back(RetReg); + + } else { + unsigned Reg = getRegForValue(RV); + + if (Reg == 0) + return false; + + // Copy the result values into the output registers. + for (unsigned i = 0; i < ValLocs.size(); ++i) { + + CCValAssign &VA = ValLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + RetRegs.push_back(VA.getLocReg()); + unsigned SrcReg = Reg + VA.getValNo(); + + EVT RVEVT = TLI.getValueType(RV->getType()); + if (!RVEVT.isSimple()) + return false; + MVT RVVT = RVEVT.getSimpleVT(); + MVT DestVT = VA.getLocVT(); + + if (RVVT != DestVT && RVVT != MVT::i8 && + RVVT != MVT::i16 && RVVT != MVT::i32) + return false; + + if (RVVT != DestVT) { + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + llvm_unreachable("Full value assign but types don't match?"); + case CCValAssign::AExt: + case CCValAssign::ZExt: { + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true)) + return false; + SrcReg = TmpReg; + break; + } + case CCValAssign::SExt: { + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false)) + return false; + SrcReg = TmpReg; + break; + } + } + } + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), RetRegs[i]) + .addReg(SrcReg); + } + } + } + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(PPC::BLR)); + + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); + + return true; +} + // Attempt to emit an integer extend of SrcReg into DestReg. Both // signed and zero extensions are supported. Return false if we -// can't handle it. Not yet implemented. +// can't handle it. bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, bool IsZExt) { - return (SrcVT == MVT::i8 && SrcReg && DestVT == MVT::i8 && DestReg - && IsZExt && false); + if (DestVT != MVT::i32 && DestVT != MVT::i64) + return false; + if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32) + return false; + + // Signed extensions use EXTSB, EXTSH, EXTSW. + if (!IsZExt) { + unsigned Opc; + if (SrcVT == MVT::i8) + Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64; + else if (SrcVT == MVT::i16) + Opc = (DestVT == MVT::i32) ?
PPC::EXTSH : PPC::EXTSH8_32_64; + else { + assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??"); + Opc = PPC::EXTSW_32_64; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addReg(SrcReg); + + // Unsigned 32-bit extensions use RLWINM. + } else if (DestVT == MVT::i32) { + unsigned MB; + if (SrcVT == MVT::i8) + MB = 24; + else { + assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??"); + MB = 16; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM), + DestReg) + .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31); + + // Unsigned 64-bit extensions use RLDICL (with a 32-bit source). + } else { + unsigned MB; + if (SrcVT == MVT::i8) + MB = 56; + else if (SrcVT == MVT::i16) + MB = 48; + else + MB = 32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(PPC::RLDICL_32_64), DestReg) + .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB); + } + + return true; } // Attempt to fast-select an indirect branch instruction. @@ -335,6 +507,45 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { return true; } +// Attempt to fast-select an integer extend instruction. +bool PPCFastISel::SelectIntExt(const Instruction *I) { + Type *DestTy = I->getType(); + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + + bool IsZExt = isa<ZExtInst>(I); + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) return false; + + EVT SrcEVT, DestEVT; + SrcEVT = TLI.getValueType(SrcTy, true); + DestEVT = TLI.getValueType(DestTy, true); + if (!SrcEVT.isSimple()) + return false; + if (!DestEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DestVT = DestEVT.getSimpleVT(); + + // If we know the register class needed for the result of this + // instruction, use it. Otherwise pick the register class of the + // correct size that does not contain X0/R0, since we don't know + // whether downstream uses permit that assignment. + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + (AssignedReg ? MRI.getRegClass(AssignedReg) : + (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : + &PPC::GPRC_and_GPRC_NOR0RegClass)); + unsigned ResultReg = createResultReg(RC); + + if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt)) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + // Attempt to fast-select an instruction that wasn't handled by // the table-generated machinery. bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { @@ -344,6 +555,11 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { return SelectBranch(I); case Instruction::IndirectBr: return SelectIndirectBr(I); + case Instruction::Ret: + return SelectRet(I); + case Instruction::ZExt: + case Instruction::SExt: + return SelectIntExt(I); // Here add other flavors of Instruction::XXX that automated // cases don't catch. For example, switches are terminators // that aren't yet handled.
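Before the matching .td additions below, a note on the zero-extension masks chosen above (a self-contained model, not part of the patch): with SH=0, RLWINM keeps bits MB..31 of a 32-bit value and RLDICL keeps bits MB..63 of a 64-bit value, so MB is simply the destination width minus the source width. The small C++ program below checks that arithmetic; the helper names are invented for the illustration.

#include <cassert>
#include <cstdint>

// rlwinm rD, rS, 0, MB, 31 masks to the low (32 - MB) bits.
static uint32_t rlwinm_sh0(uint32_t V, unsigned MB) {
  return V & (0xFFFFFFFFu >> MB);
}

// rldicl rD, rS, 0, MB masks to the low (64 - MB) bits.
static uint64_t rldicl_sh0(uint64_t V, unsigned MB) {
  return V & (~0ULL >> MB);
}

int main() {
  assert(rlwinm_sh0(0xDEADBEEFu, 24) == 0xEF);                  // zext i8  -> i32
  assert(rlwinm_sh0(0xDEADBEEFu, 16) == 0xBEEF);                // zext i16 -> i32
  assert(rldicl_sh0(0xFFFF0000CAFEF00DULL, 48) == 0xF00D);      // zext i16 -> i64
  assert(rldicl_sh0(0xFFFF0000CAFEF00DULL, 32) == 0xCAFEF00D);  // zext i32 -> i64
  return 0;
}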
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index f78bb38..9257904 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -506,6 +506,14 @@ defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS), [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; } // Interpretation64Bit +// For fast-isel: +let isCodeGenOnly = 1 in { +def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS), + "extsb $rA, $rS", IntSimple, []>, isPPC64; +def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS), + "extsh $rA, $rS", IntSimple, []>, isPPC64; +} // isCodeGenOnly for fast-isel + defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS), "extsw", "$rA, $rS", IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; @@ -569,6 +577,14 @@ defm RLDICL : MDForm_1r<30, 0, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; +// For fast-isel: +let isCodeGenOnly = 1 in +def RLDICL_32_64 : MDForm_1<30, 0, + (outs g8rc:$rA), + (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64; +// End fast-isel. defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI, -- cgit v1.1 From bb4066123d91c0347cb6333caca527d1a979e703 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 26 Aug 2013 20:07:25 +0000 Subject: ARM: Fix ELF global base reg initialization. The created machine code wasn't properly in SSA, which the machine verifier properly complains about. Now that fast-isel is closer to verifier clean, errors like this show up more clearly. Additionally, the Thumb pseudo tPICADD was used for both ARM and Thumb mode functions, which is obviously wrong. Fix that along the way. Test case is part of the following commit which will finish making an additional fast-isel test verifier clean and enable it for the regression test suite. This commit is separate since it's not just a verifier cleanup, but an actual correctness issue. rdar://12594152 (for the fast-isel verifier aspects) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189269 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 868dd47..3508795 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" @@ -119,20 +120,24 @@ namespace { MachineBasicBlock &FirstMBB = MF.front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); DebugLoc DL = FirstMBB.findDebugLoc(MBBI); - unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + unsigned TempReg = + MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::t2LDRpci : ARM::LDRcp; const TargetInstrInfo &TII = *TM->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, - TII.get(Opc), GlobalBaseReg) + TII.get(Opc), TempReg) .addConstantPoolIndex(Idx); if (Opc == ARM::LDRcp) MIB.addImm(0); AddDefaultPred(MIB); // Fix the GOT address by adding pc.
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD + : ARM::PICADD; BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg) - .addReg(GlobalBaseReg).addImm(ARMPCLabelIndex); + .addReg(TempReg).addImm(ARMPCLabelIndex); return true; } -- cgit v1.1 From e3dad19e0de5c639886055c09da1f4faaa8556f9 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 26 Aug 2013 20:07:29 +0000 Subject: ARM: FastISel verifier error cleanup. Constant pool and global value reference instructions need more restricted register classes than plain GPR. rdar://12594152 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189270 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 0927e9a..2431477 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -653,6 +653,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { .addConstantPoolIndex(Idx)); else // The extra immediate is for addrmode2. + DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) @@ -728,6 +729,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { AddOptionalDefs(MIB); } else { // The extra immediate is for addrmode2. + DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) @@ -3027,12 +3029,14 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT)); // Load value. if (isThumb2) { + DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRpci), DestReg1) .addConstantPoolIndex(Idx)); Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; } else { // The extra immediate is for addrmode2.
+ DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg1) .addConstantPoolIndex(Idx).addImm(0)); @@ -3046,6 +3050,9 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, } unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT)); + DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0); + DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1); + GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg2) .addReg(DestReg1) -- cgit v1.1 From 7c42ede04579373a2d3e124b4417d89430d541f3 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 26 Aug 2013 20:11:46 +0000 Subject: Dummy code to silence warning from r189266 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189272 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 9 +++++++++ lib/Target/PowerPC/PPCISelLowering.h | 2 ++ 2 files changed, 11 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bc55d38..28b9ba9 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1810,6 +1810,15 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" +// Function whose sole purpose is to kill compiler warnings +// stemming from unused functions included from PPCGenCallingConv.inc. +CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { + if (Flag == 1) + return RetCC_PPC64_ELF_FIS; /* CC_PPC64_ELF_FIS in future patch. */ + else + return RetCC_PPC64_ELF_FIS; +} + bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index aa5e821..df3af35 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -627,6 +627,8 @@ namespace llvm { SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const; + + CCAssignFn *useFastISelCCs(unsigned Flag) const; }; namespace PPC { -- cgit v1.1 From 383a810b129aa5120d6a7f6e88e141ec4a45f61b Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 26 Aug 2013 20:22:05 +0000 Subject: ARM: Constrain regclass for TSTri instruction. Get the register class right for the TST instruction. This keeps the machine verifier happy, enabling us to turn it on for another test. rdar://12594152 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189274 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 2431477..3cbb9a8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1380,6 +1380,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { unsigned TstOpc = isThumb2 ?
ARM::t2TSTri : ARM::TSTri; unsigned OpReg = getRegForValue(TI->getOperand(0)); + OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) .addReg(OpReg).addImm(1)); @@ -1417,6 +1418,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; + CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) .addReg(CmpReg).addImm(1)); -- cgit v1.1 From 9c3dd1b0d1e96ef408b68da3b06c6ebd6c943601 Mon Sep 17 00:00:00 2001 From: Charles Davis Date: Tue, 27 Aug 2013 05:00:43 +0000 Subject: Move everything depending on Object/MachOFormat.h over to Support/MachO.h. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189315 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 32 ++--- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 128 ++++++++++---------- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 6 +- .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 90 +++++++------- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 10 +- .../X86/MCTargetDesc/X86MachORelocationInfo.cpp | 24 ++-- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 132 ++++++++++----------- 7 files changed, 211 insertions(+), 211 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index b1e25d8..828442f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -25,9 +25,9 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -640,16 +640,16 @@ public: // FIXME: This should be in a separate file. 
class DarwinARMAsmBackend : public ARMAsmBackend { public: - const object::mach::CPUSubtypeARM Subtype; + const MachO::CPUSubTypeARM Subtype; DarwinARMAsmBackend(const Target &T, const StringRef TT, - object::mach::CPUSubtypeARM st) + MachO::CPUSubTypeARM st) : ARMAsmBackend(T, TT), Subtype(st) { HasDataInCodeSupport = true; } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - object::mach::CTM_ARM, + MachO::CPU_TYPE_ARM, Subtype); } @@ -664,18 +664,18 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { - object::mach::CPUSubtypeARM CS = - StringSwitch(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T) - .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ) - .Cases("armv6", "thumbv6", object::mach::CSARM_V6) - .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M) - .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM) - .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F) - .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K) - .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M) - .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S) - .Default(object::mach::CSARM_V7); + MachO::CPUSubTypeARM CS = + StringSwitch(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) + .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) + .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) + .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) + .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) + .Cases("armv7f", "thumbv7f", MachO::CPU_SUBTYPE_ARM_V7F) + .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) + .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) + .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) + .Default(MachO::CPU_SUBTYPE_ARM_V7); return new DarwinARMAsmBackend(T, TT, CS); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index b9efe74..ee43f5f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -20,10 +20,9 @@ #include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" using namespace llvm; -using namespace llvm::object; namespace { class ARMMachObjectWriter : public MCMachObjectTargetWriter { @@ -63,7 +62,7 @@ public: static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, unsigned &Log2Size) { - RelocType = unsigned(macho::RIT_Vanilla); + RelocType = unsigned(MachO::ARM_RELOC_VANILLA); Log2Size = ~0U; switch (Kind) { @@ -92,21 +91,21 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: - RelocType = unsigned(macho::RIT_ARM_Branch24Bit); + RelocType = unsigned(MachO::ARM_RELOC_BR24); // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); return true; // Handle Thumb branches. 
case ARM::fixup_arm_thumb_br: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); Log2Size = llvm::Log2_32(2); return true; case ARM::fixup_t2_uncondbranch: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); Log2Size = llvm::Log2_32(4); return true; @@ -121,23 +120,23 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, // 1 - thumb instructions case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 1; return true; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 3; return true; case ARM::fixup_arm_movw_lo16: case ARM::fixup_arm_movw_lo16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 0; return true; case ARM::fixup_t2_movw_lo16: case ARM::fixup_t2_movw_lo16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 2; return true; } @@ -153,7 +152,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_ARM_Half; + unsigned Type = MachO::ARM_RELOC_HALF; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -179,7 +178,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. - Type = macho::RIT_ARM_HalfDifference; + Type = MachO::ARM_RELOC_HALF_SECTDIFF; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } @@ -223,29 +222,28 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, break; } - if (Type == macho::RIT_ARM_HalfDifference) { + MachO::scattered_relocation_info MRE; + if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) { uint32_t OtherHalf = MovtBit ? 
(FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); - macho::RelocationEntry MRE; - MRE.Word0 = ((OtherHalf << 0) | - (macho::RIT_Pair << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + MRE.r_address = OtherHalf; + MRE.r_type = MachO::ARM_RELOC_PAIR; + MRE.r_length = ((MovtBit << 0) | + (ThumbBit << 1)); + MRE.r_pcrel = IsPCRel; + MRE.r_scattered = 1; + MRE.r_value = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; + MRE.r_address = FixupOffset; + MRE.r_type = Type; + MRE.r_length = ((MovtBit << 0) | + (ThumbBit << 1)); + MRE.r_pcrel = IsPCRel; + MRE.r_scattered = 1; + MRE.r_value = Value; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -259,7 +257,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; + unsigned Type = MachO::ARM_RELOC_VANILLA; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -284,31 +282,30 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. - Type = macho::RIT_Difference; + Type = MachO::ARM_RELOC_SECTDIFF; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } + MachO::scattered_relocation_info MRE; // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + if (Type == MachO::ARM_RELOC_SECTDIFF || + Type == MachO::ARM_RELOC_LOCAL_SECTDIFF) { + MRE.r_address = 0; + MRE.r_type = MachO::ARM_RELOC_PAIR; + MRE.r_length = Log2Size; + MRE.r_pcrel = IsPCRel; + MRE.r_scattered = 1; + MRE.r_value = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; + MRE.r_address = FixupOffset; + MRE.r_type = Type; + MRE.r_length = Log2Size; + MRE.r_pcrel = IsPCRel; + MRE.r_scattered = 1; + MRE.r_value = Value; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -326,13 +323,13 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, switch (RelocType) { default: return false; - case macho::RIT_ARM_Branch24Bit: + case MachO::ARM_RELOC_BR24: // PC pre-adjustment of 8 for these instructions. Value -= 8; // ARM BL/BLX has a 25-bit offset. Range = 0x1ffffff; break; - case macho::RIT_ARM_ThumbBranch22Bit: + case MachO::ARM_THUMB_RELOC_BR22: // PC pre-adjustment of 4 for these instructions. Value -= 4; // Thumb BL/BLX has a 24-bit offset. 
@@ -361,7 +358,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; - unsigned RelocType = macho::RIT_Vanilla; + unsigned RelocType = MachO::ARM_RELOC_VANILLA; if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) // If we failed to get fixup kind info, it's because there's no legal // relocation type for the fixup kind. This happens when it's a fixup that's @@ -374,7 +371,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // scattered relocation entry. Differences always require scattered // relocations. if (Target.getSymB()) { - if (RelocType == macho::RIT_ARM_Half) + if (RelocType == MachO::ARM_RELOC_HALF) return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, @@ -392,7 +389,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // // Is this right for ARM? uint32_t Offset = Target.getConstant(); - if (IsPCRel && RelocType == macho::RIT_Vanilla) + if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA) Offset += 1 << Log2Size; if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, @@ -445,17 +442,17 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MachO::relocation_info MRE; + MRE.r_address = FixupOffset; + MRE.r_symbolnum = Index; + MRE.r_pcrel = IsPCRel; + MRE.r_length = Log2Size; + MRE.r_extern = IsExtern; + MRE.r_type = Type; // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. - if (Type == macho::RIT_ARM_Half) { + if (Type == MachO::ARM_RELOC_HALF) { // The other-half value only gets populated for the movt and movw // relocation entries. uint32_t Value = 0; @@ -474,11 +471,12 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Value = FixedValue & 0xffff; break; } - macho::RelocationEntry MREPair; - MREPair.Word0 = Value; - MREPair.Word1 = ((0xffffff) | - (Log2Size << 25) | - (macho::RIT_Pair << 28)); + MachO::relocation_info MREPair; + MREPair.r_address = Value; + MREPair.r_symbolnum = 0xffffff; + MREPair.r_length = Log2Size; + MREPair.r_pcrel = MREPair.r_extern = 0; + MREPair.r_type = MachO::ARM_RELOC_PAIR; Writer->addRelocation(Fragment->getParent(), MREPair); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index dd61954..ffeac43 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -16,9 +16,9 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -164,8 +164,8 @@ namespace { return createPPCMachObjectWriter( OS, /*Is64Bit=*/is64, - (is64 ? object::mach::CTM_PowerPC64 : object::mach::CTM_PowerPC), - object::mach::CSPPC_ALL); + (is64 ? 
MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC), + MachO::CPU_SUBTYPE_POWERPC_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index fc9b892..aea3c5b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -16,12 +16,11 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" using namespace llvm; -using namespace llvm::object; namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { @@ -90,29 +89,29 @@ static unsigned getRelocType(const MCValue &Target, Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); // determine the type of the relocation - unsigned Type = macho::RIT_Vanilla; + unsigned Type = MachO::GENERIC_RELOC_VANILLA; if (IsPCRel) { // relative to PC switch ((unsigned)FixupKind) { default: report_fatal_error("Unimplemented fixup kind (relative)"); case PPC::fixup_ppc_br24: - Type = macho::RIT_PPC_BR24; // R_PPC_REL24 + Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24 break; case PPC::fixup_ppc_brcond14: - Type = macho::RIT_PPC_BR14; + Type = MachO::PPC_RELOC_BR14; break; case PPC::fixup_ppc_half16: switch (Modifier) { default: llvm_unreachable("Unsupported modifier for half16 fixup"); case MCSymbolRefExpr::VK_PPC_HA: - Type = macho::RIT_PPC_HA16; + Type = MachO::PPC_RELOC_HA16; break; case MCSymbolRefExpr::VK_PPC_LO: - Type = macho::RIT_PPC_LO16; + Type = MachO::PPC_RELOC_LO16; break; case MCSymbolRefExpr::VK_PPC_HI: - Type = macho::RIT_PPC_HI16; + Type = MachO::PPC_RELOC_HI16; break; } break; @@ -126,13 +125,13 @@ static unsigned getRelocType(const MCValue &Target, default: llvm_unreachable("Unsupported modifier for half16 fixup"); case MCSymbolRefExpr::VK_PPC_HA: - Type = macho::RIT_PPC_HA16_SECTDIFF; + Type = MachO::PPC_RELOC_HA16_SECTDIFF; break; case MCSymbolRefExpr::VK_PPC_LO: - Type = macho::RIT_PPC_LO16_SECTDIFF; + Type = MachO::PPC_RELOC_LO16_SECTDIFF; break; case MCSymbolRefExpr::VK_PPC_HI: - Type = macho::RIT_PPC_HI16_SECTDIFF; + Type = MachO::PPC_RELOC_HI16_SECTDIFF; break; } break; @@ -145,30 +144,34 @@ static unsigned getRelocType(const MCValue &Target, return Type; } -static void makeRelocationInfo(macho::RelocationEntry &MRE, +static void makeRelocationInfo(MachO::any_relocation_info &MRE, const uint32_t FixupOffset, const uint32_t Index, const unsigned IsPCRel, const unsigned Log2Size, const unsigned IsExtern, const unsigned Type) { - MRE.Word0 = FixupOffset; + MRE.r_word0 = FixupOffset; // The bitfield offsets that work (as determined by trial-and-error) // are different than what is documented in the mach-o manuals. - // Is this an endianness issue w/ PPC? - MRE.Word1 = ((Index << 8) | // was << 0 - (IsPCRel << 7) | // was << 24 - (Log2Size << 5) | // was << 25 - (IsExtern << 4) | // was << 27 - (Type << 0)); // was << 28 + // This appears to be an endianness issue; reversing the order of the + // documented bitfields in <llvm/Support/MachO.h> fixes this (but + // breaks x86/ARM assembly).
+ MRE.r_word1 = ((Index << 8) | // was << 0 + (IsPCRel << 7) | // was << 24 + (Log2Size << 5) | // was << 25 + (IsExtern << 4) | // was << 27 + (Type << 0)); // was << 28 } static void -makeScatteredRelocationInfo(macho::RelocationEntry &MRE, const uint32_t Addr, - const unsigned Type, const unsigned Log2Size, - const unsigned IsPCRel, const uint32_t Value2) { - // For notes on bitfield positions and endianness, see: - // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry - MRE.Word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) | (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; +makeScatteredRelocationInfo(MachO::scattered_relocation_info &MRE, + const uint32_t Addr, const unsigned Type, + const unsigned Log2Size, const unsigned IsPCRel, + const uint32_t Value2) { + MRE.r_scattered = true; + MRE.r_pcrel = IsPCRel; + MRE.r_length = Log2Size; + MRE.r_type = Type; + MRE.r_address = Addr; + MRE.r_value = Value2; } /// Compute fixup offset (address). @@ -223,18 +226,19 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); - // FIXME: is Type correct? see include/llvm/Object/MachOFormat.h + // FIXME: is Type correct? see include/llvm/Support/MachO.h Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } // FIXME: does FixedValue get used?? // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_PPC_SECTDIFF || Type == macho::RIT_PPC_HI16_SECTDIFF || - Type == macho::RIT_PPC_LO16_SECTDIFF || - Type == macho::RIT_PPC_HA16_SECTDIFF || - Type == macho::RIT_PPC_LO14_SECTDIFF || - Type == macho::RIT_PPC_LOCAL_SECTDIFF) { + if (Type == MachO::PPC_RELOC_SECTDIFF || + Type == MachO::PPC_RELOC_HI16_SECTDIFF || + Type == MachO::PPC_RELOC_LO16_SECTDIFF || + Type == MachO::PPC_RELOC_HA16_SECTDIFF || + Type == MachO::PPC_RELOC_LO14_SECTDIFF || + Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) { // X86 had this piece, but ARM does not // If the offset is too large to fit in a scattered relocation, // we're hosed. It's an unfortunate limitation of the MachO format. @@ -253,7 +257,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( // see PPCMCExpr::EvaluateAsRelocatableImpl() uint32_t other_half = 0; switch (Type) { - case macho::RIT_PPC_LO16_SECTDIFF: + case MachO::PPC_RELOC_LO16_SECTDIFF: other_half = (FixedValue >> 16) & 0xffff; // applyFixupOffset longer extracts the high part because it now assumes // this was already done. @@ -262,12 +266,12 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( // So we need to adjust FixedValue again here. FixedValue &= 0xffff; break; - case macho::RIT_PPC_HA16_SECTDIFF: + case MachO::PPC_RELOC_HA16_SECTDIFF: other_half = FixedValue & 0xffff; FixedValue = ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 
1 : 0)) & 0xffff; break; - case macho::RIT_PPC_HI16_SECTDIFF: + case MachO::PPC_RELOC_HI16_SECTDIFF: other_half = FixedValue & 0xffff; FixedValue = (FixedValue >> 16) & 0xffff; break; @@ -276,9 +280,9 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( break; } - macho::RelocationEntry MRE; - makeScatteredRelocationInfo(MRE, other_half, macho::RIT_Pair, Log2Size, - IsPCRel, Value2); + MachO::scattered_relocation_info MRE; + makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR, + Log2Size, IsPCRel, Value2); Writer->addRelocation(Fragment->getParent(), MRE); } else { // If the offset is more than 24-bits, it won't fit in a scattered @@ -291,7 +295,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( if (FixupOffset > 0xffffff) return false; } - macho::RelocationEntry MRE; + MachO::scattered_relocation_info MRE; makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value); Writer->addRelocation(Fragment->getParent(), MRE); return true; @@ -312,7 +316,8 @@ void PPCMachObjectWriter::RecordPPCRelocation( // relocations. if (Target.getSymB() && // Q: are branch targets ever scattered? - RelocType != macho::RIT_PPC_BR24 && RelocType != macho::RIT_PPC_BR14) { + RelocType != MachO::PPC_RELOC_BR24 && + RelocType != MachO::PPC_RELOC_BR14) { RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); return; @@ -369,8 +374,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( FixedValue -= Writer->getSectionAddress(Fragment->getParent()); } - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; + MachO::any_relocation_info MRE; makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern, Type); Writer->addRelocation(Fragment->getParent(), MRE); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 598ddee..fc3bae3 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -19,10 +19,10 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -395,8 +395,8 @@ public: MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, - object::mach::CTM_i386, - object::mach::CSX86_ALL); + MachO::CPU_TYPE_I386, + MachO::CPU_SUBTYPE_I386_ALL); } }; @@ -409,8 +409,8 @@ public: MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, - object::mach::CTM_x86_64, - object::mach::CSX86_ALL); + MachO::CPU_TYPE_X86_64, + MachO::CPU_SUBTYPE_X86_64_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index 75b5acf..209b1d0 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -17,7 +17,7 @@ using namespace llvm; using namespace object; -using namespace macho; +using namespace MachO; namespace { class X86_64MachORelocationInfo : public MCRelocationInfo { @@ -33,7 +33,7 @@ public: StringRef SymName; SymI->getName(SymName); uint64_t SymAddr; 
SymI->getAddress(SymAddr); - RelocationEntry RE = Obj->getRelocation(Rel.getRawDataRefImpl()); + any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl()); bool isPCRel = Obj->getAnyRelocationPCRel(RE); MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); @@ -43,44 +43,44 @@ public: const MCExpr *Expr = 0; switch(RelType) { - case RIT_X86_64_TLV: + case X86_64_RELOC_TLV: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); break; - case RIT_X86_64_Signed4: + case X86_64_RELOC_SIGNED_4: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(4, Ctx), Ctx); break; - case RIT_X86_64_Signed2: + case X86_64_RELOC_SIGNED_2: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(2, Ctx), Ctx); break; - case RIT_X86_64_Signed1: + case X86_64_RELOC_SIGNED_1: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(1, Ctx), Ctx); break; - case RIT_X86_64_GOTLoad: + case X86_64_RELOC_GOT_LOAD: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); break; - case RIT_X86_64_GOT: + case X86_64_RELOC_GOT: Expr = MCSymbolRefExpr::Create(Sym, isPCRel ? MCSymbolRefExpr::VK_GOTPCREL : MCSymbolRefExpr::VK_GOT, Ctx); break; - case RIT_X86_64_Subtractor: + case X86_64_RELOC_SUBTRACTOR: { RelocationRef RelNext; Obj->getRelocationNext(Rel.getRawDataRefImpl(), RelNext); - RelocationEntry RENext = Obj->getRelocation(RelNext.getRawDataRefImpl()); + any_relocation_info RENext = Obj->getRelocation(RelNext.getRawDataRefImpl()); // X86_64_SUBTRACTOR must be followed by a relocation of type - // X86_64_RELOC_UNSIGNED . + // X86_64_RELOC_UNSIGNED. // NOTE: Scattered relocations don't exist on x86_64. unsigned RType = Obj->getAnyRelocationType(RENext); - if (RType != RIT_X86_64_Unsigned) + if (RType != X86_64_RELOC_UNSIGNED) report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 6eff224..45437e0 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -16,12 +16,11 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" using namespace llvm; -using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { @@ -132,7 +131,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. - Type = macho::RIT_X86_64_Unsigned; + Type = MachO::X86_64_RELOC_UNSIGNED; Index = 0; // FIXME: I believe this is broken, I don't think the linker can understand @@ -141,7 +140,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // is to use an absolute symbol (which we don't support yet). 
if (IsPCRel) { IsExtern = 1; - Type = macho::RIT_X86_64_Branch; + Type = MachO::X86_64_RELOC_BRANCH; } } else if (Target.getSymB()) { // A - B + constant const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -193,15 +192,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; } - Type = macho::RIT_X86_64_Unsigned; - - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + Type = MachO::X86_64_RELOC_UNSIGNED; + + MachO::relocation_info MRE; + MRE.r_address = FixupOffset; + MRE.r_symbolnum = Index; + MRE.r_pcrel = IsPCRel; + MRE.r_length = Log2Size; + MRE.r_extern = IsExtern; + MRE.r_type = Type; Writer->addRelocation(Fragment->getParent(), MRE); if (B_Base) { @@ -212,7 +211,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; } - Type = macho::RIT_X86_64_Subtractor; + Type = MachO::X86_64_RELOC_SUBTRACTOR; } else { const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); MCSymbolData &SD = Asm.getSymbolData(*Symbol); @@ -272,15 +271,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // rewrite the movq to an leaq at link time if the symbol ends up in // the same linkage unit. if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) - Type = macho::RIT_X86_64_GOTLoad; + Type = MachO::X86_64_RELOC_GOT_LOAD; else - Type = macho::RIT_X86_64_GOT; + Type = MachO::X86_64_RELOC_GOT; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - Type = macho::RIT_X86_64_TLV; + Type = MachO::X86_64_RELOC_TLV; } else if (Modifier != MCSymbolRefExpr::VK_None) { report_fatal_error("unsupported symbol modifier in relocation"); } else { - Type = macho::RIT_X86_64_Signed; + Type = MachO::X86_64_RELOC_SIGNED; // The Darwin x86_64 relocation format has a problem where it cannot // encode an address (L + ) which is outside the atom @@ -297,9 +296,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // (the additional bias), but instead appear to just look at the final // offset. switch (-(Target.getConstant() + (1LL << Log2Size))) { - case 1: Type = macho::RIT_X86_64_Signed1; break; - case 2: Type = macho::RIT_X86_64_Signed2; break; - case 4: Type = macho::RIT_X86_64_Signed4; break; + case 1: Type = MachO::X86_64_RELOC_SIGNED_1; break; + case 2: Type = MachO::X86_64_RELOC_SIGNED_2; break; + case 4: Type = MachO::X86_64_RELOC_SIGNED_4; break; } } } else { @@ -307,24 +306,24 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, report_fatal_error("unsupported symbol modifier in branch " "relocation"); - Type = macho::RIT_X86_64_Branch; + Type = MachO::X86_64_RELOC_BRANCH; } } else { if (Modifier == MCSymbolRefExpr::VK_GOT) { - Type = macho::RIT_X86_64_GOT; + Type = MachO::X86_64_RELOC_GOT; } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which // case all we do is set the PCrel bit in the relocation entry; this is // used with exception handling, for example. The source is required to // include any necessary offset directly. 
- Type = macho::RIT_X86_64_GOT; + Type = MachO::X86_64_RELOC_GOT; IsPCRel = 1; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { report_fatal_error("TLVP symbol modifier should have been rip-rel"); } else if (Modifier != MCSymbolRefExpr::VK_None) report_fatal_error("unsupported symbol modifier in relocation"); else - Type = macho::RIT_X86_64_Unsigned; + Type = MachO::X86_64_RELOC_UNSIGNED; } } @@ -332,13 +331,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, FixedValue = Value; // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MachO::relocation_info MRE; + MRE.r_address = FixupOffset; + MRE.r_symbolnum = Index; + MRE.r_pcrel = IsPCRel; + MRE.r_length = Log2Size; + MRE.r_extern = IsExtern; + MRE.r_type = Type; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -352,7 +351,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; + unsigned Type = MachO::GENERIC_RELOC_VANILLA; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -379,15 +378,16 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Note that there is no longer any semantic difference between these two // relocation types from the linkers point of view, this is done solely for // pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : - (unsigned)macho::RIT_Generic_LocalDifference; + Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF : + (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } + MachO::scattered_relocation_info MRE; // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { + if (Type == MachO::GENERIC_RELOC_SECTDIFF || + Type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) { // If the offset is too large to fit in a scattered relocation, // we're hosed. It's an unfortunate limitation of the MachO format. 
if (FixupOffset > 0xffffff) { @@ -401,13 +401,12 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, llvm_unreachable("fatal error returned?!"); } - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + MRE.r_address = 0; + MRE.r_type = MachO::GENERIC_RELOC_PAIR; + MRE.r_length = Log2Size; + MRE.r_pcrel = IsPCRel; + MRE.r_scattered = 1; + MRE.r_value = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } else { // If the offset is more than 24-bits, it won't fit in a scattered @@ -421,13 +420,12 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, return false; } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; + MRE.r_address = FixupOffset; + MRE.r_type = Type; + MRE.r_length = Log2Size; + MRE.r_pcrel = IsPCRel; + MRE.r_scattered = 1; + MRE.r_value = Value; Writer->addRelocation(Fragment->getParent(), MRE); return true; } @@ -469,13 +467,13 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, } // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = Value; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (1 << 27) | // Extern - (macho::RIT_Generic_TLV << 28)); // Type + MachO::relocation_info MRE; + MRE.r_address = Value; + MRE.r_symbolnum = Index; + MRE.r_pcrel = IsPCRel; + MRE.r_length = Log2Size; + MRE.r_extern = 1; + MRE.r_type = MachO::GENERIC_RELOC_TLV; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -535,7 +533,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // // FIXME: Currently, these are never generated (see code below). I cannot // find a case where they are actually emitted. - Type = macho::RIT_Vanilla; + Type = MachO::GENERIC_RELOC_VANILLA; } else { // Resolve constant variables. if (SD->getSymbol().isVariable()) { @@ -566,17 +564,17 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, if (IsPCRel) FixedValue -= Writer->getSectionAddress(Fragment->getParent()); - Type = macho::RIT_Vanilla; + Type = MachO::GENERIC_RELOC_VANILLA; } // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MachO::relocation_info MRE; + MRE.r_address = FixupOffset; + MRE.r_symbolnum = Index; + MRE.r_pcrel = IsPCRel; + MRE.r_length = Log2Size; + MRE.r_extern = IsExtern; + MRE.r_type = Type; Writer->addRelocation(Fragment->getParent(), MRE); } -- cgit v1.1 From f69a29b23a116a3520f185054290c445abf9aa62 Mon Sep 17 00:00:00 2001 From: Charles Davis Date: Tue, 27 Aug 2013 05:38:30 +0000 Subject: Revert "Fix the build broken by r189315." and "Move everything depending on Object/MachOFormat.h over to Support/MachO.h." This reverts commits r189319 and r189315. r189315 broke some tests on what I believe are big-endian platforms. 
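[Editor's note: the failure mode the revert describes is easiest to see in a self-contained sketch. The struct and helper below are hypothetical stand-ins for the bitfield-based MachO::relocation_info and the shift-packed macho::RelocationEntry words; they are not code from either commit. C++ bitfield allocation order is implementation-defined and differs between little- and big-endian ABIs, so serializing the bitfield struct byte-for-byte can produce different on-disk bytes across hosts, while shift-packing into a plain word yields the same arithmetic value everywhere and leaves any byte swap to the object writer.]

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    struct BitfieldReloc {        // hypothetical mirror of relocation_info
      uint32_t r_symbolnum : 24;  // on big-endian ABIs these fields are
      uint32_t r_pcrel : 1;       // allocated from the opposite end of the
      uint32_t r_length : 2;      // word, so the raw bytes differ
      uint32_t r_extern : 1;
      uint32_t r_type : 4;
    };

    static uint32_t packWord1(uint32_t Index, unsigned IsPCRel,
                              unsigned Log2Size, unsigned IsExtern,
                              unsigned Type) {
      // Shift-packing is host-independent: the value is fixed by arithmetic.
      return (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
             (IsExtern << 27) | (Type << 28);
    }

    int main() {
      BitfieldReloc BF = {42, 1, 2, 1, 1};
      uint32_t Raw;
      std::memcpy(&Raw, &BF, sizeof(Raw));
      // These agree on little-endian hosts only.
      std::printf("bitfield=0x%08x packed=0x%08x\n", Raw,
                  packWord1(42, 1, 2, 1, 1));
      return 0;
    }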
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189321 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 32 ++--- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 128 ++++++++++---------- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 6 +- .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 90 +++++++------- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 10 +- .../X86/MCTargetDesc/X86MachORelocationInfo.cpp | 24 ++-- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 132 +++++++++++---------- 7 files changed, 211 insertions(+), 211 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 828442f..b1e25d8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -25,9 +25,9 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -640,16 +640,16 @@ public: // FIXME: This should be in a separate file. class DarwinARMAsmBackend : public ARMAsmBackend { public: - const MachO::CPUSubTypeARM Subtype; + const object::mach::CPUSubtypeARM Subtype; DarwinARMAsmBackend(const Target &T, const StringRef TT, - MachO::CPUSubTypeARM st) + object::mach::CPUSubtypeARM st) : ARMAsmBackend(T, TT), Subtype(st) { HasDataInCodeSupport = true; } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - MachO::CPU_TYPE_ARM, + object::mach::CTM_ARM, Subtype); } @@ -664,18 +664,18 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { - MachO::CPUSubTypeARM CS = - StringSwitch(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) - .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) - .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) - .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) - .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) - .Cases("armv7f", "thumbv7f", MachO::CPU_SUBTYPE_ARM_V7F) - .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) - .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) - .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) - .Default(MachO::CPU_SUBTYPE_ARM_V7); + object::mach::CPUSubtypeARM CS = + StringSwitch(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T) + .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ) + .Cases("armv6", "thumbv6", object::mach::CSARM_V6) + .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M) + .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM) + .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F) + .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K) + .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M) + .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S) + .Default(object::mach::CSARM_V7); return new DarwinARMAsmBackend(T, TT, CS); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index ee43f5f..b9efe74 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -20,9 +20,10 @@ #include "llvm/MC/MCMachOSymbolFlags.h" #include 
"llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; +using namespace llvm::object; namespace { class ARMMachObjectWriter : public MCMachObjectTargetWriter { @@ -62,7 +63,7 @@ public: static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, unsigned &Log2Size) { - RelocType = unsigned(MachO::ARM_RELOC_VANILLA); + RelocType = unsigned(macho::RIT_Vanilla); Log2Size = ~0U; switch (Kind) { @@ -91,21 +92,21 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: - RelocType = unsigned(MachO::ARM_RELOC_BR24); + RelocType = unsigned(macho::RIT_ARM_Branch24Bit); // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); return true; // Handle Thumb branches. case ARM::fixup_arm_thumb_br: - RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); + RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); Log2Size = llvm::Log2_32(2); return true; case ARM::fixup_t2_uncondbranch: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: - RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); + RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); Log2Size = llvm::Log2_32(4); return true; @@ -120,23 +121,23 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, // 1 - thumb instructions case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: - RelocType = unsigned(MachO::ARM_RELOC_HALF); + RelocType = unsigned(macho::RIT_ARM_Half); Log2Size = 1; return true; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(MachO::ARM_RELOC_HALF); + RelocType = unsigned(macho::RIT_ARM_Half); Log2Size = 3; return true; case ARM::fixup_arm_movw_lo16: case ARM::fixup_arm_movw_lo16_pcrel: - RelocType = unsigned(MachO::ARM_RELOC_HALF); + RelocType = unsigned(macho::RIT_ARM_Half); Log2Size = 0; return true; case ARM::fixup_t2_movw_lo16: case ARM::fixup_t2_movw_lo16_pcrel: - RelocType = unsigned(MachO::ARM_RELOC_HALF); + RelocType = unsigned(macho::RIT_ARM_Half); Log2Size = 2; return true; } @@ -152,7 +153,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = MachO::ARM_RELOC_HALF; + unsigned Type = macho::RIT_ARM_Half; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -178,7 +179,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. - Type = MachO::ARM_RELOC_HALF_SECTDIFF; + Type = macho::RIT_ARM_HalfDifference; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } @@ -222,28 +223,29 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, break; } - MachO::scattered_relocation_info MRE; - if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) { + if (Type == macho::RIT_ARM_HalfDifference) { uint32_t OtherHalf = MovtBit ? 
(FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); - MRE.r_address = OtherHalf; - MRE.r_type = MachO::ARM_RELOC_PAIR; - MRE.r_length = ((MovtBit << 0) | - (ThumbBit << 1)); - MRE.r_pcrel = IsPCRel; - MRE.r_scattered = 1; - MRE.r_value = Value2; + macho::RelocationEntry MRE; + MRE.Word0 = ((OtherHalf << 0) | + (macho::RIT_Pair << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } - MRE.r_address = FixupOffset; - MRE.r_type = Type; - MRE.r_length = ((MovtBit << 0) | - (ThumbBit << 1)); - MRE.r_pcrel = IsPCRel; - MRE.r_scattered = 1; - MRE.r_value = Value; + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -257,7 +259,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = MachO::ARM_RELOC_VANILLA; + unsigned Type = macho::RIT_Vanilla; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -282,30 +284,31 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. - Type = MachO::ARM_RELOC_SECTDIFF; + Type = macho::RIT_Difference; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } - MachO::scattered_relocation_info MRE; // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == MachO::ARM_RELOC_SECTDIFF || - Type == MachO::ARM_RELOC_LOCAL_SECTDIFF) { - MRE.r_address = 0; - MRE.r_type = MachO::ARM_RELOC_PAIR; - MRE.r_length = Log2Size; - MRE.r_pcrel = IsPCRel; - MRE.r_scattered = 1; - MRE.r_value = Value2; + if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } - MRE.r_address = FixupOffset; - MRE.r_type = Type; - MRE.r_length = Log2Size; - MRE.r_pcrel = IsPCRel; - MRE.r_scattered = 1; - MRE.r_value = Value; + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -323,13 +326,13 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, switch (RelocType) { default: return false; - case MachO::ARM_RELOC_BR24: + case macho::RIT_ARM_Branch24Bit: // PC pre-adjustment of 8 for these instructions. Value -= 8; // ARM BL/BLX has a 25-bit offset. Range = 0x1ffffff; break; - case MachO::ARM_THUMB_RELOC_BR22: + case macho::RIT_ARM_ThumbBranch22Bit: // PC pre-adjustment of 4 for these instructions. Value -= 4; // Thumb BL/BLX has a 24-bit offset. 
@@ -358,7 +361,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; - unsigned RelocType = MachO::ARM_RELOC_VANILLA; + unsigned RelocType = macho::RIT_Vanilla; if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) // If we failed to get fixup kind info, it's because there's no legal // relocation type for the fixup kind. This happens when it's a fixup that's @@ -371,7 +374,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // scattered relocation entry. Differences always require scattered // relocations. if (Target.getSymB()) { - if (RelocType == MachO::ARM_RELOC_HALF) + if (RelocType == macho::RIT_ARM_Half) return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, @@ -389,7 +392,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // // Is this right for ARM? uint32_t Offset = Target.getConstant(); - if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA) + if (IsPCRel && RelocType == macho::RIT_Vanilla) Offset += 1 << Log2Size; if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, @@ -442,17 +445,17 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // struct relocation_info (8 bytes) - MachO::relocation_info MRE; - MRE.r_address = FixupOffset; - MRE.r_symbolnum = Index; - MRE.r_pcrel = IsPCRel; - MRE.r_length = Log2Size; - MRE.r_extern = IsExtern; - MRE.r_type = Type; + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. - if (Type == MachO::ARM_RELOC_HALF) { + if (Type == macho::RIT_ARM_Half) { // The other-half value only gets populated for the movt and movw // relocation entries. uint32_t Value = 0; @@ -471,12 +474,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Value = FixedValue & 0xffff; break; } - MachO::relocation_info MREPair; - MREPair.r_address = Value; - MREPair.r_symbolnum = 0xffffff; - MREPair.r_length = Log2Size; - MREPair.r_pcrel = MREPair.r_extern = 0; - MREPair.r_type = MachO::ARM_RELOC_PAIR; + macho::RelocationEntry MREPair; + MREPair.Word0 = Value; + MREPair.Word1 = ((0xffffff) | + (Log2Size << 25) | + (macho::RIT_Pair << 28)); Writer->addRelocation(Fragment->getParent(), MREPair); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index ffeac43..dd61954 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -16,9 +16,9 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -164,8 +164,8 @@ namespace { return createPPCMachObjectWriter( OS, /*Is64Bit=*/is64, - (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC), - MachO::CPU_SUBTYPE_POWERPC_ALL); + (is64 ? 
object::mach::CTM_PowerPC64 : object::mach::CTM_PowerPC), + object::mach::CSPPC_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index aea3c5b..fc9b892 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -16,11 +16,12 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; +using namespace llvm::object; namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { @@ -89,29 +90,29 @@ static unsigned getRelocType(const MCValue &Target, Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); // determine the type of the relocation - unsigned Type = MachO::GENERIC_RELOC_VANILLA; + unsigned Type = macho::RIT_Vanilla; if (IsPCRel) { // relative to PC switch ((unsigned)FixupKind) { default: report_fatal_error("Unimplemented fixup kind (relative)"); case PPC::fixup_ppc_br24: - Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24 + Type = macho::RIT_PPC_BR24; // R_PPC_REL24 break; case PPC::fixup_ppc_brcond14: - Type = MachO::PPC_RELOC_BR14; + Type = macho::RIT_PPC_BR14; break; case PPC::fixup_ppc_half16: switch (Modifier) { default: llvm_unreachable("Unsupported modifier for half16 fixup"); case MCSymbolRefExpr::VK_PPC_HA: - Type = MachO::PPC_RELOC_HA16; + Type = macho::RIT_PPC_HA16; break; case MCSymbolRefExpr::VK_PPC_LO: - Type = MachO::PPC_RELOC_LO16; + Type = macho::RIT_PPC_LO16; break; case MCSymbolRefExpr::VK_PPC_HI: - Type = MachO::PPC_RELOC_HI16; + Type = macho::RIT_PPC_HI16; break; } break; @@ -125,13 +126,13 @@ static unsigned getRelocType(const MCValue &Target, default: llvm_unreachable("Unsupported modifier for half16 fixup"); case MCSymbolRefExpr::VK_PPC_HA: - Type = MachO::PPC_RELOC_HA16_SECTDIFF; + Type = macho::RIT_PPC_HA16_SECTDIFF; break; case MCSymbolRefExpr::VK_PPC_LO: - Type = MachO::PPC_RELOC_LO16_SECTDIFF; + Type = macho::RIT_PPC_LO16_SECTDIFF; break; case MCSymbolRefExpr::VK_PPC_HI: - Type = MachO::PPC_RELOC_HI16_SECTDIFF; + Type = macho::RIT_PPC_HI16_SECTDIFF; break; } break; @@ -144,34 +145,30 @@ static unsigned getRelocType(const MCValue &Target, return Type; } -static void makeRelocationInfo(MachO::any_relocation_info &MRE, +static void makeRelocationInfo(macho::RelocationEntry &MRE, const uint32_t FixupOffset, const uint32_t Index, const unsigned IsPCRel, const unsigned Log2Size, const unsigned IsExtern, const unsigned Type) { - MRE.r_word0 = FixupOffset; + MRE.Word0 = FixupOffset; // The bitfield offsets that work (as determined by trial-and-error) // are different than what is documented in the mach-o manuals. - // This appears to be an endianness issue; reversing the order of the - // documented bitfields in fixes this (but - // breaks x86/ARM assembly). - MRE.r_word1 = ((Index << 8) | // was << 0 - (IsPCRel << 7) | // was << 24 - (Log2Size << 5) | // was << 25 - (IsExtern << 4) | // was << 27 - (Type << 0)); // was << 28 + // Is this an endianness issue w/ PPC? 
+  MRE.Word1 = ((Index << 8) |    // was << 0
+               (IsPCRel << 7) |  // was << 24
+               (Log2Size << 5) | // was << 25
+               (IsExtern << 4) | // was << 27
+               (Type << 0));     // was << 28
 }
 
 static void
-makeScatteredRelocationInfo(MachO::scattered_relocation_info &MRE,
-                            const uint32_t Addr, const unsigned Type,
-                            const unsigned Log2Size, const unsigned IsPCRel,
-                            const uint32_t Value2) {
-  MRE.r_scattered = true;
-  MRE.r_pcrel = IsPCRel;
-  MRE.r_length = Log2Size;
-  MRE.r_type = Type;
-  MRE.r_address = Addr;
-  MRE.r_value = Value2;
+makeScatteredRelocationInfo(macho::RelocationEntry &MRE, const uint32_t Addr,
+                            const unsigned Type, const unsigned Log2Size,
+                            const unsigned IsPCRel, const uint32_t Value2) {
+  // For notes on bitfield positions and endianness, see:
+  // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry
+  MRE.Word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) | (IsPCRel << 30) |
+               macho::RF_Scattered);
+  MRE.Word1 = Value2;
 }
 
 /// Compute fixup offset (address).
@@ -226,19 +223,18 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
     report_fatal_error("symbol '" + B->getSymbol().getName() +
                        "' can not be undefined in a subtraction expression");
 
-    // FIXME: is Type correct? see include/llvm/Support/MachO.h
+    // FIXME: is Type correct? see include/llvm/Object/MachOFormat.h
     Value2 = Writer->getSymbolAddress(B_SD, Layout);
     FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
   }
 
   // FIXME: does FixedValue get used??
 
   // Relocations are written out in reverse order, so the PAIR comes first.
-  if (Type == MachO::PPC_RELOC_SECTDIFF ||
-      Type == MachO::PPC_RELOC_HI16_SECTDIFF ||
-      Type == MachO::PPC_RELOC_LO16_SECTDIFF ||
-      Type == MachO::PPC_RELOC_HA16_SECTDIFF ||
-      Type == MachO::PPC_RELOC_LO14_SECTDIFF ||
-      Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) {
+  if (Type == macho::RIT_PPC_SECTDIFF || Type == macho::RIT_PPC_HI16_SECTDIFF ||
+      Type == macho::RIT_PPC_LO16_SECTDIFF ||
+      Type == macho::RIT_PPC_HA16_SECTDIFF ||
+      Type == macho::RIT_PPC_LO14_SECTDIFF ||
+      Type == macho::RIT_PPC_LOCAL_SECTDIFF) {
     // X86 had this piece, but ARM does not
     // If the offset is too large to fit in a scattered relocation,
     // we're hosed. It's an unfortunate limitation of the MachO format.
@@ -257,7 +253,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
   // see PPCMCExpr::EvaluateAsRelocatableImpl()
   uint32_t other_half = 0;
   switch (Type) {
-  case MachO::PPC_RELOC_LO16_SECTDIFF:
+  case macho::RIT_PPC_LO16_SECTDIFF:
     other_half = (FixedValue >> 16) & 0xffff;
     // applyFixupOffset no longer extracts the high part because it now assumes
     // this was already done.
@@ -266,12 +262,12 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
     // So we need to adjust FixedValue again here.
     FixedValue &= 0xffff;
     break;
-  case MachO::PPC_RELOC_HA16_SECTDIFF:
+  case macho::RIT_PPC_HA16_SECTDIFF:
     other_half = FixedValue & 0xffff;
     FixedValue = ((FixedValue >> 16) + ((FixedValue & 0x8000) ?
1 : 0)) & 0xffff; break; - case MachO::PPC_RELOC_HI16_SECTDIFF: + case macho::RIT_PPC_HI16_SECTDIFF: other_half = FixedValue & 0xffff; FixedValue = (FixedValue >> 16) & 0xffff; break; @@ -280,9 +276,9 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( break; } - MachO::scattered_relocation_info MRE; - makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR, - Log2Size, IsPCRel, Value2); + macho::RelocationEntry MRE; + makeScatteredRelocationInfo(MRE, other_half, macho::RIT_Pair, Log2Size, + IsPCRel, Value2); Writer->addRelocation(Fragment->getParent(), MRE); } else { // If the offset is more than 24-bits, it won't fit in a scattered @@ -295,7 +291,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( if (FixupOffset > 0xffffff) return false; } - MachO::scattered_relocation_info MRE; + macho::RelocationEntry MRE; makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value); Writer->addRelocation(Fragment->getParent(), MRE); return true; @@ -316,8 +312,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( // relocations. if (Target.getSymB() && // Q: are branch targets ever scattered? - RelocType != MachO::PPC_RELOC_BR24 && - RelocType != MachO::PPC_RELOC_BR14) { + RelocType != macho::RIT_PPC_BR24 && RelocType != macho::RIT_PPC_BR14) { RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); return; @@ -374,7 +369,8 @@ void PPCMachObjectWriter::RecordPPCRelocation( FixedValue -= Writer->getSectionAddress(Fragment->getParent()); } - MachO::any_relocation_info MRE; + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern, Type); Writer->addRelocation(Fragment->getParent(), MRE); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index fc3bae3..598ddee 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -19,10 +19,10 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/Object/MachOFormat.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -395,8 +395,8 @@ public: MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, - MachO::CPU_TYPE_I386, - MachO::CPU_SUBTYPE_I386_ALL); + object::mach::CTM_i386, + object::mach::CSX86_ALL); } }; @@ -409,8 +409,8 @@ public: MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, - MachO::CPU_TYPE_X86_64, - MachO::CPU_SUBTYPE_X86_64_ALL); + object::mach::CTM_x86_64, + object::mach::CSX86_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index 209b1d0..75b5acf 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -17,7 +17,7 @@ using namespace llvm; using namespace object; -using namespace MachO; +using namespace macho; namespace { class X86_64MachORelocationInfo : public MCRelocationInfo { @@ -33,7 +33,7 @@ public: StringRef SymName; SymI->getName(SymName); uint64_t SymAddr; 
SymI->getAddress(SymAddr); - any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl()); + RelocationEntry RE = Obj->getRelocation(Rel.getRawDataRefImpl()); bool isPCRel = Obj->getAnyRelocationPCRel(RE); MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); @@ -43,44 +43,44 @@ public: const MCExpr *Expr = 0; switch(RelType) { - case X86_64_RELOC_TLV: + case RIT_X86_64_TLV: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); break; - case X86_64_RELOC_SIGNED_4: + case RIT_X86_64_Signed4: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(4, Ctx), Ctx); break; - case X86_64_RELOC_SIGNED_2: + case RIT_X86_64_Signed2: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(2, Ctx), Ctx); break; - case X86_64_RELOC_SIGNED_1: + case RIT_X86_64_Signed1: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(1, Ctx), Ctx); break; - case X86_64_RELOC_GOT_LOAD: + case RIT_X86_64_GOTLoad: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); break; - case X86_64_RELOC_GOT: + case RIT_X86_64_GOT: Expr = MCSymbolRefExpr::Create(Sym, isPCRel ? MCSymbolRefExpr::VK_GOTPCREL : MCSymbolRefExpr::VK_GOT, Ctx); break; - case X86_64_RELOC_SUBTRACTOR: + case RIT_X86_64_Subtractor: { RelocationRef RelNext; Obj->getRelocationNext(Rel.getRawDataRefImpl(), RelNext); - any_relocation_info RENext = Obj->getRelocation(RelNext.getRawDataRefImpl()); + RelocationEntry RENext = Obj->getRelocation(RelNext.getRawDataRefImpl()); // X86_64_SUBTRACTOR must be followed by a relocation of type - // X86_64_RELOC_UNSIGNED. + // X86_64_RELOC_UNSIGNED . // NOTE: Scattered relocations don't exist on x86_64. unsigned RType = Obj->getAnyRelocationType(RENext); - if (RType != X86_64_RELOC_UNSIGNED) + if (RType != RIT_X86_64_Unsigned) report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 45437e0..6eff224 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -16,11 +16,12 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; +using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { @@ -131,7 +132,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. - Type = MachO::X86_64_RELOC_UNSIGNED; + Type = macho::RIT_X86_64_Unsigned; Index = 0; // FIXME: I believe this is broken, I don't think the linker can understand @@ -140,7 +141,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // is to use an absolute symbol (which we don't support yet). 
if (IsPCRel) { IsExtern = 1; - Type = MachO::X86_64_RELOC_BRANCH; + Type = macho::RIT_X86_64_Branch; } } else if (Target.getSymB()) { // A - B + constant const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -192,15 +193,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; } - Type = MachO::X86_64_RELOC_UNSIGNED; - - MachO::relocation_info MRE; - MRE.r_address = FixupOffset; - MRE.r_symbolnum = Index; - MRE.r_pcrel = IsPCRel; - MRE.r_length = Log2Size; - MRE.r_extern = IsExtern; - MRE.r_type = Type; + Type = macho::RIT_X86_64_Unsigned; + + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); Writer->addRelocation(Fragment->getParent(), MRE); if (B_Base) { @@ -211,7 +212,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; } - Type = MachO::X86_64_RELOC_SUBTRACTOR; + Type = macho::RIT_X86_64_Subtractor; } else { const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); MCSymbolData &SD = Asm.getSymbolData(*Symbol); @@ -271,15 +272,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // rewrite the movq to an leaq at link time if the symbol ends up in // the same linkage unit. if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) - Type = MachO::X86_64_RELOC_GOT_LOAD; + Type = macho::RIT_X86_64_GOTLoad; else - Type = MachO::X86_64_RELOC_GOT; + Type = macho::RIT_X86_64_GOT; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - Type = MachO::X86_64_RELOC_TLV; + Type = macho::RIT_X86_64_TLV; } else if (Modifier != MCSymbolRefExpr::VK_None) { report_fatal_error("unsupported symbol modifier in relocation"); } else { - Type = MachO::X86_64_RELOC_SIGNED; + Type = macho::RIT_X86_64_Signed; // The Darwin x86_64 relocation format has a problem where it cannot // encode an address (L + ) which is outside the atom @@ -296,9 +297,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // (the additional bias), but instead appear to just look at the final // offset. switch (-(Target.getConstant() + (1LL << Log2Size))) { - case 1: Type = MachO::X86_64_RELOC_SIGNED_1; break; - case 2: Type = MachO::X86_64_RELOC_SIGNED_2; break; - case 4: Type = MachO::X86_64_RELOC_SIGNED_4; break; + case 1: Type = macho::RIT_X86_64_Signed1; break; + case 2: Type = macho::RIT_X86_64_Signed2; break; + case 4: Type = macho::RIT_X86_64_Signed4; break; } } } else { @@ -306,24 +307,24 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, report_fatal_error("unsupported symbol modifier in branch " "relocation"); - Type = MachO::X86_64_RELOC_BRANCH; + Type = macho::RIT_X86_64_Branch; } } else { if (Modifier == MCSymbolRefExpr::VK_GOT) { - Type = MachO::X86_64_RELOC_GOT; + Type = macho::RIT_X86_64_GOT; } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which // case all we do is set the PCrel bit in the relocation entry; this is // used with exception handling, for example. The source is required to // include any necessary offset directly. 
- Type = MachO::X86_64_RELOC_GOT; + Type = macho::RIT_X86_64_GOT; IsPCRel = 1; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { report_fatal_error("TLVP symbol modifier should have been rip-rel"); } else if (Modifier != MCSymbolRefExpr::VK_None) report_fatal_error("unsupported symbol modifier in relocation"); else - Type = MachO::X86_64_RELOC_UNSIGNED; + Type = macho::RIT_X86_64_Unsigned; } } @@ -331,13 +332,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, FixedValue = Value; // struct relocation_info (8 bytes) - MachO::relocation_info MRE; - MRE.r_address = FixupOffset; - MRE.r_symbolnum = Index; - MRE.r_pcrel = IsPCRel; - MRE.r_length = Log2Size; - MRE.r_extern = IsExtern; - MRE.r_type = Type; + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); Writer->addRelocation(Fragment->getParent(), MRE); } @@ -351,7 +352,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = MachO::GENERIC_RELOC_VANILLA; + unsigned Type = macho::RIT_Vanilla; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -378,16 +379,15 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Note that there is no longer any semantic difference between these two // relocation types from the linkers point of view, this is done solely for // pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF : - (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; + Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : + (unsigned)macho::RIT_Generic_LocalDifference; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } - MachO::scattered_relocation_info MRE; // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == MachO::GENERIC_RELOC_SECTDIFF || - Type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) { + if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { // If the offset is too large to fit in a scattered relocation, // we're hosed. It's an unfortunate limitation of the MachO format. 
if (FixupOffset > 0xffffff) { @@ -401,12 +401,13 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, llvm_unreachable("fatal error returned?!"); } - MRE.r_address = 0; - MRE.r_type = MachO::GENERIC_RELOC_PAIR; - MRE.r_length = Log2Size; - MRE.r_pcrel = IsPCRel; - MRE.r_scattered = 1; - MRE.r_value = Value2; + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } else { // If the offset is more than 24-bits, it won't fit in a scattered @@ -420,12 +421,13 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, return false; } - MRE.r_address = FixupOffset; - MRE.r_type = Type; - MRE.r_length = Log2Size; - MRE.r_pcrel = IsPCRel; - MRE.r_scattered = 1; - MRE.r_value = Value; + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); return true; } @@ -467,13 +469,13 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, } // struct relocation_info (8 bytes) - MachO::relocation_info MRE; - MRE.r_address = Value; - MRE.r_symbolnum = Index; - MRE.r_pcrel = IsPCRel; - MRE.r_length = Log2Size; - MRE.r_extern = 1; - MRE.r_type = MachO::GENERIC_RELOC_TLV; + macho::RelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (macho::RIT_Generic_TLV << 28)); // Type Writer->addRelocation(Fragment->getParent(), MRE); } @@ -533,7 +535,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // // FIXME: Currently, these are never generated (see code below). I cannot // find a case where they are actually emitted. - Type = MachO::GENERIC_RELOC_VANILLA; + Type = macho::RIT_Vanilla; } else { // Resolve constant variables. if (SD->getSymbol().isVariable()) { @@ -564,17 +566,17 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, if (IsPCRel) FixedValue -= Writer->getSectionAddress(Fragment->getParent()); - Type = MachO::GENERIC_RELOC_VANILLA; + Type = macho::RIT_Vanilla; } // struct relocation_info (8 bytes) - MachO::relocation_info MRE; - MRE.r_address = FixupOffset; - MRE.r_symbolnum = Index; - MRE.r_pcrel = IsPCRel; - MRE.r_length = Log2Size; - MRE.r_extern = IsExtern; - MRE.r_type = Type; + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); Writer->addRelocation(Fragment->getParent(), MRE); } -- cgit v1.1 From 1567abe74f519e542786bdb82664f68b10afda0b Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 27 Aug 2013 08:39:25 +0000 Subject: AVX-512: Added FMA instructions. 
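[Editor's note: the 132/213/231 suffixes in the defm blocks below follow the usual FMA3 operand convention, and the X86ISelLowering change generalizes the FNEG XOR-mask type from the 128/256-bit pair to any vector width (e.g. 512-bit becomes v8i64). A hedged scalar model of the naming convention, not LLVM code:]

    #include <cstdio>

    // dst = op_a * op_b + op_c, where the digits name which of
    // (dst, src2, src3) are multiplied and which is added; dst is operand 1.
    static double fma132(double d, double s2, double s3) { return d * s3 + s2; }
    static double fma213(double d, double s2, double s3) { return s2 * d + s3; }
    static double fma231(double d, double s2, double s3) { return s2 * s3 + d; }

    int main() {
      // 2*4+3 = 11, 3*2+4 = 10, 3*4+2 = 14
      std::printf("%g %g %g\n", fma132(2, 3, 4), fma213(2, 3, 4),
                  fma231(2, 3, 4));
      return 0;
    }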
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189326 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 +- lib/Target/X86/X86InstrAVX512.td | 197 +++++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6a7ca7d..f9b3f1a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9132,7 +9132,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo::getConstantPool(), false, false, false, Alignment); if (VT.isVector()) { - MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; + MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::XOR, dl, XORVT, DAG.getNode(ISD::BITCAST, dl, XORVT, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index cf4a0f5..057d551 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1994,6 +1994,203 @@ def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)), // MOVHLPS patterns def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)), (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>; + +//===----------------------------------------------------------------------===// +// FMA - Fused Multiply Operations +// +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_rm opc, string OpcodeStr, + RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, + string BrdcstStr, SDNode OpNode, ValueType OpVT> { + def r: AVX512FMA3; + + let mayLoad = 1 in + def m: AVX512FMA3; + def mb: AVX512FMA3, EVEX_B; +} +} // Constraints = "$src1 = $dst" + +let ExeDomain = SSEPackedSingle in { + defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmadd, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmsub, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmaddsub, v16f32>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmsubadd, v16f32>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fnmadd, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fnmsub, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +} +let ExeDomain = SSEPackedDouble in { + defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmadd, v8f64>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmaddsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFMSUBADD213PDZ : 
avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmsubadd, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fnmadd, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fnmsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +} + +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_m132 opc, string OpcodeStr, + RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, + string BrdcstStr, SDNode OpNode, ValueType OpVT> { + let mayLoad = 1 in + def m: AVX512FMA3; + def mb: AVX512FMA3, EVEX_B; +} +} // Constraints = "$src1 = $dst" + + +let ExeDomain = SSEPackedSingle in { + defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmadd, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmsub, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmaddsub, v16f32>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmsubadd, v16f32>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fnmadd, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fnmsub, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +} +let ExeDomain = SSEPackedDouble in { + defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmadd, v8f64>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmaddsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmsubadd, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fnmadd, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fnmsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +} + +// Scalar FMA +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType OpVT, + X86MemOperand x86memop, Operand memop, + PatFrag mem_frag> { + let isCommutable = 1 in + def r : AVX512FMA3; + let mayLoad = 1 in + def m : AVX512FMA3; +} + +} // Constraints = "$src1 = $dst" + +defm VFMADDSSZ : avx512_fma3s_rm<0xA9, 
"vfmadd213ss{z}", X86Fmadd, FR32X, + f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; +defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X, + f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X, + f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; +defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X, + f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X, + f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; +defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X, + f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X, + f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; +defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X, + f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; + //===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations -- cgit v1.1 From c9617b9a9dfcb550adcf06f83a58a5e522414cc1 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 27 Aug 2013 09:40:30 +0000 Subject: [mips][msa] Added bitconverts for vector types for big and little-endian git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189330 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrInfo.td | 2 + lib/Target/Mips/MipsMSAInstrInfo.td | 151 +++++++++++++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 94f9ee6..a391a89 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -186,6 +186,8 @@ def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">, AssemblerPredicate<"FeatureMicroMips">; def NotInMicroMips : Predicate<"!Subtarget.inMicroMipsMode()">, AssemblerPredicate<"!FeatureMicroMips">; +def IsLE : Predicate<"Subtarget.isLittle()">; +def IsBE : Predicate<"!Subtarget.isLittle()">; class MipsPat : Pat { let Predicates = [HasStdEnc]; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 24d2c91..8383646 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -2536,8 +2536,8 @@ def XOR_V : XOR_V_ENC, XOR_V_DESC, Requires<[HasMSA]>; def XORI_B : XORI_B_ENC, XORI_B_DESC, Requires<[HasMSA]>; // Patterns. 
-class MSAPat :
-  Pat, Requires<[pred]>;
+class MSAPat pred = [HasMSA]> :
+  Pat, Requires;
 
 def LD_FH : MSAPat<(v8f16 (load addr:$addr)),
                    (LD_H addr:$addr)>;
@@ -2552,3 +2552,150 @@ def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addr:$addr),
                    (ST_W MSA128W:$ws, addr:$addr)>;
 def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addr:$addr),
                    (ST_D MSA128D:$ws, addr:$addr)>;
+
+class MSABitconvertPat preds = [HasMSA]> :
+  MSAPat<(DstVT (bitconvert SrcVT:$src)),
+         (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>;
+
+// These are endian-independent because the element size doesn't change
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+
+// Little endian bitcasts are always no-ops
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+def : MSABitconvertPat;
+
+// Big endian bitcasts expand to shuffle instructions.
+// This is because bitcast is defined to be a store/load sequence and the
+// vector store/load instructions are mixed-endian with respect to the vector
+// as a whole (little endian with respect to element order, but big endian
+// elements).
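[Editor's note: a hedged illustration of the SHF immediates (27 and 177) used by the reversal patterns that follow; plain C++, not MSA code. SHF's 8-bit control packs four 2-bit source indices, lowest bits first, so 27 = 0b00011011 selects {3,2,1,0} (reverse a group of four) and 177 = 0b10110001 selects {1,0,3,2} (swap adjacent pairs):]

    #include <cstdint>
    #include <cstdio>

    // Destination slot I takes the source element named by bits [2I+1:2I].
    static void shf4(const uint8_t *Src, uint8_t *Dst, unsigned Imm) {
      for (int I = 0; I < 4; ++I)
        Dst[I] = Src[(Imm >> (2 * I)) & 3];
    }

    int main() {
      uint8_t V[4] = {0, 1, 2, 3}, Out[4];
      shf4(V, Out, 27);   // -> 3 2 1 0
      std::printf("%d %d %d %d\n", Out[0], Out[1], Out[2], Out[3]);
      shf4(V, Out, 177);  // -> 1 0 3 2
      std::printf("%d %d %d %d\n", Out[0], Out[1], Out[2], Out[3]);
      return 0;
    }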
+ +class MSABitconvertReverseQuartersPat : + MSAPat<(DstVT (bitconvert SrcVT:$src)), + (COPY_TO_REGCLASS (Insn (COPY_TO_REGCLASS SrcVT:$src, ViaRC), 27), + DstRC), + [HasMSA, IsBE]>; + +class MSABitconvertReverseHalvesPat : + MSAPat<(DstVT (bitconvert SrcVT:$src)), + (COPY_TO_REGCLASS (Insn (COPY_TO_REGCLASS SrcVT:$src, ViaRC), 177), + DstRC), + [HasMSA, IsBE]>; + +class MSABitconvertReverseBInHPat : + MSABitconvertReverseHalvesPat; + +class MSABitconvertReverseBInWPat : + MSABitconvertReverseQuartersPat; + +class MSABitconvertReverseBInDPat : + MSAPat<(DstVT (bitconvert SrcVT:$src)), + (COPY_TO_REGCLASS + (SHF_W + (COPY_TO_REGCLASS + (SHF_B (COPY_TO_REGCLASS SrcVT:$src, MSA128B), 27), + MSA128W), 177), + DstRC), + [HasMSA, IsBE]>; + +class MSABitconvertReverseHInWPat : + MSABitconvertReverseHalvesPat; + +class MSABitconvertReverseHInDPat : + MSABitconvertReverseQuartersPat; + +class MSABitconvertReverseWInDPat : + MSABitconvertReverseHalvesPat; + +def : MSABitconvertReverseBInHPat; +def : MSABitconvertReverseBInHPat; +def : MSABitconvertReverseBInWPat; +def : MSABitconvertReverseBInWPat; +def : MSABitconvertReverseBInDPat; +def : MSABitconvertReverseBInDPat; + +def : MSABitconvertReverseBInHPat; +def : MSABitconvertReverseHInWPat; +def : MSABitconvertReverseHInWPat; +def : MSABitconvertReverseHInDPat; +def : MSABitconvertReverseHInDPat; + +def : MSABitconvertReverseBInHPat; +def : MSABitconvertReverseHInWPat; +def : MSABitconvertReverseHInWPat; +def : MSABitconvertReverseHInDPat; +def : MSABitconvertReverseHInDPat; + +def : MSABitconvertReverseBInWPat; +def : MSABitconvertReverseHInWPat; +def : MSABitconvertReverseHInWPat; +def : MSABitconvertReverseWInDPat; +def : MSABitconvertReverseWInDPat; + +def : MSABitconvertReverseBInWPat; +def : MSABitconvertReverseHInWPat; +def : MSABitconvertReverseHInWPat; +def : MSABitconvertReverseWInDPat; +def : MSABitconvertReverseWInDPat; + +def : MSABitconvertReverseBInDPat; +def : MSABitconvertReverseHInDPat; +def : MSABitconvertReverseHInDPat; +def : MSABitconvertReverseWInDPat; +def : MSABitconvertReverseWInDPat; + +def : MSABitconvertReverseBInDPat; +def : MSABitconvertReverseHInDPat; +def : MSABitconvertReverseHInDPat; +def : MSABitconvertReverseWInDPat; +def : MSABitconvertReverseWInDPat; -- cgit v1.1 From 842a1be06c53757e7498c9894abc1431b633a92f Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 27 Aug 2013 09:54:29 +0000 Subject: [SystemZ] Extend memcpy and memset support to all constant lengths Lengths up to a certain threshold (currently 6 * 256) use a series of MVCs. Lengths above that threshold use a loop to handle X*256 bytes followed by a single MVC to handle the excess (if any). This loop will also be needed in future when support for variable lengths is added. Because the same tablegen classes are used to define MVC and CLC, the patch also has the side-effect of defining a pseudo loop instruction for CLC. That instruction isn't used yet (and wouldn't be handled correctly if it were). I'm planning to use it soon though. 
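[Editor's note: a hedged model of the length decomposition described above, using the thresholds stated in the message; the function below is invented for illustration and is not the actual SelectionDAG lowering:]

    #include <cstdint>
    #include <cstdio>

    static void lowerConstantMemcpy(uint64_t Length) {
      const uint64_t Threshold = 6 * 256;
      if (Length > Threshold) {
        // Loop form: each iteration is one MVC of 256 bytes, with the
        // base registers advanced by LA and the trip count decremented.
        std::printf("loop: %llu x MVC(256)\n",
                    (unsigned long long)(Length / 256));
        Length %= 256; // the excess, handled below
      }
      // Straight-line form: at most six MVCs for lengths <= 6 * 256.
      while (Length > 0) {
        uint64_t This = Length < 256 ? Length : 256;
        std::printf("MVC(%llu)\n", (unsigned long long)This);
        Length -= This;
      }
    }

    int main() {
      lowerConstantMemcpy(700);   // three straight-line MVCs
      lowerConstantMemcpy(4000);  // loop of 15, then MVC(160)
      return 0;
    }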
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189331 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 168 +++++++++++++++++++++---- lib/Target/SystemZ/SystemZISelLowering.h | 17 ++- lib/Target/SystemZ/SystemZInstrFP.td | 6 +- lib/Target/SystemZ/SystemZInstrFormats.td | 29 +++-- lib/Target/SystemZ/SystemZInstrInfo.td | 18 +-- lib/Target/SystemZ/SystemZOperands.td | 7 +- lib/Target/SystemZ/SystemZOperators.td | 11 +- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 64 ++++++---- 8 files changed, 239 insertions(+), 81 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 20afab7..216ca34 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1917,7 +1917,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(UDIVREM32); OPCODE(UDIVREM64); OPCODE(MVC); + OPCODE(MVC_LOOP); OPCODE(CLC); + OPCODE(CLC_LOOP); OPCODE(STRCMP); OPCODE(STPCPY); OPCODE(SEARCH_STRING); @@ -1952,18 +1954,31 @@ static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { return NewMBB; } -// Split MBB after MI and return the new block (the one that contains -// instructions after MI). -static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, - MachineBasicBlock *MBB) { +// Split MBB before MI and return the new block (the one that contains MI). +static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, + MachineBasicBlock *MBB) { MachineBasicBlock *NewMBB = emitBlockAfter(MBB); - NewMBB->splice(NewMBB->begin(), MBB, - llvm::next(MachineBasicBlock::iterator(MI)), - MBB->end()); + NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); NewMBB->transferSuccessorsAndUpdatePHIs(MBB); return NewMBB; } +// Force base value Base into a register before MI. Return the register. +static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, + const SystemZInstrInfo *TII) { + if (Base.isReg()) + return Base.getReg(); + + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg) + .addOperand(Base).addImm(0).addReg(0); + return Reg; +} + // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr *MI, @@ -1978,7 +1993,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); // StartMBB: @@ -1999,7 +2014,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] // ... 
MBB = JoinMBB; - BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg) + BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg) .addReg(TrueReg).addMBB(StartMBB) .addReg(FalseReg).addMBB(FalseMBB); @@ -2046,7 +2061,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, CCMask ^= CCValid; MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); // StartMBB: @@ -2122,7 +2137,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, // Insert a basic block for the main loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); // StartMBB: @@ -2244,7 +2259,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, // Insert 3 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); @@ -2351,7 +2366,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, // Insert 2 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB); @@ -2465,17 +2480,126 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, MachineBasicBlock *MBB, unsigned Opcode) const { const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); - MachineOperand DestBase = MI->getOperand(0); + MachineOperand DestBase = earlyUseOperand(MI->getOperand(0)); uint64_t DestDisp = MI->getOperand(1).getImm(); - MachineOperand SrcBase = MI->getOperand(2); + MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2)); uint64_t SrcDisp = MI->getOperand(3).getImm(); uint64_t Length = MI->getOperand(4).getImm(); - BuildMI(*MBB, MI, DL, TII->get(Opcode)) - .addOperand(DestBase).addImm(DestDisp).addImm(Length) - .addOperand(SrcBase).addImm(SrcDisp); + // Check for the loop form, in which operand 5 is the trip count. + if (MI->getNumExplicitOperands() > 5) { + bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); + + uint64_t StartCountReg = MI->getOperand(5).getReg(); + uint64_t StartSrcReg = forceReg(MI, SrcBase, TII); + uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg : + forceReg(MI, DestBase, TII)); + + const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; + uint64_t ThisSrcReg = MRI.createVirtualRegister(RC); + uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg : + MRI.createVirtualRegister(RC)); + uint64_t NextSrcReg = MRI.createVirtualRegister(RC); + uint64_t NextDestReg = (HaveSingleBase ? 
NextSrcReg : + MRI.createVirtualRegister(RC)); + + RC = &SystemZ::GR64BitRegClass; + uint64_t ThisCountReg = MRI.createVirtualRegister(RC); + uint64_t NextCountReg = MRI.createVirtualRegister(RC); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // # fall through to LoopMMB + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %ThisDestReg = phi [ %StartDestReg, StartMBB ], + // [ %NextDestReg, LoopMBB ] + // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], + // [ %NextSrcReg, LoopMBB ] + // %ThisCountReg = phi [ %StartCountReg, StartMBB ], + // [ %NextCountReg, LoopMBB ] + // PFD 2, 768+DestDisp(%ThisDestReg) + // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) + // %NextDestReg = LA 256(%ThisDestReg) + // %NextSrcReg = LA 256(%ThisSrcReg) + // %NextCountReg = AGHI %ThisCountReg, -1 + // CGHI %NextCountReg, 0 + // JLH LoopMBB + // # fall through to DoneMMB + // + // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. + MBB = LoopMBB; + + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) + .addReg(StartDestReg).addMBB(StartMBB) + .addReg(NextDestReg).addMBB(LoopMBB); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) + .addReg(StartSrcReg).addMBB(StartMBB) + .addReg(NextSrcReg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) + .addReg(StartCountReg).addMBB(StartMBB) + .addReg(NextCountReg).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PFD)) + .addImm(SystemZ::PFD_WRITE) + .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); + BuildMI(MBB, DL, TII->get(Opcode)) + .addReg(ThisDestReg).addImm(DestDisp).addImm(256) + .addReg(ThisSrcReg).addImm(SrcDisp); + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) + .addReg(ThisDestReg).addImm(256).addReg(0); + if (!HaveSingleBase) + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) + .addReg(ThisSrcReg).addImm(256).addReg(0); + BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) + .addReg(ThisCountReg).addImm(-1); + BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) + .addReg(NextCountReg).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + DestBase = MachineOperand::CreateReg(NextDestReg, false); + SrcBase = MachineOperand::CreateReg(NextSrcReg, false); + Length &= 255; + MBB = DoneMBB; + } + // Handle any remaining bytes with straight-line code. + while (Length > 0) { + uint64_t ThisLength = std::min(Length, uint64_t(256)); + // The previous iteration might have created out-of-range displacements. + // Apply them using LAY if so. 
+ if (!isUInt<12>(DestDisp)) { + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) + .addOperand(DestBase).addImm(DestDisp).addReg(0); + DestBase = MachineOperand::CreateReg(Reg, false); + DestDisp = 0; + } + if (!isUInt<12>(SrcDisp)) { + unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) + .addOperand(SrcBase).addImm(SrcDisp).addReg(0); + SrcBase = MachineOperand::CreateReg(Reg, false); + SrcDisp = 0; + } + BuildMI(*MBB, MI, DL, TII->get(Opcode)) + .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength) + .addOperand(SrcBase).addImm(SrcDisp); + DestDisp += ThisLength; + SrcDisp += ThisLength; + Length -= ThisLength; + } MI->eraseFromParent(); return MBB; @@ -2503,7 +2627,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, uint64_t End2Reg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); // StartMBB: @@ -2765,9 +2889,11 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::ATOMIC_CMP_SWAPW: return emitAtomicCmpSwapW(MI, MBB); - case SystemZ::MVCWrapper: + case SystemZ::MVCSequence: + case SystemZ::MVCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::MVC); - case SystemZ::CLCWrapper: + case SystemZ::CLCSequence: + case SystemZ::CLCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::CLC); case SystemZ::CLSTLoop: return emitStringWrapper(MI, MBB, SystemZ::CLST); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index f6a2ce0..9831777 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -74,16 +74,25 @@ namespace SystemZISD { UDIVREM32, UDIVREM64, - // Use MVC to copy bytes from one memory location to another. - // The first operand is the target address, the second operand is the - // source address, and the third operand is the constant length. + // Use a series of MVCs to copy bytes from one memory location to another. + // The operands are: + // - the target address + // - the source address + // - the constant length + // // This isn't a memory opcode because we'd need to attach two // MachineMemOperands rather than one. MVC, + // Like MVC, but implemented as a loop that handles X*256 bytes + // followed by straight-line code to handle the rest (if any). + // The value of X is passed as an additional operand. + MVC_LOOP, + // Use CLC to compare two blocks of memory, with the same comments - // as for MVC. + // as for MVC and MVC_LOOP. CLC, + CLC_LOOP, // Use an MVST-based sequence to implement stpcpy(). 
STPCPY, diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index dbe0fb5..8d0dcb6 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -86,9 +86,9 @@ def : CopySign128; -defm LoadStoreF32 : MVCLoadStore; -defm LoadStoreF64 : MVCLoadStore; -defm LoadStoreF128 : MVCLoadStore; +defm LoadStoreF32 : MVCLoadStore; +defm LoadStoreF64 : MVCLoadStore; +defm LoadStoreF128 : MVCLoadStore; //===----------------------------------------------------------------------===// // Load instructions diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index a7e18ec..7f2f9f8 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1426,23 +1426,26 @@ class AtomicLoadWBinaryReg class AtomicLoadWBinaryImm : AtomicLoadWBinary; -// Define an instruction that operates on two fixed-length blocks of memory. -// The real instruction uses a bdladdr12onlylen8 for the first operand and a -// bdaddr12only for the second, with the length of the second operand being -// implicitly the same as the first. This arrangement matches the underlying -// assembly syntax. However, for instruction selection it's easier to have -// two normal bdaddr12onlys and a separate length operand, so define a pseudo -// instruction for that too. +// Define an instruction that operates on two fixed-length blocks of memory, +// and associated pseudo instructions for operating on blocks of any size. +// The Sequence form uses a straight-line sequence of instructions and +// the Loop form uses a loop of length-256 instructions followed by +// another instruction to handle the excess. multiclass MemorySS opcode, - SDPatternOperator operator> { + SDPatternOperator sequence, SDPatternOperator loop> { def "" : InstSS; - let usesCustomInserter = 1 in - def Wrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, - imm32len8:$length), - [(operator bdaddr12only:$dest, bdaddr12only:$src, - imm32len8:$length)]>; + let usesCustomInserter = 1 in { + def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length), + [(sequence bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length)]>; + def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256), + [(loop bdaddr12only:$dest, bdaddr12only:$src, + imm64:$length, GR64:$count256)]>; + } } // Define an instruction that operates on two strings, both terminated diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 8e1f5ac..399b48a 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -344,25 +344,25 @@ def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; // Memory-to-memory moves. let mayLoad = 1, mayStore = 1 in - defm MVC : MemorySS<"mvc", 0xD2, z_mvc>; + defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>; // String moves. 
let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0W] in defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; defm LoadStore8_32 : MVCLoadStore; + MVCSequence, 1>; defm LoadStore16_32 : MVCLoadStore; -defm LoadStore32_32 : MVCLoadStore; + MVCSequence, 2>; +defm LoadStore32_32 : MVCLoadStore; defm LoadStore8 : MVCLoadStore; + MVCSequence, 1>; defm LoadStore16 : MVCLoadStore; + MVCSequence, 2>; defm LoadStore32 : MVCLoadStore; -defm LoadStore64 : MVCLoadStore; + MVCSequence, 4>; +defm LoadStore64 : MVCLoadStore; //===----------------------------------------------------------------------===// // Sign extensions @@ -1028,7 +1028,7 @@ defm : ZXB; // Memory-to-memory comparison. let mayLoad = 1, Defs = [CC] in - defm CLC : MemorySS<"clc", 0xD5, z_clc>; + defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>; // String comparison. let mayLoad = 1, Defs = [CC], Uses = [R0W] in diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index eb96dba..421e41f 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -219,11 +219,6 @@ def uimm8 : Immediate; // i32 immediates //===----------------------------------------------------------------------===// -// Immediates for 8-bit lengths. -def imm32len8 : Immediate(N->getZExtValue() - 1); -}], NOOP_SDNodeXForm, "U32Imm">; - // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being zero. def imm32ll16 : Immediate(-N->getSExtValue()); }], NEGIMM32, "U32Imm">; -def imm64 : ImmLeaf; +def imm64 : ImmLeaf, Operand; //===----------------------------------------------------------------------===// // Floating-point immediates diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index e2c43d6..ff64ea8 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -57,7 +57,12 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, def SDT_ZMemMemLength : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, - SDTCisVT<2, i32>]>; + SDTCisVT<2, i64>]>; +def SDT_ZMemMemLoop : SDTypeProfile<0, 4, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i64>, + SDTCisVT<3, i64>]>; def SDT_ZString : SDTypeProfile<1, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, @@ -123,8 +128,12 @@ def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; +def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString, diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 638c3ee..6026b1f 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -25,6 +25,30 @@ SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { } +// Use MVC to copy Size bytes from Src to Dest, deciding whether to use +// a loop or straight-line code. 
+static SDValue emitMVC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Src, uint64_t Size) { + EVT PtrVT = Src.getValueType(); + // The heuristic we use is to prefer loops for anything that would + // require 7 or more MVCs. With these kinds of sizes there isn't + // much to choose between straight-line code and looping code, + // since the time will be dominated by the MVCs themselves. + // However, the loop has 4 or 5 instructions (depending on whether + // the base addresses can be proved equal), so there doesn't seem + // much point using a loop for 5 * 256 bytes or fewer. Anything in + // the range (5 * 256, 6 * 256) will need another instruction after + // the loop, so it doesn't seem worth using a loop then either. + // The next value up, 6 * 256, can be implemented in the same + // number of straight-line MVCs as 6 * 256 - 1. + if (Size > 6 * 256) + return DAG.getNode(SystemZISD::MVC_LOOP, DL, MVT::Other, Chain, Dst, Src, + DAG.getConstant(Size, PtrVT), + DAG.getConstant(Size / 256, PtrVT)); + return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst, Src, + DAG.getConstant(Size, PtrVT)); +} + SDValue SystemZSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, @@ -34,14 +58,8 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, if (IsVolatile) return SDValue(); - if (ConstantSDNode *CSize = dyn_cast(Size)) { - uint64_t Bytes = CSize->getZExtValue(); - if (Bytes >= 1 && Bytes <= 0x100) { - // A single MVC. - return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, - Chain, Dst, Src, Size); - } - } + if (ConstantSDNode *CSize = dyn_cast(Size)) + return emitMVC(DAG, DL, Chain, Dst, Src, CSize->getZExtValue()); return SDValue(); } @@ -65,7 +83,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dst, SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, MachinePointerInfo DstPtrInfo) const { - EVT DstVT = Dst.getValueType(); + EVT PtrVT = Dst.getValueType(); if (IsVolatile) return SDValue(); @@ -89,8 +107,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, Align, DstPtrInfo); if (Size2 == 0) return Chain1; - Dst = DAG.getNode(ISD::ADD, DL, DstVT, Dst, - DAG.getConstant(Size1, DstVT)); + Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(Size1, PtrVT)); DstPtrInfo = DstPtrInfo.getWithOffset(Size1); SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, std::min(Align, Size1), DstPtrInfo); @@ -103,8 +121,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, false, false, Align); if (Bytes == 1) return Chain1; - SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst, - DAG.getConstant(1, DstVT)); + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, PtrVT)); SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1), false, false, 1); @@ -112,16 +130,13 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } } assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); - if (Bytes <= 0x101) { - // Copy the byte to the first location and then use MVC to copy - // it to the rest. 
- Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, - false, false, Align); - SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst, - DAG.getConstant(1, DstVT)); - return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst2, Dst, - DAG.getConstant(Bytes - 1, MVT::i32)); - } + // Copy the byte to the first location and then use MVC to copy + // it to the rest. + Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, + DAG.getConstant(1, PtrVT)); + return emitMVC(DAG, DL, Chain, DstPlus1, Dst, Bytes - 1); } return SDValue(); } @@ -144,13 +159,14 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const { + EVT PtrVT = Src1.getValueType(); if (ConstantSDNode *CSize = dyn_cast(Size)) { uint64_t Bytes = CSize->getZExtValue(); if (Bytes >= 1 && Bytes <= 0x100) { // A single CLC. SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, - Src1, Src2, Size); + Src1, Src2, Size, DAG.getConstant(0, PtrVT)); SDValue Glue = Chain.getValue(1); return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); } -- cgit v1.1 From 62f61bf9077418801be5e2039333727079819844 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 27 Aug 2013 10:04:21 +0000 Subject: [mips][msa] Added spill/reload support git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189332 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEInstrInfo.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index a81db7f..e7967a8 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -199,6 +199,14 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC164; + else if (RC->hasType(MVT::v16i8)) + Opc = Mips::ST_B; + else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16)) + Opc = Mips::ST_H; + else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32)) + Opc = Mips::ST_W; + else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) + Opc = Mips::ST_D; assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) @@ -232,6 +240,14 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC164; + else if (RC->hasType(MVT::v16i8)) + Opc = Mips::LD_B; + else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16)) + Opc = Mips::LD_H; + else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32)) + Opc = Mips::LD_W; + else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) + Opc = Mips::LD_D; assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset) -- cgit v1.1 From c59efb626a7a11ec661981e96ba09bba0497b731 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 27 Aug 2013 10:31:36 +0000 Subject: ARM: add natural patterns for vaddhl and vsubhl. These instructions aren't particularly complicated and it's well worth having patterns for some reasonably useful LLVM IR that will match them. Soon we should be able to switch Clang over to producing this natural version. 
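[Editor's note] For reference, the "reasonably useful LLVM IR" in question is a lane-wise truncate((a + b) >> 8). A hedged sketch using NEON intrinsics that produces exactly that shape, and which these patterns should collapse to a single vaddhn/vsubhn, looks like this (function names are illustrative):

#include <arm_neon.h>

uint8x8_t add_high_narrow(uint16x8_t a, uint16x8_t b) {
  // logical shift right by half the lane width, then narrow: the vaddhn shape
  return vshrn_n_u16(vaddq_u16(a, b), 8);
}

uint8x8_t sub_high_narrow(uint16x8_t a, uint16x8_t b) {
  return vshrn_n_u16(vsubq_u16(a, b), 8); // the vsubhn shape
}
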
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189335 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 59dea48..e86e658 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -3979,6 +3979,13 @@ defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", int_arm_neon_vraddhn, 1>; +def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Multiply Operations. // VMUL : Vector Multiply (integer, polynomial and floating-point) @@ -4262,6 +4269,13 @@ defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", int_arm_neon_vrsubhn, 0>; +def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Comparisons. // VCEQ : Vector Compare Equal -- cgit v1.1 From dcfa0f7a408e54f15f0237daf2336df852053c6b Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 27 Aug 2013 11:24:16 +0000 Subject: [ARMv8] Add some negative tests for the recent VFP/NEON instructions. Fix two issues I found while writing these tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189341 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 4 ++-- lib/Target/ARM/ARMInstrVFP.td | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e86e658..a5d8985 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5758,9 +5758,9 @@ multiclass VRINT_FPI op9_7, SDPatternOperator Int> { } } - def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm)>; - def : InstAlias(NAME#"Q") QPR:$Qd, QPR:$Qm)>; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 3bb4d6f..34d83c6 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -669,6 +669,11 @@ multiclass vrint_inst_zrx { let Inst{7} = op2; let Inst{16} = op; } + + def : InstAlias(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>; + def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>; } defm VRINTZ : vrint_inst_zrx<"z", 0, 1>; -- cgit v1.1 From d371d4f0f473e447108e1860059e66ed3283cba7 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 27 Aug 2013 13:54:04 +0000 Subject: AVX-512: added conversion instructions. 
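[Editor's note] As a usage sketch, the packed forms defined below correspond to conversions reachable from AVX-512 intrinsics such as the following (assuming an AVX-512F-capable toolchain; the wrapper names are illustrative, not part of the patch):

#include <immintrin.h>

__m512 int32s_to_floats(__m512i v) {
  return _mm512_cvtepi32_ps(v);   // vcvtdq2ps zmm
}

__m256i doubles_to_int32s_trunc(__m512d v) {
  return _mm512_cvttpd_epi32(v);  // vcvttpd2dq: zmm source, ymm result
}

__m512d uint32s_to_doubles(__m256i v) {
  return _mm512_cvtepu32_pd(v);   // vcvtudq2pd: ymm source, zmm result
}
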
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189349 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 187 +++++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrSSE.td | 46 ++++++---- 2 files changed, 214 insertions(+), 19 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 057d551..90eb7d9 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2192,6 +2192,193 @@ defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; //===----------------------------------------------------------------------===// +// AVX-512 Scalar convert from sign integer to float/double +//===----------------------------------------------------------------------===// + +multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, + X86MemOperand x86memop, string asm> { +let neverHasSideEffects = 1 in { + def rr : SI, EVEX_4V; + let mayLoad = 1 in + def rm : SI, EVEX_4V; +} // neverHasSideEffects = 1 +} + +defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">, + XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, + XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">, + XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, + XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; + +def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>; + +def : Pat<(f32 (sint_to_fp GR32:$src)), + (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(f32 (sint_to_fp GR64:$src)), + (VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>; +def : Pat<(f64 (sint_to_fp GR32:$src)), + (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(f64 (sint_to_fp GR64:$src)), + (VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>; + + +//===----------------------------------------------------------------------===// +// AVX-512 Convert form float to double and back +//===----------------------------------------------------------------------===// +let neverHasSideEffects = 1 in { +def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst), + (ins FR32X:$src1, FR32X:$src2), + "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; +let mayLoad = 1 in +def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst), + (ins FR32X:$src1, f32mem:$src2), + "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, + EVEX_CD8<32, CD8VT1>; + +// Convert scalar double to scalar single +def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst), + (ins FR64X:$src1, FR64X:$src2), + "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>; +let mayLoad = 1 in +def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst), + (ins FR64X:$src1, f64mem:$src2), + "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, 
$src1, $src2}", + []>, EVEX_4V, VEX_LIG, VEX_W, + Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>; +} + +def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>, + Requires<[HasAVX512]>; +def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>; + +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX512, OptForSize]>; + +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, + Requires<[HasAVX512, OptForSpeed]>; + +def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, + Requires<[HasAVX512]>; + +multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, + RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT, ValueType InVT, + Domain d> { +let neverHasSideEffects = 1 in { + def rr : AVX512PI, EVEX; + let mayLoad = 1 in + def rm : AVX512PI, EVEX; +} // neverHasSideEffects = 1 +} + +defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround, + memopv8f64, f512mem, v8f32, v8f64, + SSEPackedSingle>, EVEX_V512, VEX_W, OpSize, + EVEX_CD8<64, CD8VF>; + +defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, + memopv4f64, f256mem, v8f64, v8f32, + SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>; +def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; + +//===----------------------------------------------------------------------===// +// AVX-512 Vector convert from sign integer to float/double +//===----------------------------------------------------------------------===// + +defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, + memopv8i64, i512mem, v16f32, v16i32, + SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, + memopv4i64, i256mem, v8f64, v8i32, + SSEPackedDouble>, EVEX_V512, XS, + EVEX_CD8<32, CD8VH>; + +defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, + memopv16f32, f512mem, v16i32, v16f32, + SSEPackedSingle>, EVEX_V512, XS, + EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, + memopv8f64, f512mem, v8i32, v8f64, + SSEPackedDouble>, EVEX_V512, OpSize, VEX_W, + EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, + memopv16f32, f512mem, v16i32, v16f32, + SSEPackedSingle>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, + memopv8f64, f512mem, v8i32, v8f64, + SSEPackedDouble>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + +defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, + memopv4i64, f256mem, v8f64, v8i32, + SSEPackedDouble>, EVEX_V512, XS, + EVEX_CD8<32, CD8VH>; + +defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, + memopv16i32, f512mem, v16f32, v16i32, + SSEPackedSingle>, EVEX_V512, XD, + EVEX_CD8<32, CD8VF>; + +def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), + (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + + +def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src), + (VCVTDQ2PSZrr VR512:$src)>; +def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))), + (VCVTDQ2PSZrm addr:$src)>; + +def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs 
VR512:$dst), (ins VR512:$src), + "vcvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_cvt_ps2dq_512 VR512:$src))], + IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512; +def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src), + "vcvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + + +let Predicates = [HasAVX512] in { + def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), + (VCVTPD2PSZrm addr:$src)>; + def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; +} +//===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations multiclass avx512_shufp opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, Predicates = [UseAVX] in { def rr : SI, Sched<[WriteCvtI2F]>; @@ -1452,6 +1452,7 @@ let neverHasSideEffects = 1 in { } // neverHasSideEffects = 1 } +let Predicates = [UseAVX] in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, @@ -1485,7 +1486,7 @@ def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; - +} // The assembler can recognize rr 64-bit instructions by seeing a rxx // register, but the same isn't true when only using memory operands, // provide other assembly "l" and "q" forms to address this explicitly @@ -1499,12 +1500,12 @@ defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W, VEX_LIG; -def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", +let Predicates = [UseAVX] in { + def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>; -def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", + def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; -let Predicates = [HasAVX] in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), @@ -1606,19 +1607,21 @@ multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } +let Predicates = [UseAVX] in { defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; - +} defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W; +let Predicates = [UseAVX] in { defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", SSE_CVT_Scalar, 0>, XS, VEX_4V; @@ -1633,7 +1636,7 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, 
VEX_4V, VEX_W; - +} let Constraints = "$src1 = $dst" in { defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, @@ -1652,6 +1655,7 @@ let Constraints = "$src1 = $dst" in { /// SSE 1 Only // Aliases for intrinsics +let Predicates = [UseAVX] in { defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS, VEX; @@ -1666,6 +1670,7 @@ defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_W; +} defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; @@ -1679,13 +1684,14 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W; +let Predicates = [UseAVX] in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; - +} defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_32>, XS; @@ -1707,6 +1713,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, SSEPackedSingle, SSE_CVT_PS>, TB, Requires<[UseSSE2]>; +let Predicates = [UseAVX] in { def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>; def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", @@ -1723,6 +1730,7 @@ def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>; def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>; +} def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", (CVTSS2SIrr GR32:$dst, VR128:$src), 0>; @@ -1744,7 +1752,7 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", /// SSE 2 Only // Convert scalar double to scalar single -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, Predicates = [UseAVX] in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], @@ -1760,7 +1768,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), } def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, - Requires<[HasAVX]>; + Requires<[UseAVX]>; def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", @@ -1778,14 +1786,14 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>, Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { @@ -1807,7 +1815,7 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, // Convert scalar single to scalar double // SSE2 instructions 
with XS prefix -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, Predicates = [UseAVX] in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1824,16 +1832,16 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), } def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>; def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>; def : Pat<(extloadf32 addr:$src), (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; + Requires<[UseAVX, OptForSize]>; def : Pat<(extloadf32 addr:$src), (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; + Requires<[UseAVX, OptForSpeed]>; def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -1861,14 +1869,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>, Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, -- cgit v1.1 From 0b90c6223d9c49b5e0dc4bf4e53796b0714d7b80 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 27 Aug 2013 17:38:16 +0000 Subject: [ARMv8] Add MC support for the new load/store acquire/release instructions. 
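[Editor's note] For context, these are the ARMv8 instructions that carry C++11 acquire/release semantics directly, with no separate barrier. This commit adds MC (assembler/disassembler) support only, so the snippet below is a hedged illustration of the eventual codegen target, not something this patch selects yet:

#include <atomic>

int load_acquire(const std::atomic<int> &x) {
  return x.load(std::memory_order_acquire); // natural candidate for LDA
}

void store_release(std::atomic<int> &x, int v) {
  x.store(v, std::memory_order_release);    // natural candidate for STL
}
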
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189388 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 54 +++++++++-- lib/Target/ARM/ARMInstrInfo.td | 46 ++++++++- lib/Target/ARM/ARMInstrThumb2.td | 135 +++++++++++++++++++++++--- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 5 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 5 +- 5 files changed, 218 insertions(+), 27 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 6d4de3d..e505e1a 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -500,8 +500,7 @@ class JTI; -// Atomic load/store instructions -class AIldrex opcod, dag oops, dag iops, InstrItinClass itin, +class AIldr_ex_or_acq opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> : I { @@ -512,23 +511,52 @@ class AIldrex opcod, dag oops, dag iops, InstrItinClass itin, let Inst{20} = 1; let Inst{19-16} = addr; let Inst{15-12} = Rt; - let Inst{11-0} = 0b111110011111; + let Inst{11-10} = 0b11; + let Inst{9-8} = opcod2; + let Inst{7-0} = 0b10011111; } -class AIstrex opcod, dag oops, dag iops, InstrItinClass itin, +class AIstr_ex_or_rel opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> : I { - bits<4> Rd; bits<4> Rt; bits<4> addr; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 0; let Inst{19-16} = addr; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b11111001; + let Inst{11-10} = 0b11; + let Inst{9-8} = opcod2; + let Inst{7-4} = 0b1001; let Inst{3-0} = Rt; } +// Atomic load/store instructions +class AIldrex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq; + +class AIstrex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel { + bits<4> Rd; + let Inst{15-12} = Rd; +} + +// Exclusive load/store instructions + +class AIldaex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq, + Requires<[IsARM, HasV8]>; + +class AIstlex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel, + Requires<[IsARM, HasV8]> { + bits<4> Rd; + let Inst{15-12} = Rd; +} + class AIswp pattern> : AI { bits<4> Rt; @@ -545,6 +573,18 @@ class AIswp pattern> let Unpredictable{11-8} = 0b1111; let DecoderMethod = "DecodeSwap"; } +// Acquire/Release load/store instructions +class AIldracq opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq, + Requires<[IsARM, HasV8]>; + +class AIstrrel opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel, + Requires<[IsARM, HasV8]> { + let Inst{15-12} = 0b1111; +} // addrmode1 instructions class AI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index dc9a6d2..201b640 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -2295,6 +2295,13 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), []>, Requires<[IsARM, HasV5TE]>; } +def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "lda", "\t$Rt, $addr", []>; +def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldab", "\t$Rt, $addr", []>; +def LDAH : AIldracq<0b11, 
(outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldah", "\t$Rt, $addr", []>; + // Indexed loads multiclass AI2_ldridx { @@ -2837,6 +2844,12 @@ multiclass AI3strT op, string opc> { defm STRHT : AI3strT<0b1011, "strht">; +def STL : AIstrrel<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "stl", "\t$Rt, $addr", []>; +def STLB : AIstrrel<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "stlb", "\t$Rt, $addr", []>; +def STLH : AIstrrel<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "stlh", "\t$Rt, $addr", []>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. @@ -4423,8 +4436,7 @@ def strex_4 : PatFrag<(ops node:$val, node:$ptr), let mayLoad = 1 in { def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, - "ldrexb", "\t$Rt, $addr", + NoItinerary, "ldrexb", "\t$Rt, $addr", [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "ldrexh", "\t$Rt, $addr", @@ -4433,10 +4445,22 @@ def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "ldrex", "\t$Rt, $addr", [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>; let hasExtraDefRegAllocReq = 1 in -def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), +def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), NoItinerary, "ldrexd", "\t$Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegLoad"; } + +def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaexb", "\t$Rt, $addr", []>; +def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaexh", "\t$Rt, $addr", []>; +def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaex", "\t$Rt, $addr", []>; +let hasExtraDefRegAllocReq = 1 in +def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), + NoItinerary, "ldaexd", "\t$Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegLoad"; +} } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { @@ -4455,8 +4479,22 @@ def STREXD : AIstrex<0b01, (outs GPR:$Rd), NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegStore"; } +def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", + []>; +def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", + []>; +def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlex", "\t$Rd, $Rt, $addr", + []>; +let hasExtraSrcRegAllocReq = 1 in +def STLEXD : AIstlex<0b01, (outs GPR:$Rd), + (ins GPRPairOp:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegStore"; +} } - def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", [(int_arm_clrex)]>, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index f3464cc..c8c9c9b 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1396,6 +1396,28 @@ def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>; def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>; def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>; +class T2Ildacq bits23_20, bits<2> bit54, dag oops, dag iops, string opc, string 
asm, list pattern> + : Thumb2I { + bits<4> Rt; + bits<4> addr; + + let Inst{31-27} = 0b11101; + let Inst{26-24} = 0b000; + let Inst{23-20} = bits23_20; + let Inst{11-6} = 0b111110; + let Inst{5-4} = bit54; + let Inst{3-0} = 0b1111; + + // Encode instruction operands + let Inst{19-16} = addr; + let Inst{15-12} = Rt; +} + +def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt), (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>; +def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>; +def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>; + + // Store defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, BinOpFrag<(store node:$LHS, node:$RHS)>>; @@ -1539,6 +1561,29 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm", "$addr.base = $wb", []>; +class T2Istrrel bit54, dag oops, dag iops, string opc, string asm, list pattern> + : Thumb2I { + bits<4> Rt; + bits<4> addr; + + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001100; + let Inst{11-6} = 0b111110; + let Inst{5-4} = bit54; + let Inst{3-0} = 0b1111; + + // Encode instruction operands + let Inst{19-16} = addr; + let Inst{15-12} = Rt; +} + +def t2STL : T2Istrrel<0b10, (outs rGPR:$Rt), (ins addr_offset_none:$addr), + "stl", "\t$Rt, $addr", []>; +def t2STLB : T2Istrrel<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), + "stlb", "\t$Rt, $addr", []>; +def t2STLH : T2Istrrel<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), + "stlh", "\t$Rt, $addr", []>; + // T2Ipl (Preload Data/Instruction) signals the memory system of possible future // data/instruction access. // instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0), @@ -3126,15 +3171,14 @@ def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary, let Inst{3-0} = opt; } -class T2I_ldrex opcod, dag oops, dag iops, AddrMode am, int sz, +class T2I_ldrex opcod, dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list pattern, bits<4> rt2 = 0b1111> : Thumb2I { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001101; let Inst{11-8} = rt2; - let Inst{7-6} = 0b01; - let Inst{5-4} = opcod; + let Inst{7-4} = opcod; let Inst{3-0} = 0b1111; bits<4> addr; @@ -3142,15 +3186,14 @@ class T2I_ldrex opcod, dag oops, dag iops, AddrMode am, int sz, let Inst{19-16} = addr; let Inst{15-12} = Rt; } -class T2I_strex opcod, dag oops, dag iops, AddrMode am, int sz, +class T2I_strex opcod, dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list pattern, bits<4> rt2 = 0b1111> : Thumb2I { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001100; let Inst{11-8} = rt2; - let Inst{7-6} = 0b01; - let Inst{5-4} = opcod; + let Inst{7-4} = opcod; bits<4> Rd; bits<4> addr; @@ -3161,11 +3204,11 @@ class T2I_strex opcod, dag oops, dag iops, AddrMode am, int sz, } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), +def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), +def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; @@ -3183,7 
+3226,7 @@ def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), let Inst{7-0} = addr{7-0}; } let hasExtraDefRegAllocReq = 1 in -def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), +def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", @@ -3191,16 +3234,48 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), bits<4> Rt2; let Inst{11-8} = Rt2; } +def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "ldaexb", "\t$Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]>; +def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "ldaexh", "\t$Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]>; +def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "ldaex", "\t$Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]> { + bits<4> Rt; + bits<4> addr; + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{19-16} = addr; + let Inst{15-12} = Rt; + let Inst{11-8} = 0b1111; + let Inst{7-0} = 0b11101111; +} +let hasExtraDefRegAllocReq = 1 in +def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2), + (ins addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "ldaexd", "\t$Rt, $Rt2, $addr", "", + [], {?, ?, ?, ?}>, Requires<[HasV8]> { + bits<4> Rt2; + let Inst{11-8} = Rt2; + + let Inst{7} = 1; +} } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), +def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexb", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>; -def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), +def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", @@ -3224,7 +3299,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, let Inst{7-0} = addr{7-0}; } let hasExtraSrcRegAllocReq = 1 in -def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), +def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], @@ -3232,6 +3307,42 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), bits<4> Rt2; let Inst{11-8} = Rt2; } +def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd), + (ins rGPR:$Rt, addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "stlexb", "\t$Rd, $Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]>; + +def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), + (ins rGPR:$Rt, addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "stlexh", "\t$Rd, $Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]>; + +def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, + addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "stlex", "\t$Rd, $Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]> { + bits<4> Rd; + bits<4> Rt; + bits<4> addr; + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001100; + let Inst{19-16} = addr; + let Inst{15-12} = Rt; + let Inst{11-4} = 0b11111110; + let Inst{3-0} = Rd; +} +let hasExtraSrcRegAllocReq = 1 in +def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd), + (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], + {?, ?, ?, ?}>, 
Requires<[IsThumb, HasV8]> { + bits<4> Rt2; + let Inst{11-8} = Rt2; +} } def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>, diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 7467071..b0037f0 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5201,8 +5201,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. if (!isThumb() && Operands.size() > 4 && - (Mnemonic == "ldrexd" || Mnemonic == "strexd")) { - bool isLoad = (Mnemonic == "ldrexd"); + (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" || + Mnemonic == "stlexd")) { + bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd"); unsigned Idx = isLoad ? 2 : 3; ARMOperand* Op1 = static_cast(Operands[Idx]); ARMOperand* Op2 = static_cast(Operands[Idx+1]); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 97da232..8b99c17 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -249,9 +249,10 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // GPRs. However, when decoding them, the two GRPs cannot be automatically // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. - if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) { + if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD || + Opcode == ARM::LDAEXD || Opcode == ARM::STLEXD) { const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID); - bool isStore = Opcode == ARM::STREXD; + bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD; unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg(); if (MRC.contains(Reg)) { MCInst NewMI; -- cgit v1.1 From b0bb2d6636ed7b1d089a37b3cf7913d06bb49f37 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 27 Aug 2013 19:45:28 +0000 Subject: Changed comment git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189396 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index aa927bd..02e93b8 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -190,7 +190,7 @@ static void LowerDextDins(MCInst& InstIn) { } /// EncodeInstruction - Emit the instruction. -/// Size the instruction (currently only 4 bytes +/// Size the instruction with Desc.getSize(). void MipsMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups) const -- cgit v1.1 From 66b7139b1be1ddce410d97499d5831231c6be267 Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Tue, 27 Aug 2013 20:23:19 +0000 Subject: Given target assembler parsers a chance to handle variant expressions first. Use this to turn the PPC modifiers into PPC specific expressions, allowing them to work on constants. 
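[Editor's note] As an illustration of what this enables, a modifier can now be evaluated on a bare constant rather than only on a symbol, so the usual two-instruction materialization works with literal operands. A hedged sketch (GNU-style inline assembly for a PowerPC target, not a test from the patch):

long materialize_constant() {
  long r;
  // @ha compensates when @l is negative; here 0x12345678@ha = 0x1234
  // and 0x12345678@l = 0x5678, so lis + addi rebuild the full constant.
  asm("lis  %0, 0x12345678@ha\n\t"
      "addi %0, %0, 0x12345678@l"
      : "=r"(r));
  return r;
}
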
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189400 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 27 +++++++++++++++++++++++++++ lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 7 ++++--- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index a8f7509..bfa9bb7 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -235,6 +235,10 @@ public: virtual bool ParseDirective(AsmToken DirectiveID); unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); + + virtual const MCExpr *applyModifierToExpr(const MCExpr *E, + MCSymbolRefExpr::VariantKind, + MCContext &Ctx); }; /// PPCOperand - Instances of this class represent a parsed PowerPC machine @@ -1363,3 +1367,26 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, return Match_InvalidOperand; } +const MCExpr * +PPCAsmParser::applyModifierToExpr(const MCExpr *E, + MCSymbolRefExpr::VariantKind Variant, + MCContext &Ctx) { + switch (Variant) { + case MCSymbolRefExpr::VK_PPC_LO: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HI: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HA: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HIGHER: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HIGHERA: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HIGHEST: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HIGHESTA: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx); + default: + return 0; + } +} diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 9529267..633cd36 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -54,7 +54,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const { MCValue Value; - if (!getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) + if (Layout && !getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) return false; if (Value.isAbsolute()) { @@ -85,7 +85,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, break; } Res = MCValue::get(Result); - } else { + } else if (Layout) { MCContext &Context = Layout->getAssembler().getContext(); const MCSymbolRefExpr *Sym = Value.getSymA(); MCSymbolRefExpr::VariantKind Modifier = Sym->getKind(); @@ -118,7 +118,8 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, } Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context); Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant()); - } + } else + return false; return true; } -- cgit v1.1 From 7cde9d0286a8976feebb20e61612fb999527f630 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 27 Aug 2013 21:56:17 +0000 Subject: [ms-inline asm] Support offsets after segment registers Summary: MASM lets you do stuff like 'MOV FS:20, EAX' and 'MOV EAX, FS:20' Reviewers: craig.topper, rnk Reviewed By: rnk CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D1470 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189407 91177308-0d34-0410-b5e6-96231b3b80d8 ---
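As a rough C++ illustration (assuming 32-bit x86 and MS-style inline asm, e.g. clang with -fasm-blocks; the function name and the offset 20 are arbitrary), both directions from the summary now parse:

    // Read and write the dword at offset 20 of the FS segment.
    unsigned long roundTripFs20() {
      unsigned long v;
      __asm mov eax, fs:20   // MOV EAX, FS:20 -- immediate offset after a segment register
      __asm mov v, eax
      __asm mov fs:20, eax   // MOV FS:20, EAX -- same form on the store side
      return v;
    }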
lib/Target/X86/AsmParser/X86AsmParser.cpp | 110 +++++++++++++++++++----------- 1 file changed, 70 insertions(+), 40 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d5d9c52..7eb42c4 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -497,8 +497,9 @@ private: X86Operand *ParseIntelOffsetOfOperator(); X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); X86Operand *ParseIntelOperator(unsigned OpKind); - X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp, - SMLoc StartLoc); + X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); + X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, + unsigned Size); X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp, unsigned Size); @@ -1405,46 +1406,67 @@ X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, return 0; } -/// ParseIntelMemOperand - Parse intel style memory operand. -X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, - int64_t ImmDisp, - SMLoc Start) { - const AsmToken &Tok = Parser.getTok(); - SMLoc End; - - unsigned Size = getIntelMemOperandSize(Tok.getString()); - if (Size) { - Parser.Lex(); // Eat operand size (e.g., byte, word). - if (Tok.getString() != "PTR" && Tok.getString() != "ptr") - return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!"); - Parser.Lex(); // Eat ptr. - } - - // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. +/// \brief Parse intel style segment override. +X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, + SMLoc Start, + unsigned Size) { + assert(SegReg != 0 && "Tried to parse a segment override without a segment!"); + const AsmToken &Tok = Parser.getTok(); // Eat colon. + if (Tok.isNot(AsmToken::Colon)) + return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); + Parser.Lex(); // Eat ':' + + int64_t ImmDisp = 0; if (getLexer().is(AsmToken::Integer)) { + ImmDisp = Tok.getIntVal(); + AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. + if (isParsingInlineAsm()) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, - Tok.getLoc())); - int64_t ImmDisp = Tok.getIntVal(); - Parser.Lex(); // Eat the integer. - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); + InstInfo->AsmRewrites->push_back( + AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc())); + + if (getLexer().isNot(AsmToken::LBrac)) { + // An immediate following a 'segment register', 'colon' token sequence can + // be followed by a bracketed expression. If it isn't we know we have our + // final segment override. + const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext()); + return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0, + /*Scale=*/1, Start, ImmDispToken.getEndLoc(), + Size); + } } if (getLexer().is(AsmToken::LBrac)) return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); - if (!ParseRegister(SegReg, Start, End)) { - // Handel SegReg : [ ... 
] - if (getLexer().isNot(AsmToken::Colon)) - return ErrorOperand(Start, "Expected ':' token!"); - Parser.Lex(); // Eat : - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); + const MCExpr *Val; + SMLoc End; + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); + + return X86Operand::CreateMem(Val, Start, End, Size); } + InlineAsmIdentifierInfo Info; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) + return Err; + return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, + /*Scale=*/1, Start, End, Size, Identifier, Info); +} + +/// ParseIntelMemOperand - Parse intel style memory operand. +X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start, + unsigned Size) { + const AsmToken &Tok = Parser.getTok(); + SMLoc End; + + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. + if (getLexer().is(AsmToken::LBrac)) + return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size); + const MCExpr *Val; if (!isParsingInlineAsm()) { if (getParser().parsePrimaryExpr(Val, End)) @@ -1458,7 +1480,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, /*Unevaluated*/ false, End)) return Err; - return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, + return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1, Start, End, Size, Identifier, Info); } @@ -1574,7 +1596,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { X86Operand *X86AsmParser::ParseIntelOperand() { const AsmToken &Tok = Parser.getTok(); - SMLoc Start = Tok.getLoc(), End; + SMLoc Start, End; // Offset, length, type and size operators. if (isParsingInlineAsm()) { @@ -1589,6 +1611,15 @@ X86Operand *X86AsmParser::ParseIntelOperand() { return ParseIntelOperator(IOK_TYPE); } + unsigned Size = getIntelMemOperandSize(Tok.getString()); + if (Size) { + Parser.Lex(); // Eat operand size (e.g., byte, word). + if (Tok.getString() != "PTR" && Tok.getString() != "ptr") + return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!"); + Parser.Lex(); // Eat ptr. + } + Start = Tok.getLoc(); + // Immediate. if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || getLexer().is(AsmToken::LParen)) { @@ -1620,23 +1651,22 @@ X86Operand *X86AsmParser::ParseIntelOperand() { "before bracketed expr."); // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. - return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start); + return ParseIntelMemOperand(Imm, Start, Size); } // Register. unsigned RegNo = 0; if (!ParseRegister(RegNo, Start, End)) { // If this is a segment register followed by a ':', then this is the start - // of a memory reference, otherwise this is a normal register reference. + // of a segment override, otherwise this is a normal register reference. if (getLexer().isNot(AsmToken::Colon)) return X86Operand::CreateReg(RegNo, Start, End); - getParser().Lex(); // Eat the colon. - return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start); + return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); } // Memory operand. 
- return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start); + return ParseIntelMemOperand(/*Disp=*/0, Start, Size); } X86Operand *X86AsmParser::ParseATTOperand() { -- cgit v1.1 From 6d55dfaf9124c3e5e54190090478d2f6384a51d4 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 28 Aug 2013 00:34:17 +0000 Subject: [mips] Set isAllocatable and CoveredBySubRegs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189430 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsRegisterInfo.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 022a9c0..22c4890 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -60,6 +60,7 @@ class AFPR Enc, string n, list subregs> class AFPR64 Enc, string n, list subregs> : MipsRegWithSubRegs { let SubRegIndices = [sub_lo, sub_hi]; + let CoveredBySubRegs = 1; } // Mips 128-bit (aliased) MSA Registers @@ -294,7 +295,8 @@ def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable; // * FGR32 - 32 32-bit registers (single float only mode) def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>; -def FGRH32 : RegisterClass<"Mips", [f32], 32, (sequence "F_HI%u", 0, 31)>; +def FGRH32 : RegisterClass<"Mips", [f32], 32, (sequence "F_HI%u", 0, 31)>, + Unallocatable; def AFGR64 : RegisterClass<"Mips", [f64], 64, (add // Return Values and Arguments -- cgit v1.1 From bf19dba2d4c7927832d3037c15e0101afb730415 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 28 Aug 2013 00:42:50 +0000 Subject: [mips] Clean up definitions of move word from/to coprocessor instructions. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189431 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 30 +++++++++--------------------- lib/Target/Mips/MipsInstrInfo.td | 36 +++++++++++------------------------- 2 files changed, 20 insertions(+), 46 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 11efdb7..baf9c1b 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -320,28 +320,16 @@ def : InstAlias<"dadd $rs, $rt, $imm", 0>; /// Move between CPU and coprocessor registers -let DecoderNamespace = "Mips64" in { -def DMFC0_3OP64 : MFC3OP<(outs GPR64Opnd:$rt), - (ins GPR64Opnd:$rd, uimm16:$sel), - "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>; -def DMTC0_3OP64 : MFC3OP<(outs GPR64Opnd:$rd, uimm16:$sel), - (ins GPR64Opnd:$rt), - "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>; -def DMFC2_3OP64 : MFC3OP<(outs GPR64Opnd:$rt), - (ins GPR64Opnd:$rd, uimm16:$sel), - "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>; -def DMTC2_3OP64 : MFC3OP<(outs GPR64Opnd:$rd, uimm16:$sel), - (ins GPR64Opnd:$rt), - "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>; +let DecoderNamespace = "Mips64", Predicates = [HasMips64] in { +def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>; +def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>; +def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>; +def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>; } // Two operand (implicit 0 selector) versions: -def : InstAlias<"dmfc0 $rt, $rd", - (DMFC0_3OP64 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; -def : InstAlias<"dmtc0 $rt, $rd", - (DMTC0_3OP64 GPR64Opnd:$rd, 0, GPR64Opnd:$rt), 0>; -def : InstAlias<"dmfc2 $rt, $rd", - (DMFC2_3OP64 GPR64Opnd:$rt, 
GPR64Opnd:$rd, 0), 0>; -def : InstAlias<"dmtc2 $rt, $rd", - (DMTC2_3OP64 GPR64Opnd:$rd, 0, GPR64Opnd:$rt), 0>; +def : InstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; +def : InstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; +def : InstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; +def : InstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index a391a89..714c7a8 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -773,8 +773,9 @@ class SCBase : let Constraints = "$rt = $dst"; } -class MFC3OP : - InstSE; +class MFC3OP : + InstSE<(outs RO:$rt, RO:$rd, uimm16:$sel), (ins), + !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>; let isBarrier = 1, isTerminator = 1, isCodeGenOnly = 1 in def TRAP : InstSE<(outs), (ins), "break", [(trap)], NoItinerary, FrmOther> { @@ -1046,21 +1047,10 @@ def EXT : ExtBase<"ext", GPR32Opnd>, EXT_FM<0>; def INS : InsBase<"ins", GPR32Opnd>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers -def MFC0_3OP : MFC3OP<(outs GPR32Opnd:$rt), - (ins GPR32Opnd:$rd, uimm16:$sel), - "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>; - -def MTC0_3OP : MFC3OP<(outs GPR32Opnd:$rd, uimm16:$sel), - (ins GPR32Opnd:$rt), - "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>; - -def MFC2_3OP : MFC3OP<(outs GPR32Opnd:$rt), - (ins GPR32Opnd:$rd, uimm16:$sel), - "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>; - -def MTC2_3OP : MFC3OP<(outs GPR32Opnd:$rd, uimm16:$sel), - (ins GPR32Opnd:$rt), - "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>; +def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>; +def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>; +def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>; +def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>; //===----------------------------------------------------------------------===// // Instruction aliases @@ -1092,14 +1082,10 @@ def : InstAlias<"xor $rs, $rt, $imm", def : InstAlias<"or $rs, $rt, $imm", (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; -def : InstAlias<"mfc0 $rt, $rd", - (MFC0_3OP GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : InstAlias<"mtc0 $rt, $rd", - (MTC0_3OP GPR32Opnd:$rd, 0, GPR32Opnd:$rt), 0>; -def : InstAlias<"mfc2 $rt, $rd", - (MFC2_3OP GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : InstAlias<"mtc2 $rt, $rd", - (MTC2_3OP GPR32Opnd:$rd, 0, GPR32Opnd:$rt), 0>; +def : InstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +def : InstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +def : InstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +def : InstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; def : InstAlias<"bnez $rs,$offset", (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : InstAlias<"beqz $rs,$offset", -- cgit v1.1 From a796d90c0ed7ebd5d58fced43c60afc2e9bf6225 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 28 Aug 2013 00:55:15 +0000 Subject: [mips] Use ptr_rc to simplify definitions of base+index load/store instructions. Also, fix predicates. 
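As a rough sketch of what the simplification covers (extended inline asm, compiler-chosen registers, only meaningful on a MIPS target with an FPU): the base and index of instructions like lwxc1 are now pointer-sized automatically, 32-bit GPRs under o32 and 64-bit GPRs under n64, instead of needing separate per-ABI instruction definitions.

    // Load a float through the base+index form: lwxc1 $fd, index(base).
    float loadIndexed(const float *base, long index) {
      float f;
      asm("lwxc1 %0, %1(%2)"
          : "=f"(f)
          : "r"(index), "r"(base)
          : "memory");   // the asm reads memory the constraints do not describe
      return f;
    }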
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189432 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 93 ++++++++++++++++++++++- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 21 ++++- lib/Target/Mips/MipsDSPInstrInfo.td | 5 +- lib/Target/Mips/MipsInstrFPU.td | 66 ++++++---------- lib/Target/Mips/MipsInstrInfo.td | 7 ++ 5 files changed, 143 insertions(+), 49 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index e92d58a..7214cb5 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -88,6 +88,11 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseMemOperand(SmallVectorImpl &Operands); + bool parsePtrReg(SmallVectorImpl &Operands, int RegKind); + + MipsAsmParser::OperandMatchResultTy + parsePtrReg(SmallVectorImpl &Operands); + MipsAsmParser::OperandMatchResultTy parseGPR32(SmallVectorImpl &Operands); @@ -179,6 +184,10 @@ class MipsAsmParser : public MCTargetAsmParser { return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0; } + bool isN64() const { + return STI.getFeatureBits() & Mips::FeatureN64; + } + int matchRegisterName(StringRef Symbol, bool is64BitReg); int matchCPURegisterName(StringRef Symbol); @@ -245,6 +254,7 @@ private: k_Memory, k_PostIndexRegister, k_Register, + k_PtrReg, k_Token } Kind; @@ -284,6 +294,11 @@ public: Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addPtrRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getPtrReg())); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const{ // Add as immediate when possible. Null MCExpr = 0. if (Expr == 0) @@ -313,6 +328,7 @@ public: bool isImm() const { return Kind == k_Immediate; } bool isToken() const { return Kind == k_Token; } bool isMem() const { return Kind == k_Memory; } + bool isPtrReg() const { return Kind == k_PtrReg; } StringRef getToken() const { assert(Kind == k_Token && "Invalid access!"); @@ -324,8 +340,13 @@ public: return Reg.RegNum; } + unsigned getPtrReg() const { + assert((Kind == k_PtrReg) && "Invalid access!"); + return Reg.RegNum; + } + void setRegKind(RegisterKind RegKind) { - assert((Kind == k_Register) && "Invalid access!"); + assert((Kind == k_Register || Kind == k_PtrReg) && "Invalid access!"); Reg.Kind = RegKind; } @@ -361,6 +382,14 @@ public: return Op; } + static MipsOperand *CreatePtrReg(unsigned RegNum, SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_PtrReg); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { MipsOperand *Op = new MipsOperand(k_Immediate); Op->Imm.Val = Val; @@ -1289,6 +1318,68 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( return MatchOperand_Success; } +bool +MipsAsmParser::parsePtrReg(SmallVectorImpl &Operands, + int RegKind) { + // If the first token is not '$' we have an error. 
+ if (Parser.getTok().isNot(AsmToken::Dollar)) + return false; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); + AsmToken::TokenKind TkKind = getLexer().getKind(); + int Reg; + + if (TkKind == AsmToken::Integer) { + Reg = matchRegisterByNumber(Parser.getTok().getIntVal(), + regKindToRegClass(RegKind)); + if (Reg == -1) + return false; + } else if (TkKind == AsmToken::Identifier) { + if ((Reg = matchCPURegisterName(Parser.getTok().getString().lower())) == -1) + return false; + Reg = getReg(regKindToRegClass(RegKind), Reg); + } else { + return false; + } + + MipsOperand *Op = MipsOperand::CreatePtrReg(Reg, S, Parser.getTok().getLoc()); + Op->setRegKind((MipsOperand::RegisterKind)RegKind); + Operands.push_back(Op); + Parser.Lex(); + return true; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parsePtrReg(SmallVectorImpl &Operands) { + MipsOperand::RegisterKind RegKind = isN64() ? MipsOperand::Kind_GPR64 : + MipsOperand::Kind_GPR32; + + // Parse index register. + if (!parsePtrReg(Operands, RegKind)) + return MatchOperand_NoMatch; + + // Parse '('. + if (Parser.getTok().isNot(AsmToken::LParen)) + return MatchOperand_NoMatch; + + Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc())); + Parser.Lex(); + + // Parse base register. + if (!parsePtrReg(Operands, RegKind)) + return MatchOperand_NoMatch; + + // Parse ')'. + if (Parser.getTok().isNot(AsmToken::RParen)) + return MatchOperand_NoMatch; + + Operands.push_back(MipsOperand::CreateToken(")", getLexer().getLoc())); + Parser.Lex(); + + return MatchOperand_Success; +} + MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseRegs(SmallVectorImpl &Operands, int RegKind) { diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index c6f3bab..6e12a5d 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -35,14 +35,18 @@ public: /// MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info, bool bigEndian) : - MCDisassembler(STI), RegInfo(Info), isBigEndian(bigEndian) {} + MCDisassembler(STI), RegInfo(Info), + IsN64(STI.getFeatureBits() & Mips::FeatureN64), isBigEndian(bigEndian) {} virtual ~MipsDisassemblerBase() {} const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); } + bool isN64() const { return IsN64; } + private: OwningPtr RegInfo; + bool IsN64; protected: bool isBigEndian; }; @@ -103,6 +107,11 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -364,6 +373,16 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (static_cast(Decoder)->isN64()) + return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder); + + return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); +} + static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 53e3389..b3e01b1 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -348,10 +348,9 @@ class SHLL_QB_R2_DESC_BASE { dag 
OutOperandList = (outs GPR32Opnd:$rd); - dag InOperandList = (ins GPR32Opnd:$base, GPR32Opnd:$index); + dag InOperandList = (ins PtrRC:$base, PtrRC:$index); string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})"); - list Pattern = [(set GPR32Opnd:$rd, - (OpNode GPR32Opnd:$base, GPR32Opnd:$index))]; + list Pattern = [(set GPR32Opnd:$rd, (OpNode iPTR:$base, iPTR:$index))]; InstrItinClass Itinerary = itin; bit mayLoad = 1; } diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index ccf3458..536dff6 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -171,19 +171,19 @@ class NMADDS_FT; -class LWXC1_FT : - InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index), + InstSE<(outs DRC:$fd), (ins PtrRC:$base, PtrRC:$index), !strconcat(opstr, "\t$fd, ${index}(${base})"), - [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> { + [(set DRC:$fd, (OpNode (add iPTR:$base, iPTR:$index)))], Itin, FrmFI> { let AddedComplexity = 20; } -class SWXC1_FT : - InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index), + InstSE<(outs), (ins DRC:$fs, PtrRC:$base, PtrRC:$index), !strconcat(opstr, "\t$fs, ${index}(${base})"), - [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> { + [(OpNode DRC:$fs, (add iPTR:$base, iPTR:$index))], Itin, FrmFI> { let AddedComplexity = 20; } @@ -378,52 +378,30 @@ let Predicates = [NotFP64bit, HasStdEnc] in { // Indexed loads and stores. let Predicates = [HasFPIdx, HasStdEnc] in { - def LWXC1 : LWXC1_FT<"lwxc1", FGR32Opnd, GPR32Opnd, IIFLoad, load>, - LWXC1_FM<0>; - def SWXC1 : SWXC1_FT<"swxc1", FGR32Opnd, GPR32Opnd, IIFStore, store>, - SWXC1_FM<8>; + def LWXC1 : LWXC1_FT<"lwxc1", FGR32Opnd, IIFLoad, load>, LWXC1_FM<0>; + def SWXC1 : SWXC1_FT<"swxc1", FGR32Opnd, IIFStore, store>, SWXC1_FM<8>; } -let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in { - def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, GPR32Opnd, IIFLoad, load>, - LWXC1_FM<1>; - def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, GPR32Opnd, IIFStore, store>, - SWXC1_FM<9>; +let Predicates = [HasFPIdx, NotFP64bit, HasStdEnc] in { + def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, IIFLoad, load>, LWXC1_FM<1>; + def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, IIFStore, store>, SWXC1_FM<9>; } -let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in { - def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, GPR32Opnd, IIFLoad, load>, - LWXC1_FM<1>; - def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, GPR32Opnd, IIFStore, store>, - SWXC1_FM<9>; +let Predicates = [HasFPIdx, IsFP64bit, HasStdEnc], + DecoderNamespace="Mips64" in { + def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, IIFLoad, load>, LWXC1_FM<1>; + def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, IIFStore, store>, SWXC1_FM<9>; } -// n64 -let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in { - def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32Opnd, GPR64Opnd, IIFLoad, load>, - LWXC1_FM<0>; - def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64Opnd, GPR64Opnd, IIFLoad, - load>, LWXC1_FM<1>; - def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32Opnd, GPR64Opnd, IIFStore, - store>, SWXC1_FM<8>; - def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64Opnd, GPR64Opnd, IIFStore, - store>, SWXC1_FM<9>; +// Load/store doubleword indexed unaligned. +let Predicates = [NotFP64bit, HasStdEnc] in { + def LUXC1 : LWXC1_FT<"luxc1", AFGR64Opnd, IIFLoad>, LWXC1_FM<0x5>; + def SUXC1 : SWXC1_FT<"suxc1", AFGR64Opnd, IIFStore>, SWXC1_FM<0xd>; } -// Load/store doubleword indexed unaligned. 
-let Predicates = [NotMips64, HasStdEnc] in { - def LUXC1 : LWXC1_FT<"luxc1", AFGR64Opnd, GPR32Opnd, IIFLoad>, - LWXC1_FM<0x5>; - def SUXC1 : SWXC1_FT<"suxc1", AFGR64Opnd, GPR32Opnd, IIFStore>, - SWXC1_FM<0xd>; -} - -let Predicates = [HasMips64, HasStdEnc], - DecoderNamespace="Mips64" in { - def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, GPR32Opnd, IIFLoad>, - LWXC1_FM<0x5>; - def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, GPR32Opnd, IIFStore>, - SWXC1_FM<0xd>; +let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace="Mips64" in { + def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, IIFLoad>, LWXC1_FM<0x5>; + def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, IIFStore>, SWXC1_FM<0xd>; } /// Floating-point Aritmetic diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 714c7a8..5518b8c 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -279,6 +279,11 @@ def MipsMemAsmOperand : AsmOperandClass { let ParserMethod = "parseMemOperand"; } +def PtrRegAsmOperand : AsmOperandClass { + let Name = "PtrReg"; + let ParserMethod = "parsePtrReg"; +} + // Address operand def mem : Operand { let PrintMethod = "printMemOperand"; @@ -297,6 +302,8 @@ def mem_ea : Operand { def PtrRC : Operand { let MIOperandInfo = (ops ptr_rc); + let DecoderMethod = "DecodePtrRegisterClass"; + let ParserMatchClass = PtrRegAsmOperand; } // size operand of ext instruction -- cgit v1.1 From 6a15b6a269e79a8382348812ac32221fdc25b7ca Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 28 Aug 2013 03:04:02 +0000 Subject: X86Subtarget.h: Recognize x86_64-cygwin. In the LLVM side, x86_64-cygwin is almost as same as x86_64-mingw32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189436 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 0c1e999..14048cf 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -328,14 +328,11 @@ public: bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); } bool isTargetWin64() const { - // FIXME: x86_64-cygwin has not been released yet. return In64BitMode && TargetTriple.isOSWindows(); } bool isTargetWin32() const { - // FIXME: Cygwin is included for isTargetWin64 -- should it be included - // here too? - return !In64BitMode && (isTargetMingw() || isTargetWindows()); + return !In64BitMode && (isTargetCygMing() || isTargetWindows()); } bool isPICStyleSet() const { return PICStyle != PICStyles::None; } -- cgit v1.1 From 5be1752dcd514a0f92c5c380194015b26f0bf0b3 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 28 Aug 2013 03:04:09 +0000 Subject: X86JITInfo.cpp: Apply x64 version of X86CompilationCallback() to Cygwin64. For now, (defined(X86_64_JIT) && defined(__CYGWIN__)) satisfies Cygwin64. 
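A condensed sketch of the resulting guard (the surrounding callback body is elided; the System V side is inferred from the rest of the file, not shown in this hunk):

    #if defined(X86_64_JIT)
    # if defined(_WIN64) || defined(__CYGWIN__)
      // Win64 convention: previous frame in %rcx, return address in %rdx;
      // Cygwin64 follows the same convention.
    # else
      // System V convention: the same arguments go in %rdi/%rsi.
    # endif
    #endif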
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189437 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86JITInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index fc86e1e..e99f2d9 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -127,7 +127,7 @@ extern "C" { "movaps %xmm6, 96(%rsp)\n" "movaps %xmm7, 112(%rsp)\n" // JIT callee -#ifdef _WIN64 +#if defined(_WIN64) || defined(__CYGWIN__) "subq $32, %rsp\n" "movq %rbp, %rcx\n" // Pass prev frame and return address "movq 8(%rbp), %rdx\n" -- cgit v1.1 From 6b6345f0167fb9ddf60bdc51d616d7a9ae590442 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 28 Aug 2013 04:04:28 +0000 Subject: Remove support for the .debug_inlined section. No known software in use supports it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189439 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 7815ae9..36b6eb4 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -62,7 +62,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { PCSymbol = "."; SupportsDebugInformation = true; - DwarfUsesInlineInfoSection = true; UseDataRegionDirectives = MarkedJTDataRegions; // Exceptions handling -- cgit v1.1 From ead2d5a4be3012699252e015ce774733590bf9b0 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Wed, 28 Aug 2013 08:30:47 +0000 Subject: Fix use of uninitialized value added in r189400 (found by MemorySanitizer) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189456 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 633cd36..d7e8402 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -54,7 +54,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const { MCValue Value; - if (Layout && !getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) + if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) return false; if (Value.isAbsolute()) { @@ -85,7 +85,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, break; } Res = MCValue::get(Result); - } else if (Layout) { + } else { MCContext &Context = Layout->getAssembler().getContext(); const MCSymbolRefExpr *Sym = Value.getSymA(); MCSymbolRefExpr::VariantKind Modifier = Sym->getKind(); @@ -118,8 +118,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, } Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context); Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant()); - } else - return false; + } return true; } -- cgit v1.1 From 47e70960945ecb33a361987a9745e3dc80a1c78c Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 28 Aug 2013 09:01:51 +0000 Subject: [SystemZ] Extend memcmp support to all constant lengths This uses the infrastructure added for memcpy and memmove in r189331. 
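For instance (function names are illustrative), both of these constant-length calls can now be emitted inline on SystemZ -- a straight-line sequence of at most three CLCs up to 768 bytes, and a CLC loop beyond that:

    #include <cstring>

    int cmp300(const void *a, const void *b) {
      return std::memcmp(a, b, 300);   // 256 + 44 bytes: two CLCs, straight-line
    }

    int cmp4k(const void *a, const void *b) {
      return std::memcmp(a, b, 4096);  // > 768 bytes: CLC loop
    }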
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189458 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 83 ++++++++++++++++++++------ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 35 ++++++++--- 2 files changed, 92 insertions(+), 26 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 216ca34..dd230c6 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1954,6 +1954,18 @@ static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { return NewMBB; } +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + // Split MBB before MI and return the new block (the one that contains MI). static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, MachineBasicBlock *MBB) { @@ -2490,6 +2502,11 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, uint64_t SrcDisp = MI->getOperand(3).getImm(); uint64_t Length = MI->getOperand(4).getImm(); + // When generating more than one CLC, all but the last will need to + // branch to the end when a difference is found. + MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? + splitBlockAfter(MI, MBB) : 0); + // Check for the loop form, in which operand 5 is the trip count. if (MI->getNumExplicitOperands() > 5) { bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); @@ -2514,6 +2531,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB); // StartMBB: // # fall through to LoopMMB @@ -2521,39 +2539,54 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, // LoopMBB: // %ThisDestReg = phi [ %StartDestReg, StartMBB ], - // [ %NextDestReg, LoopMBB ] + // [ %NextDestReg, NextMBB ] // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], - // [ %NextSrcReg, LoopMBB ] + // [ %NextSrcReg, NextMBB ] // %ThisCountReg = phi [ %StartCountReg, StartMBB ], - // [ %NextCountReg, LoopMBB ] - // PFD 2, 768+DestDisp(%ThisDestReg) + // [ %NextCountReg, NextMBB ] + // ( PFD 2, 768+DestDisp(%ThisDestReg) ) // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) - // %NextDestReg = LA 256(%ThisDestReg) - // %NextSrcReg = LA 256(%ThisSrcReg) - // %NextCountReg = AGHI %ThisCountReg, -1 - // CGHI %NextCountReg, 0 - // JLH LoopMBB - // # fall through to DoneMMB + // ( JLH EndMBB ) // - // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. + // The prefetch is used only for MVC. The JLH is used only for CLC. 
MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) .addReg(StartDestReg).addMBB(StartMBB) - .addReg(NextDestReg).addMBB(LoopMBB); + .addReg(NextDestReg).addMBB(NextMBB); if (!HaveSingleBase) BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) .addReg(StartSrcReg).addMBB(StartMBB) - .addReg(NextSrcReg).addMBB(LoopMBB); + .addReg(NextSrcReg).addMBB(NextMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) .addReg(StartCountReg).addMBB(StartMBB) - .addReg(NextCountReg).addMBB(LoopMBB); - BuildMI(MBB, DL, TII->get(SystemZ::PFD)) - .addImm(SystemZ::PFD_WRITE) - .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); + .addReg(NextCountReg).addMBB(NextMBB); + if (Opcode == SystemZ::MVC) + BuildMI(MBB, DL, TII->get(SystemZ::PFD)) + .addImm(SystemZ::PFD_WRITE) + .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); BuildMI(MBB, DL, TII->get(Opcode)) .addReg(ThisDestReg).addImm(DestDisp).addImm(256) .addReg(ThisSrcReg).addImm(SrcDisp); + if (EndMBB) { + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + } + + // NextMBB: + // %NextDestReg = LA 256(%ThisDestReg) + // %NextSrcReg = LA 256(%ThisSrcReg) + // %NextCountReg = AGHI %ThisCountReg, -1 + // CGHI %NextCountReg, 0 + // JLH LoopMBB + // # fall through to DoneMMB + // + // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. + MBB = NextMBB; + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) .addReg(ThisDestReg).addImm(256).addReg(0); if (!HaveSingleBase) @@ -2599,6 +2632,22 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, DestDisp += ThisLength; SrcDisp += ThisLength; Length -= ThisLength; + // If there's another CLC to go, branch to the end if a difference + // was found. + if (EndMBB && Length > 0) { + MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + MBB = NextMBB; + } + } + if (EndMBB) { + MBB->addSuccessor(EndMBB); + MBB = EndMBB; + MBB->addLiveIn(SystemZ::CC); } MI->eraseFromParent(); diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 6026b1f..dc2f225 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -141,6 +141,28 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, return SDValue(); } +// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), +// deciding whether to use a loop or straight-line code. +static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, uint64_t Size) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + EVT PtrVT = Src1.getValueType(); + // A two-CLC sequence is a clear win over a loop, not least because it + // needs only one branch. A three-CLC sequence needs the same number + // of branches as a loop (i.e. 2), but is shorter. That brings us to + // lengths greater than 768 bytes. It seems relatively likely that + // a difference will be found within the first 768 bytes, so we just + // optimize for the smallest number of branch instructions, in order + // to avoid polluting the prediction buffer too much. A loop only ever + // needs 2 branches, whereas a straight-line sequence would need 3 or more. 
+ if (Size > 3 * 256) + return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, PtrVT), + DAG.getConstant(Size / 256, PtrVT)); + return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, PtrVT)); +} + // Convert the current CC value into an integer that is 0 if CC == 0, // less than zero if CC == 1 and greater than zero if CC >= 2. // The sequence starts with IPM, which puts CC into bits 29 and 28 @@ -159,17 +181,12 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const { - EVT PtrVT = Src1.getValueType(); if (ConstantSDNode *CSize = dyn_cast(Size)) { uint64_t Bytes = CSize->getZExtValue(); - if (Bytes >= 1 && Bytes <= 0x100) { - // A single CLC. - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, - Src1, Src2, Size, DAG.getConstant(0, PtrVT)); - SDValue Glue = Chain.getValue(1); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); - } + assert(Bytes > 0 && "Caller should have handled 0-size case"); + Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); + SDValue Glue = Chain.getValue(1); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); } return std::make_pair(SDValue(), SDValue()); } -- cgit v1.1 From f00539cc5a5e66ce6b7ce3779b00fd381e2d2dee Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 28 Aug 2013 10:12:09 +0000 Subject: [mips][msa] Added f[cs]af, f[cs]or, f[cs]ueq, f[cs]ul[et], f[cs]une, fsun, ftrunc_[su], hadd_[su], hsub_[su], sr[al]r, sr[al]ri git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189467 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 341 ++++++++++++++++++++++++++++++++++-- 1 file changed, 327 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 8383646..e7688ca 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -258,6 +258,9 @@ class DPSUB_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010011>; class FADD_W_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011011>; class FADD_D_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011011>; +class FCAF_W_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011010>; +class FCAF_D_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011010>; + class FCEQ_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011010>; class FCEQ_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011010>; @@ -270,12 +273,27 @@ class FCLE_D_ENC : MSA_3RF_FMT<0b0110, 0b1, 0b011010>; class FCLT_W_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011010>; class FCLT_D_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011010>; -class FCNE_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011010>; -class FCNE_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011010>; +class FCNE_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011100>; +class FCNE_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011100>; + +class FCOR_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011100>; +class FCOR_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011100>; + +class FCUEQ_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011010>; +class FCUEQ_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011010>; + +class FCULE_W_ENC : MSA_3RF_FMT<0b0111, 0b0, 0b011010>; +class FCULE_D_ENC : MSA_3RF_FMT<0b0111, 0b1, 0b011010>; + +class FCULT_W_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011010>; +class FCULT_D_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011010>; class FCUN_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011010>; class FCUN_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011010>; +class FCUNE_W_ENC : MSA_3RF_FMT<0b0010, 
0b0, 0b011100>; +class FCUNE_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011100>; + class FDIV_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011011>; class FDIV_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011011>; @@ -340,6 +358,9 @@ class FRCP_D_ENC : MSA_2RF_FMT<0b110010101, 0b1, 0b011110>; class FRSQRT_W_ENC : MSA_2RF_FMT<0b110010100, 0b0, 0b011110>; class FRSQRT_D_ENC : MSA_2RF_FMT<0b110010100, 0b1, 0b011110>; +class FSAF_W_ENC : MSA_3RF_FMT<0b1000, 0b0, 0b011010>; +class FSAF_D_ENC : MSA_3RF_FMT<0b1000, 0b1, 0b011010>; + class FSEQ_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011010>; class FSEQ_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011010>; @@ -349,8 +370,11 @@ class FSLE_D_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011010>; class FSLT_W_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011010>; class FSLT_D_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011010>; -class FSNE_W_ENC : MSA_3RF_FMT<0b1011, 0b0, 0b011010>; -class FSNE_D_ENC : MSA_3RF_FMT<0b1011, 0b1, 0b011010>; +class FSNE_W_ENC : MSA_3RF_FMT<0b1011, 0b0, 0b011100>; +class FSNE_D_ENC : MSA_3RF_FMT<0b1011, 0b1, 0b011100>; + +class FSOR_W_ENC : MSA_3RF_FMT<0b1001, 0b0, 0b011100>; +class FSOR_D_ENC : MSA_3RF_FMT<0b1001, 0b1, 0b011100>; class FSQRT_W_ENC : MSA_2RF_FMT<0b110010011, 0b0, 0b011110>; class FSQRT_D_ENC : MSA_2RF_FMT<0b110010011, 0b1, 0b011110>; @@ -358,6 +382,27 @@ class FSQRT_D_ENC : MSA_2RF_FMT<0b110010011, 0b1, 0b011110>; class FSUB_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011011>; class FSUB_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011011>; +class FSUEQ_W_ENC : MSA_3RF_FMT<0b1011, 0b0, 0b011010>; +class FSUEQ_D_ENC : MSA_3RF_FMT<0b1011, 0b1, 0b011010>; + +class FSULE_W_ENC : MSA_3RF_FMT<0b1111, 0b0, 0b011010>; +class FSULE_D_ENC : MSA_3RF_FMT<0b1111, 0b1, 0b011010>; + +class FSULT_W_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011010>; +class FSULT_D_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011010>; + +class FSUN_W_ENC : MSA_3RF_FMT<0b1001, 0b0, 0b011010>; +class FSUN_D_ENC : MSA_3RF_FMT<0b1001, 0b1, 0b011010>; + +class FSUNE_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011100>; +class FSUNE_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011100>; + +class FTRUNC_S_W_ENC : MSA_2RF_FMT<0b110100000, 0b0, 0b011110>; +class FTRUNC_S_D_ENC : MSA_2RF_FMT<0b110100000, 0b1, 0b011110>; + +class FTRUNC_U_W_ENC : MSA_2RF_FMT<0b110100001, 0b0, 0b011110>; +class FTRUNC_U_D_ENC : MSA_2RF_FMT<0b110100001, 0b1, 0b011110>; + class FTINT_S_W_ENC : MSA_2RF_FMT<0b110011100, 0b0, 0b011110>; class FTINT_S_D_ENC : MSA_2RF_FMT<0b110011100, 0b1, 0b011110>; @@ -367,6 +412,22 @@ class FTINT_U_D_ENC : MSA_2RF_FMT<0b110011101, 0b1, 0b011110>; class FTQ_H_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011011>; class FTQ_W_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011011>; +class HADD_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010101>; +class HADD_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010101>; +class HADD_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010101>; + +class HADD_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010101>; +class HADD_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010101>; +class HADD_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010101>; + +class HSUB_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010101>; +class HSUB_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010101>; +class HSUB_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010101>; + +class HSUB_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010101>; +class HSUB_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010101>; +class HSUB_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010101>; + class ILVEV_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010100>; class ILVEV_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010100>; class ILVEV_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010100>; @@ -406,11 +467,11 @@ class LDI_H_ENC : MSA_I10_FMT<0b010, 0b01, 0b001100>; class 
LDI_W_ENC : MSA_I10_FMT<0b010, 0b10, 0b001100>; class LDI_D_ENC : MSA_I10_FMT<0b010, 0b11, 0b001100>; -class MADD_Q_H_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011100>; -class MADD_Q_W_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011100>; +class MADD_Q_H_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011100>; +class MADD_Q_W_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011100>; -class MADDR_Q_H_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011100>; -class MADDR_Q_W_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011100>; +class MADDR_Q_H_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011100>; +class MADDR_Q_W_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011100>; class MADDV_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010010>; class MADDV_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010010>; @@ -477,11 +538,11 @@ class MOD_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010010>; class MOD_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010010>; class MOD_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010010>; -class MSUB_Q_H_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011100>; -class MSUB_Q_W_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011100>; +class MSUB_Q_H_ENC : MSA_3RF_FMT<0b0110, 0b0, 0b011100>; +class MSUB_Q_W_ENC : MSA_3RF_FMT<0b0110, 0b1, 0b011100>; -class MSUBR_Q_H_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011100>; -class MSUBR_Q_W_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011100>; +class MSUBR_Q_H_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011100>; +class MSUBR_Q_W_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011100>; class MSUBV_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010010>; class MSUBV_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010010>; @@ -491,8 +552,8 @@ class MSUBV_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010010>; class MUL_Q_H_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011100>; class MUL_Q_W_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011100>; -class MULR_Q_H_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011100>; -class MULR_Q_W_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011100>; +class MULR_Q_H_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011100>; +class MULR_Q_W_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011100>; class MULV_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010010>; class MULV_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010010>; @@ -586,6 +647,16 @@ class SRAI_H_ENC : MSA_BIT_H_FMT<0b001, 0b001001>; class SRAI_W_ENC : MSA_BIT_W_FMT<0b001, 0b001001>; class SRAI_D_ENC : MSA_BIT_D_FMT<0b001, 0b001001>; +class SRAR_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010101>; +class SRAR_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010101>; +class SRAR_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010101>; +class SRAR_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010101>; + +class SRARI_B_ENC : MSA_BIT_B_FMT<0b010, 0b001010>; +class SRARI_H_ENC : MSA_BIT_H_FMT<0b010, 0b001010>; +class SRARI_W_ENC : MSA_BIT_W_FMT<0b010, 0b001010>; +class SRARI_D_ENC : MSA_BIT_D_FMT<0b010, 0b001010>; + class SRL_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001101>; class SRL_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001101>; class SRL_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001101>; @@ -596,6 +667,16 @@ class SRLI_H_ENC : MSA_BIT_H_FMT<0b010, 0b001001>; class SRLI_W_ENC : MSA_BIT_W_FMT<0b010, 0b001001>; class SRLI_D_ENC : MSA_BIT_D_FMT<0b010, 0b001001>; +class SRLR_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010101>; +class SRLR_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010101>; +class SRLR_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010101>; +class SRLR_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010101>; + +class SRLRI_B_ENC : MSA_BIT_B_FMT<0b011, 0b001010>; +class SRLRI_H_ENC : MSA_BIT_H_FMT<0b011, 0b001010>; +class SRLRI_W_ENC : MSA_BIT_W_FMT<0b011, 0b001010>; +class SRLRI_D_ENC : MSA_BIT_D_FMT<0b011, 0b001010>; + class ST_B_ENC : MSA_I5_FMT<0b111, 0b00, 0b000111>; class ST_H_ENC : MSA_I5_FMT<0b111, 0b01, 0b000111>; class ST_W_ENC : MSA_I5_FMT<0b111, 0b10, 0b000111>; @@ -1245,6 +1326,13 @@ class FADD_D_DESC : 
MSA_3RF_DESC_BASE<"fadd.d", int_mips_fadd_d, NoItinerary, MSA128D, MSA128D>, IsCommutable; +class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, + NoItinerary, MSA128W, MSA128W>, + IsCommutable; +class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, + NoItinerary, MSA128D, MSA128D>, + IsCommutable; + class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", int_mips_fceq_w, NoItinerary, MSA128W, MSA128W>, IsCommutable; @@ -1274,6 +1362,34 @@ class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", int_mips_fcne_d, NoItinerary, MSA128D, MSA128D>, IsCommutable; +class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", int_mips_fcor_w, + NoItinerary, MSA128W, MSA128W>, + IsCommutable; +class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", int_mips_fcor_d, + NoItinerary, MSA128D, MSA128D>, + IsCommutable; + +class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", int_mips_fcueq_w, + NoItinerary, MSA128W, MSA128W>, + IsCommutable; +class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", int_mips_fcueq_d, + NoItinerary, MSA128D, MSA128D>, + IsCommutable; + +class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", int_mips_fcule_w, + NoItinerary, MSA128W, MSA128W>, + IsCommutable; +class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", int_mips_fcule_d, + NoItinerary, MSA128D, MSA128D>, + IsCommutable; + +class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", int_mips_fcult_w, + NoItinerary, MSA128W, MSA128W>, + IsCommutable; +class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", int_mips_fcult_d, + NoItinerary, MSA128D, MSA128D>, + IsCommutable; + class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", int_mips_fcun_w, NoItinerary, MSA128W, MSA128W>, IsCommutable; @@ -1281,6 +1397,13 @@ class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", int_mips_fcun_d, NoItinerary, MSA128D, MSA128D>, IsCommutable; +class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", int_mips_fcune_w, + NoItinerary, MSA128W, MSA128W>, + IsCommutable; +class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", int_mips_fcune_d, + NoItinerary, MSA128D, MSA128D>, + IsCommutable; + class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", int_mips_fdiv_w, NoItinerary, MSA128W, MSA128W>; class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", int_mips_fdiv_d, @@ -1388,6 +1511,11 @@ class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w, class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d, NoItinerary, MSA128D, MSA128D>; +class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, + NoItinerary, MSA128W, MSA128W>; +class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, + NoItinerary, MSA128D, MSA128D>; + class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, NoItinerary, MSA128W, MSA128W>; class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, @@ -1408,6 +1536,11 @@ class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, NoItinerary, MSA128D, MSA128D>; +class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, + NoItinerary, MSA128W, MSA128W>; +class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, + NoItinerary, MSA128D, MSA128D>; + class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", int_mips_fsqrt_w, NoItinerary, MSA128W, MSA128W>; class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", int_mips_fsqrt_d, @@ -1418,6 +1551,41 @@ class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", int_mips_fsub_w, class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", int_mips_fsub_d, NoItinerary, MSA128D, MSA128D>; +class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w, + 
NoItinerary, MSA128W, MSA128W>; +class FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d, + NoItinerary, MSA128D, MSA128D>; + +class FSULE_W_DESC : MSA_3RF_DESC_BASE<"fsule.w", int_mips_fsule_w, + NoItinerary, MSA128W, MSA128W>; +class FSULE_D_DESC : MSA_3RF_DESC_BASE<"fsule.d", int_mips_fsule_d, + NoItinerary, MSA128D, MSA128D>; + +class FSULT_W_DESC : MSA_3RF_DESC_BASE<"fsult.w", int_mips_fsult_w, + NoItinerary, MSA128W, MSA128W>; +class FSULT_D_DESC : MSA_3RF_DESC_BASE<"fsult.d", int_mips_fsult_d, + NoItinerary, MSA128D, MSA128D>; + +class FSUN_W_DESC : MSA_3RF_DESC_BASE<"fsun.w", int_mips_fsun_w, + NoItinerary, MSA128W, MSA128W>; +class FSUN_D_DESC : MSA_3RF_DESC_BASE<"fsun.d", int_mips_fsun_d, + NoItinerary, MSA128D, MSA128D>; + +class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w, + NoItinerary, MSA128W, MSA128W>; +class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d, + NoItinerary, MSA128D, MSA128D>; + +class FTRUNC_S_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.w", int_mips_ftrunc_s_w, + NoItinerary, MSA128W, MSA128W>; +class FTRUNC_S_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.d", int_mips_ftrunc_s_d, + NoItinerary, MSA128D, MSA128D>; + +class FTRUNC_U_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.w", int_mips_ftrunc_u_w, + NoItinerary, MSA128W, MSA128W>; +class FTRUNC_U_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.d", int_mips_ftrunc_u_d, + NoItinerary, MSA128D, MSA128D>; + class FTINT_S_W_DESC : MSA_2RF_DESC_BASE<"ftint_s.w", int_mips_ftint_s_w, NoItinerary, MSA128W, MSA128W>; class FTINT_S_D_DESC : MSA_2RF_DESC_BASE<"ftint_s.d", int_mips_ftint_s_d, @@ -1433,6 +1601,34 @@ class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w, NoItinerary, MSA128W, MSA128D>; +class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h, + NoItinerary, MSA128H, MSA128B>; +class HADD_S_W_DESC : MSA_3R_DESC_BASE<"hadd_s.w", int_mips_hadd_s_w, + NoItinerary, MSA128W, MSA128H>; +class HADD_S_D_DESC : MSA_3R_DESC_BASE<"hadd_s.d", int_mips_hadd_s_d, + NoItinerary, MSA128D, MSA128W>; + +class HADD_U_H_DESC : MSA_3R_DESC_BASE<"hadd_u.h", int_mips_hadd_u_h, + NoItinerary, MSA128H, MSA128B>; +class HADD_U_W_DESC : MSA_3R_DESC_BASE<"hadd_u.w", int_mips_hadd_u_w, + NoItinerary, MSA128W, MSA128H>; +class HADD_U_D_DESC : MSA_3R_DESC_BASE<"hadd_u.d", int_mips_hadd_u_d, + NoItinerary, MSA128D, MSA128W>; + +class HSUB_S_H_DESC : MSA_3R_DESC_BASE<"hsub_s.h", int_mips_hsub_s_h, + NoItinerary, MSA128H, MSA128B>; +class HSUB_S_W_DESC : MSA_3R_DESC_BASE<"hsub_s.w", int_mips_hsub_s_w, + NoItinerary, MSA128W, MSA128H>; +class HSUB_S_D_DESC : MSA_3R_DESC_BASE<"hsub_s.d", int_mips_hsub_s_d, + NoItinerary, MSA128D, MSA128W>; + +class HSUB_U_H_DESC : MSA_3R_DESC_BASE<"hsub_u.h", int_mips_hsub_u_h, + NoItinerary, MSA128H, MSA128B>; +class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w, + NoItinerary, MSA128W, MSA128H>; +class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d, + NoItinerary, MSA128D, MSA128W>; + class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, NoItinerary, MSA128B, MSA128B>; class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, NoItinerary, @@ -1828,6 +2024,24 @@ class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, NoItinerary, MSA128D, MSA128D>; +class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, NoItinerary, + MSA128B, MSA128B>; +class SRAR_H_DESC : 
MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, NoItinerary, + MSA128H, MSA128H>; +class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, NoItinerary, + MSA128W, MSA128W>; +class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, NoItinerary, + MSA128D, MSA128D>; + +class SRARI_B_DESC : MSA_BIT_B_DESC_BASE<"srari.b", int_mips_srari_b, + NoItinerary, MSA128B, MSA128B>; +class SRARI_H_DESC : MSA_BIT_H_DESC_BASE<"srari.h", int_mips_srari_h, + NoItinerary, MSA128H, MSA128H>; +class SRARI_W_DESC : MSA_BIT_W_DESC_BASE<"srari.w", int_mips_srari_w, + NoItinerary, MSA128W, MSA128W>; +class SRARI_D_DESC : MSA_BIT_D_DESC_BASE<"srari.d", int_mips_srari_d, + NoItinerary, MSA128D, MSA128D>; + class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, NoItinerary, MSA128B, MSA128B>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, NoItinerary, @@ -1846,6 +2060,24 @@ class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, NoItinerary, MSA128D, MSA128D>; +class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, NoItinerary, + MSA128B, MSA128B>; +class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, NoItinerary, + MSA128H, MSA128H>; +class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, NoItinerary, + MSA128W, MSA128W>; +class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, NoItinerary, + MSA128D, MSA128D>; + +class SRLRI_B_DESC : MSA_BIT_B_DESC_BASE<"srlri.b", int_mips_srlri_b, + NoItinerary, MSA128B, MSA128B>; +class SRLRI_H_DESC : MSA_BIT_H_DESC_BASE<"srlri.h", int_mips_srlri_h, + NoItinerary, MSA128H, MSA128H>; +class SRLRI_W_DESC : MSA_BIT_W_DESC_BASE<"srlri.w", int_mips_srlri_w, + NoItinerary, MSA128W, MSA128W>; +class SRLRI_D_DESC : MSA_BIT_D_DESC_BASE<"srlri.d", int_mips_srlri_d, + NoItinerary, MSA128D, MSA128D>; + class ST_DESC_BASE { @@ -2154,6 +2386,9 @@ def DPSUB_U_D : DPSUB_U_D_ENC, DPSUB_U_D_DESC, Requires<[HasMSA]>; def FADD_W : FADD_W_ENC, FADD_W_DESC, Requires<[HasMSA]>; def FADD_D : FADD_D_ENC, FADD_D_DESC, Requires<[HasMSA]>; +def FCAF_W : FCAF_W_ENC, FCAF_W_DESC, Requires<[HasMSA]>; +def FCAF_D : FCAF_D_ENC, FCAF_D_DESC, Requires<[HasMSA]>; + def FCEQ_W : FCEQ_W_ENC, FCEQ_W_DESC, Requires<[HasMSA]>; def FCEQ_D : FCEQ_D_ENC, FCEQ_D_DESC, Requires<[HasMSA]>; @@ -2169,9 +2404,24 @@ def FCLASS_D : FCLASS_D_ENC, FCLASS_D_DESC, Requires<[HasMSA]>; def FCNE_W : FCNE_W_ENC, FCNE_W_DESC, Requires<[HasMSA]>; def FCNE_D : FCNE_D_ENC, FCNE_D_DESC, Requires<[HasMSA]>; +def FCOR_W : FCOR_W_ENC, FCOR_W_DESC, Requires<[HasMSA]>; +def FCOR_D : FCOR_D_ENC, FCOR_D_DESC, Requires<[HasMSA]>; + +def FCUEQ_W : FCUEQ_W_ENC, FCUEQ_W_DESC, Requires<[HasMSA]>; +def FCUEQ_D : FCUEQ_D_ENC, FCUEQ_D_DESC, Requires<[HasMSA]>; + +def FCULE_W : FCULE_W_ENC, FCULE_W_DESC, Requires<[HasMSA]>; +def FCULE_D : FCULE_D_ENC, FCULE_D_DESC, Requires<[HasMSA]>; + +def FCULT_W : FCULT_W_ENC, FCULT_W_DESC, Requires<[HasMSA]>; +def FCULT_D : FCULT_D_ENC, FCULT_D_DESC, Requires<[HasMSA]>; + def FCUN_W : FCUN_W_ENC, FCUN_W_DESC, Requires<[HasMSA]>; def FCUN_D : FCUN_D_ENC, FCUN_D_DESC, Requires<[HasMSA]>; +def FCUNE_W : FCUNE_W_ENC, FCUNE_W_DESC, Requires<[HasMSA]>; +def FCUNE_D : FCUNE_D_ENC, FCUNE_D_DESC, Requires<[HasMSA]>; + def FDIV_W : FDIV_W_ENC, FDIV_W_DESC, Requires<[HasMSA]>; def FDIV_D : FDIV_D_ENC, FDIV_D_DESC, Requires<[HasMSA]>; @@ -2236,6 +2486,9 @@ def FRCP_D : FRCP_D_ENC, FRCP_D_DESC, Requires<[HasMSA]>; def FRSQRT_W : FRSQRT_W_ENC, FRSQRT_W_DESC, Requires<[HasMSA]>; def FRSQRT_D 
: FRSQRT_D_ENC, FRSQRT_D_DESC, Requires<[HasMSA]>; +def FSAF_W : FSAF_W_ENC, FSAF_W_DESC, Requires<[HasMSA]>; +def FSAF_D : FSAF_D_ENC, FSAF_D_DESC, Requires<[HasMSA]>; + def FSEQ_W : FSEQ_W_ENC, FSEQ_W_DESC, Requires<[HasMSA]>; def FSEQ_D : FSEQ_D_ENC, FSEQ_D_DESC, Requires<[HasMSA]>; @@ -2248,12 +2501,36 @@ def FSLT_D : FSLT_D_ENC, FSLT_D_DESC, Requires<[HasMSA]>; def FSNE_W : FSNE_W_ENC, FSNE_W_DESC, Requires<[HasMSA]>; def FSNE_D : FSNE_D_ENC, FSNE_D_DESC, Requires<[HasMSA]>; +def FSOR_W : FSOR_W_ENC, FSOR_W_DESC, Requires<[HasMSA]>; +def FSOR_D : FSOR_D_ENC, FSOR_D_DESC, Requires<[HasMSA]>; + def FSQRT_W : FSQRT_W_ENC, FSQRT_W_DESC, Requires<[HasMSA]>; def FSQRT_D : FSQRT_D_ENC, FSQRT_D_DESC, Requires<[HasMSA]>; def FSUB_W : FSUB_W_ENC, FSUB_W_DESC, Requires<[HasMSA]>; def FSUB_D : FSUB_D_ENC, FSUB_D_DESC, Requires<[HasMSA]>; +def FSUEQ_W : FSUEQ_W_ENC, FSUEQ_W_DESC, Requires<[HasMSA]>; +def FSUEQ_D : FSUEQ_D_ENC, FSUEQ_D_DESC, Requires<[HasMSA]>; + +def FSULE_W : FSULE_W_ENC, FSULE_W_DESC, Requires<[HasMSA]>; +def FSULE_D : FSULE_D_ENC, FSULE_D_DESC, Requires<[HasMSA]>; + +def FSULT_W : FSULT_W_ENC, FSULT_W_DESC, Requires<[HasMSA]>; +def FSULT_D : FSULT_D_ENC, FSULT_D_DESC, Requires<[HasMSA]>; + +def FSUN_W : FSUN_W_ENC, FSUN_W_DESC, Requires<[HasMSA]>; +def FSUN_D : FSUN_D_ENC, FSUN_D_DESC, Requires<[HasMSA]>; + +def FSUNE_W : FSUNE_W_ENC, FSUNE_W_DESC, Requires<[HasMSA]>; +def FSUNE_D : FSUNE_D_ENC, FSUNE_D_DESC, Requires<[HasMSA]>; + +def FTRUNC_S_W : FTRUNC_S_W_ENC, FTRUNC_S_W_DESC, Requires<[HasMSA]>; +def FTRUNC_S_D : FTRUNC_S_D_ENC, FTRUNC_S_D_DESC, Requires<[HasMSA]>; + +def FTRUNC_U_W : FTRUNC_U_W_ENC, FTRUNC_U_W_DESC, Requires<[HasMSA]>; +def FTRUNC_U_D : FTRUNC_U_D_ENC, FTRUNC_U_D_DESC, Requires<[HasMSA]>; + def FTINT_S_W : FTINT_S_W_ENC, FTINT_S_W_DESC, Requires<[HasMSA]>; def FTINT_S_D : FTINT_S_D_ENC, FTINT_S_D_DESC, Requires<[HasMSA]>; @@ -2263,6 +2540,22 @@ def FTINT_U_D : FTINT_U_D_ENC, FTINT_U_D_DESC, Requires<[HasMSA]>; def FTQ_H : FTQ_H_ENC, FTQ_H_DESC, Requires<[HasMSA]>; def FTQ_W : FTQ_W_ENC, FTQ_W_DESC, Requires<[HasMSA]>; +def HADD_S_H : HADD_S_H_ENC, HADD_S_H_DESC, Requires<[HasMSA]>; +def HADD_S_W : HADD_S_W_ENC, HADD_S_W_DESC, Requires<[HasMSA]>; +def HADD_S_D : HADD_S_D_ENC, HADD_S_D_DESC, Requires<[HasMSA]>; + +def HADD_U_H : HADD_U_H_ENC, HADD_U_H_DESC, Requires<[HasMSA]>; +def HADD_U_W : HADD_U_W_ENC, HADD_U_W_DESC, Requires<[HasMSA]>; +def HADD_U_D : HADD_U_D_ENC, HADD_U_D_DESC, Requires<[HasMSA]>; + +def HSUB_S_H : HSUB_S_H_ENC, HSUB_S_H_DESC, Requires<[HasMSA]>; +def HSUB_S_W : HSUB_S_W_ENC, HSUB_S_W_DESC, Requires<[HasMSA]>; +def HSUB_S_D : HSUB_S_D_ENC, HSUB_S_D_DESC, Requires<[HasMSA]>; + +def HSUB_U_H : HSUB_U_H_ENC, HSUB_U_H_DESC, Requires<[HasMSA]>; +def HSUB_U_W : HSUB_U_W_ENC, HSUB_U_W_DESC, Requires<[HasMSA]>; +def HSUB_U_D : HSUB_U_D_ENC, HSUB_U_D_DESC, Requires<[HasMSA]>; + def ILVEV_B : ILVEV_B_ENC, ILVEV_B_DESC, Requires<[HasMSA]>; def ILVEV_H : ILVEV_H_ENC, ILVEV_H_DESC, Requires<[HasMSA]>; def ILVEV_W : ILVEV_W_ENC, ILVEV_W_DESC, Requires<[HasMSA]>; @@ -2481,6 +2774,16 @@ def SRAI_H : SRAI_H_ENC, SRAI_H_DESC, Requires<[HasMSA]>; def SRAI_W : SRAI_W_ENC, SRAI_W_DESC, Requires<[HasMSA]>; def SRAI_D : SRAI_D_ENC, SRAI_D_DESC, Requires<[HasMSA]>; +def SRAR_B : SRAR_B_ENC, SRAR_B_DESC, Requires<[HasMSA]>; +def SRAR_H : SRAR_H_ENC, SRAR_H_DESC, Requires<[HasMSA]>; +def SRAR_W : SRAR_W_ENC, SRAR_W_DESC, Requires<[HasMSA]>; +def SRAR_D : SRAR_D_ENC, SRAR_D_DESC, Requires<[HasMSA]>; + +def SRARI_B : SRARI_B_ENC, SRARI_B_DESC, 
Requires<[HasMSA]>; +def SRARI_H : SRARI_H_ENC, SRARI_H_DESC, Requires<[HasMSA]>; +def SRARI_W : SRARI_W_ENC, SRARI_W_DESC, Requires<[HasMSA]>; +def SRARI_D : SRARI_D_ENC, SRARI_D_DESC, Requires<[HasMSA]>; + def SRL_B : SRL_B_ENC, SRL_B_DESC, Requires<[HasMSA]>; def SRL_H : SRL_H_ENC, SRL_H_DESC, Requires<[HasMSA]>; def SRL_W : SRL_W_ENC, SRL_W_DESC, Requires<[HasMSA]>; @@ -2491,6 +2794,16 @@ def SRLI_H : SRLI_H_ENC, SRLI_H_DESC, Requires<[HasMSA]>; def SRLI_W : SRLI_W_ENC, SRLI_W_DESC, Requires<[HasMSA]>; def SRLI_D : SRLI_D_ENC, SRLI_D_DESC, Requires<[HasMSA]>; +def SRLR_B : SRLR_B_ENC, SRLR_B_DESC, Requires<[HasMSA]>; +def SRLR_H : SRLR_H_ENC, SRLR_H_DESC, Requires<[HasMSA]>; +def SRLR_W : SRLR_W_ENC, SRLR_W_DESC, Requires<[HasMSA]>; +def SRLR_D : SRLR_D_ENC, SRLR_D_DESC, Requires<[HasMSA]>; + +def SRLRI_B : SRLRI_B_ENC, SRLRI_B_DESC, Requires<[HasMSA]>; +def SRLRI_H : SRLRI_H_ENC, SRLRI_H_DESC, Requires<[HasMSA]>; +def SRLRI_W : SRLRI_W_ENC, SRLRI_W_DESC, Requires<[HasMSA]>; +def SRLRI_D : SRLRI_D_ENC, SRLRI_D_DESC, Requires<[HasMSA]>; + def ST_B: ST_B_ENC, ST_B_DESC, Requires<[HasMSA]>; def ST_H: ST_H_ENC, ST_H_DESC, Requires<[HasMSA]>; def ST_W: ST_W_ENC, ST_W_DESC, Requires<[HasMSA]>; -- cgit v1.1 From a6c3a4ee76ef8464d3c83472e15af521ade7eeb4 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 28 Aug 2013 10:26:24 +0000 Subject: [mips][msa] Added cfcmsa, and ctcmsa The MSA control registers have been added as reserved registers, and are only used via ISD::Copy(To|From)Reg. The intrinsics are lowered into these nodes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189468 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrFormats.td | 5 ++++ lib/Target/Mips/MipsMSAInstrInfo.td | 24 +++++++++++++++++ lib/Target/Mips/MipsRegisterInfo.cpp | 10 +++++++ lib/Target/Mips/MipsRegisterInfo.td | 13 +++++++++ lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 48 ++++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsSEISelDAGToDAG.h | 2 ++ lib/Target/Mips/MipsSEInstrInfo.cpp | 4 +++ 7 files changed, 106 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index 35082c2..f337f9d 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -69,6 +69,11 @@ class MSA_3RF_FMT major, bits<1> df, bits<6> minor>: MSAInst { let Inst{5-0} = minor; } +class MSA_ELM_FMT major, bits<6> minor>: MSAInst { + let Inst{25-16} = major; + let Inst{5-0} = minor; +} + class MSA_ELM_B_FMT major, bits<6> minor>: MSAInst { let Inst{25-22} = major; let Inst{21-20} = 0b00; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index e7688ca..e53b29e 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -171,6 +171,8 @@ class CEQI_H_ENC : MSA_I5_FMT<0b000, 0b01, 0b000111>; class CEQI_W_ENC : MSA_I5_FMT<0b000, 0b10, 0b000111>; class CEQI_D_ENC : MSA_I5_FMT<0b000, 0b11, 0b000111>; +class CFCMSA_ENC : MSA_ELM_FMT<0b0001111110, 0b011001>; + class CLE_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001111>; class CLE_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001111>; class CLE_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001111>; @@ -219,6 +221,8 @@ class COPY_U_B_ENC : MSA_ELM_B_FMT<0b0011, 0b011001>; class COPY_U_H_ENC : MSA_ELM_H_FMT<0b0011, 0b011001>; class COPY_U_W_ENC : MSA_ELM_W_FMT<0b0011, 0b011001>; +class CTCMSA_ENC : MSA_ELM_FMT<0b0000111110, 0b011001>; + class DIV_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010010>; class DIV_S_H_ENC : 
MSA_3R_FMT<0b100, 0b01, 0b010010>; class DIV_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010010>; @@ -1155,6 +1159,14 @@ class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", int_mips_ceqi_w, NoItinerary, class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", int_mips_ceqi_d, NoItinerary, MSA128D, MSA128D>; +class CFCMSA_DESC { + dag OutOperandList = (outs GPR32:$rd); + dag InOperandList = (ins MSACtrl:$cs); + string AsmString = "cfcmsa\t$rd, $cs"; + InstrItinClass Itinerary = NoItinerary; + bit hasSideEffects = 1; +} + class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", int_mips_cle_s_b, NoItinerary, MSA128B, MSA128B>; class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", int_mips_cle_s_h, NoItinerary, @@ -1241,6 +1253,14 @@ class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", int_mips_copy_u_h, class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", int_mips_copy_u_w, NoItinerary, GPR32, MSA128W>; +class CTCMSA_DESC { + dag OutOperandList = (outs); + dag InOperandList = (ins MSACtrl:$cd, GPR32:$rs); + string AsmString = "ctcmsa\t$cd, $rs"; + InstrItinClass Itinerary = NoItinerary; + bit hasSideEffects = 1; +} + class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", int_mips_div_s_b, NoItinerary, MSA128B, MSA128B>; class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", int_mips_div_s_h, NoItinerary, @@ -2299,6 +2319,8 @@ def CEQI_H : CEQI_H_ENC, CEQI_H_DESC, Requires<[HasMSA]>; def CEQI_W : CEQI_W_ENC, CEQI_W_DESC, Requires<[HasMSA]>; def CEQI_D : CEQI_D_ENC, CEQI_D_DESC, Requires<[HasMSA]>; +def CFCMSA : CFCMSA_ENC, CFCMSA_DESC, Requires<[HasMSA]>; + def CLE_S_B : CLE_S_B_ENC, CLE_S_B_DESC, Requires<[HasMSA]>; def CLE_S_H : CLE_S_H_ENC, CLE_S_H_DESC, Requires<[HasMSA]>; def CLE_S_W : CLE_S_W_ENC, CLE_S_W_DESC, Requires<[HasMSA]>; @@ -2347,6 +2369,8 @@ def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC, Requires<[HasMSA]>; def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC, Requires<[HasMSA]>; def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC, Requires<[HasMSA]>; +def CTCMSA : CTCMSA_ENC, CTCMSA_DESC, Requires<[HasMSA]>; + def DIV_S_B : DIV_S_B_ENC, DIV_S_B_DESC, Requires<[HasMSA]>; def DIV_S_H : DIV_S_H_ENC, DIV_S_H_DESC, Requires<[HasMSA]>; def DIV_S_W : DIV_S_W_ENC, DIV_S_W_DESC, Requires<[HasMSA]>; diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 03d09a6..ee81bae 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -167,6 +167,16 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::DSPEFI); Reserved.set(Mips::DSPOutFlag); + // Reserve MSA control registers. + Reserved.set(Mips::MSAIR); + Reserved.set(Mips::MSACSR); + Reserved.set(Mips::MSAAccess); + Reserved.set(Mips::MSASave); + Reserved.set(Mips::MSAModify); + Reserved.set(Mips::MSARequest); + Reserved.set(Mips::MSAMap); + Reserved.set(Mips::MSAUnmap); + // Reserve RA if in mips16 mode. if (Subtarget.inMips16Mode()) { Reserved.set(Mips::RA); diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 22c4890..cef5ebf 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -231,6 +231,16 @@ let Namespace = "Mips" in { def DSPOutFlag : RegisterWithSubRegs<"", [DSPOutFlag16_19, DSPOutFlag20, DSPOutFlag21, DSPOutFlag22, DSPOutFlag23]>; + + // MSA-ASE control registers. 
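+ // Registers 0-7 below match the control-register numbers encoded in + // cfcmsa/ctcmsa; they stay reserved and are accessed only through + // ISD::CopyFromReg/ISD::CopyToReg (see getMSACtrlReg below).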
+ def MSAIR : Register<"0">; + def MSACSR : Register<"1">; + def MSAAccess : Register<"2">; + def MSASave : Register<"3">; + def MSAModify : Register<"4">; + def MSARequest : Register<"5">; + def MSAMap : Register<"6">; + def MSAUnmap : Register<"7">; } //===----------------------------------------------------------------------===// @@ -329,6 +339,9 @@ def MSA128W: RegisterClass<"Mips", [v4i32, v4f32], 128, def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128, (sequence "W%u", 0, 31)>; +def MSACtrl: RegisterClass<"Mips", [i32], 32, (add + MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>; + // Hi/Lo Registers def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>; def HI32 : RegisterClass<"Mips", [i32], 32, (add HI0)>; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 203196e..221aeda 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -66,6 +66,21 @@ void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, MIB.addReg(Mips::DSPEFI, Flag); } +unsigned MipsSEDAGToDAGISel::getMSACtrlReg(const SDValue RegIdx) const { + switch (cast<ConstantSDNode>(RegIdx)->getZExtValue()) { + default: + llvm_unreachable("Could not map int to register"); + case 0: return Mips::MSAIR; + case 1: return Mips::MSACSR; + case 2: return Mips::MSAAccess; + case 3: return Mips::MSASave; + case 4: return Mips::MSAModify; + case 5: return Mips::MSARequest; + case 6: return Mips::MSAMap; + case 7: return Mips::MSAUnmap; + } +} + bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr& MI) { unsigned DstReg = 0, ZeroReg = 0; @@ -432,6 +447,39 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { return std::make_pair(true, RegOpnd); } + case ISD::INTRINSIC_W_CHAIN: { + switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + default: + break; + + case Intrinsic::mips_cfcmsa: { + SDValue ChainIn = Node->getOperand(0); + SDValue RegIdx = Node->getOperand(2); + SDValue Reg = CurDAG->getCopyFromReg(ChainIn, DL, + getMSACtrlReg(RegIdx), MVT::i32); + return std::make_pair(true, Reg.getNode()); + } + } + break; + } + + case ISD::INTRINSIC_VOID: { + switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + default: + break; + + case Intrinsic::mips_ctcmsa: { + SDValue ChainIn = Node->getOperand(0); + SDValue RegIdx = Node->getOperand(2); + SDValue Value = Node->getOperand(3); + SDValue ChainOut = CurDAG->getCopyToReg(ChainIn, DL, + getMSACtrlReg(RegIdx), Value); + return std::make_pair(true, ChainOut.getNode()); + } + } + break; + } + case MipsISD::ThreadPointer: { EVT PtrVT = getTargetLowering()->getPointerTy(); unsigned RdhwrOpc, DestReg; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 09a15b0..9961934 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -30,6 +30,8 @@ private: void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, MachineFunction &MF); + unsigned getMSACtrlReg(const SDValue RegIdx) const; + bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, SDLoc dl, diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index e7967a8..374837e 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -110,6 +110,8 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
return; } + else if (Mips::MSACtrlRegClass.contains(SrcReg)) + Opc = Mips::CFCMSA; } else if (Mips::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg. if (Mips::CCRRegClass.contains(DestReg)) @@ -130,6 +132,8 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(DestReg, RegState::ImplicitDefine); return; } + else if (Mips::MSACtrlRegClass.contains(DestReg)) + Opc = Mips::CTCMSA; } else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_S; -- cgit v1.1 From 477168192c98e1f75a5bc6db3d34a177f327bd34 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 28 Aug 2013 10:31:43 +0000 Subject: [SystemZ] Add support for TMHH, TMHL, TMLH and TMLL For now just handles simple comparisons of an ANDed value with zero. The CC value provides enough information to do any comparison for a 2-bit mask, and some nonzero comparisons with more populated masks, but that's all future work. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189469 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZ.h | 7 ++++ lib/Target/SystemZ/SystemZISelLowering.cpp | 61 +++++++++++++++++++++++++----- lib/Target/SystemZ/SystemZISelLowering.h | 4 ++ lib/Target/SystemZ/SystemZInstrInfo.td | 13 +++++++ lib/Target/SystemZ/SystemZOperators.td | 1 + 5 files changed, 76 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index ea6c7d1..051ba1d 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -57,6 +57,13 @@ namespace llvm { const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2; const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2; + // Condition-code mask assignments for TEST UNDER MASK. + const unsigned CCMASK_TM_ALL_0 = CCMASK_0; + const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1; + const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2; + const unsigned CCMASK_TM_ALL_1 = CCMASK_3; + const unsigned CCMASK_TM = CCMASK_ANY; + // Mask assignments for PFD. const unsigned PFD_READ = 1; const unsigned PFD_WRITE = 2; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index dd230c6..1ab1ef4 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1072,8 +1072,8 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, // Return true if a comparison described by CCMask, CmpOp0 and CmpOp1 // is an equality comparison that is better implemented using unsigned // rather than signed comparison instructions. -static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0, - SDValue CmpOp1, unsigned CCMask) { +static bool preferUnsignedComparison(SDValue CmpOp0, SDValue CmpOp1, + unsigned CCMask) { // The test must be for equality or inequality. if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE) return false; @@ -1176,12 +1176,52 @@ static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1, return false; } +// See whether the comparison (Opcode CmpOp0, CmpOp1) can be implemented +// as a TEST UNDER MASK instruction when the condition being tested is +// as described by CCValid and CCMask. Update the arguments with the +// TM version if so. +static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0, + SDValue &CmpOp1, unsigned &CCValid, + unsigned &CCMask) { + // For now we just handle equality and inequality with zero. 
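+ // Equality with zero maps onto CCMASK_TM_ALL_0 ("all tested bits are 0"); + // inequality maps onto its complement within CCMASK_TM.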
+ if (CCMask != SystemZ::CCMASK_CMP_EQ && + (CCMask ^ CCValid) != SystemZ::CCMASK_CMP_EQ) + return; + ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1); + if (!ConstCmpOp1 || ConstCmpOp1->getZExtValue() != 0) + return; + + // Check whether the nonconstant input is an AND with a constant mask. + if (CmpOp0.getOpcode() != ISD::AND) + return; + SDValue AndOp0 = CmpOp0.getOperand(0); + SDValue AndOp1 = CmpOp0.getOperand(1); + ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AndOp1.getNode()); + if (!Mask) + return; + + // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. + uint64_t MaskVal = Mask->getZExtValue(); + if (!SystemZ::isImmLL(MaskVal) && !SystemZ::isImmLH(MaskVal) && + !SystemZ::isImmHL(MaskVal) && !SystemZ::isImmHH(MaskVal)) + return; + + // Go ahead and make the change. + Opcode = SystemZISD::TM; + CmpOp0 = AndOp0; + CmpOp1 = AndOp1; + CCValid = SystemZ::CCMASK_TM; + CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ? + SystemZ::CCMASK_TM_ALL_0 : + SystemZ::CCMASK_TM_ALL_0 ^ CCValid); +} + // Return a target node that compares CmpOp0 with CmpOp1 and stores a // 2-bit result in CC. Set CCValid to the CCMASK_* of all possible // 2-bit results and CCMask to the subset of those results that are // associated with Cond. -static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond, unsigned &CCValid, +static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, SDValue CmpOp0, + SDValue CmpOp1, ISD::CondCode Cond, unsigned &CCValid, unsigned &CCMask) { bool IsUnsigned = false; CCMask = CCMaskForCondCode(Cond); @@ -1193,7 +1233,7 @@ static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, CCMask &= CCValid; adjustZeroCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); - if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask)) + if (preferUnsignedComparison(CmpOp0, CmpOp1, CCMask)) IsUnsigned = true; } @@ -1205,9 +1245,9 @@ static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, (CCMask & SystemZ::CCMASK_CMP_UO)); } - SDLoc DL(CmpOp0); - return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP), - DL, MVT::Glue, CmpOp0, CmpOp1); + unsigned Opcode = (IsUnsigned ? 
SystemZISD::UCMP : SystemZISD::CMP); + adjustForTestUnderMask(Opcode, CmpOp0, CmpOp1, CCValid, CCMask); + return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1); } // Implement a 32-bit *MUL_LOHI operation by extending both operands to @@ -1256,7 +1296,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); unsigned CCValid, CCMask; - SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCValid, CCMask); + SDValue Flags = emitCmp(DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), Chain, DAG.getConstant(CCValid, MVT::i32), DAG.getConstant(CCMask, MVT::i32), Dest, Flags); @@ -1272,7 +1312,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, SDLoc DL(Op); unsigned CCValid, CCMask; - SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCValid, CCMask); + SDValue Flags = emitCmp(DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); SmallVector<SDValue, 5> Ops; Ops.push_back(TrueOp); @@ -1908,6 +1948,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(PCREL_WRAPPER); OPCODE(CMP); OPCODE(UCMP); + OPCODE(TM); OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 9831777..ec5051f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -45,6 +45,10 @@ namespace SystemZISD { // Likewise unsigned integer comparison. UCMP, + // Test under mask. The first operand is ANDed with the second operand + // and the condition codes are set on the result. + TM, + // Branches if a condition is true. Operand 0 is the chain operand; // operand 1 is the 4-bit condition-code mask, with bit N in // big-endian order meaning "branch if CC=N"; operand 2 is the diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 399b48a..7793818 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1034,6 +1034,19 @@ let mayLoad = 1, Defs = [CC] in let mayLoad = 1, Defs = [CC], Uses = [R0W] in defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; +// Test under mask. 
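+// tmll/tmlh test bits 0-15/16-31 of the register and tmhl/tmhh test bits +// 32-47/48-63; the imm64ll16/lh16/hl16/hh16 operands below restrict which +// 16-bit field the mask may occupy.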
+let Defs = [CC] in { + let isCodeGenOnly = 1 in { + def TMLL32 : CompareRI<"tmll", 0xA71, z_tm, GR32, imm32ll16>; + def TMLH32 : CompareRI<"tmlh", 0xA70, z_tm, GR32, imm32lh16>; + } + + def TMLL : CompareRI<"tmll", 0xA71, z_tm, GR64, imm64ll16>; + def TMLH : CompareRI<"tmlh", 0xA70, z_tm, GR64, imm64lh16>; + def TMHL : CompareRI<"tmhl", 0xA73, z_tm, GR64, imm64hl16>; + def TMHH : CompareRI<"tmhh", 0xA72, z_tm, GR64, imm64hh16>; +} + //===----------------------------------------------------------------------===// // Prefetch //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index ff64ea8..822195c 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -96,6 +96,7 @@ def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>; def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_tm : SDNode<"SystemZISD::TM", SDT_ZCmp, [SDNPOutGlue]>; def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain, SDNPInGlue]>; def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, -- cgit v1.1 From abbcf3bd47ad8ffa70f48ebd924f99fff5c22131 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 28 Aug 2013 10:44:47 +0000 Subject: [mips][msa] Added move.v git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189471 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 12 ++++++++++++ lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index e53b29e..eb95c78 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -542,6 +542,8 @@ class MOD_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010010>; class MOD_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010010>; class MOD_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010010>; +class MOVE_V_ENC : MSA_ELM_FMT<0b0010111110, 0b011001>; + class MSUB_Q_H_ENC : MSA_3RF_FMT<0b0110, 0b0, 0b011100>; class MSUB_Q_W_ENC : MSA_3RF_FMT<0b0110, 0b1, 0b011100>; @@ -1852,6 +1854,14 @@ class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, NoItinerary, class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, NoItinerary, MSA128D, MSA128D>; +class MOVE_V_DESC { + dag OutOperandList = (outs MSA128B:$wd); + dag InOperandList = (ins MSA128B:$ws); + string AsmString = "move.v\t$wd, $ws"; + list Pattern = []; + InstrItinClass Itinerary = NoItinerary; +} + class MSUB_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.h", int_mips_msub_q_h, NoItinerary, MSA128H, MSA128H>; class MSUB_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.w", int_mips_msub_q_w, @@ -2689,6 +2699,8 @@ def MOD_U_H : MOD_U_H_ENC, MOD_U_H_DESC, Requires<[HasMSA]>; def MOD_U_W : MOD_U_W_ENC, MOD_U_W_DESC, Requires<[HasMSA]>; def MOD_U_D : MOD_U_D_ENC, MOD_U_D_DESC, Requires<[HasMSA]>; +def MOVE_V : MOVE_V_ENC, MOVE_V_DESC, Requires<[HasMSA]>; + def MSUB_Q_H : MSUB_Q_H_ENC, MSUB_Q_H_DESC, Requires<[HasMSA]>; def MSUB_Q_W : MSUB_Q_W_ENC, MSUB_Q_W_DESC, Requires<[HasMSA]>; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 221aeda..27fcaa3 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ 
b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -463,6 +463,22 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { break; } + case ISD::INTRINSIC_WO_CHAIN: { + switch (cast(Node->getOperand(0))->getZExtValue()) { + default: + break; + + case Intrinsic::mips_move_v: + // Like an assignment but will always produce a move.v even if + // unnecessary. + return std::make_pair(true, + CurDAG->getMachineNode(Mips::MOVE_V, DL, + Node->getValueType(0), + Node->getOperand(1))); + } + break; + } + case ISD::INTRINSIC_VOID: { switch (cast(Node->getOperand(1))->getZExtValue()) { default: -- cgit v1.1 From 2fb982aa720ec1ef149b2d9add2673c313f08792 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 28 Aug 2013 11:21:58 +0000 Subject: AVX-512: added SQRT, VRSQRT14, VCOMISS, VUCOMISS, VRCP14, VPABS git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189472 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 443 +++++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrSSE.td | 46 ++-- 2 files changed, 466 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 90eb7d9..95b0de4 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2378,6 +2378,433 @@ let Predicates = [HasAVX512] in { def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; } + +let Defs = [EFLAGS], Predicates = [HasAVX512] in { + defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, + "ucomiss{z}">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, + "ucomisd{z}">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + let Pattern = [] in { + defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, + "comiss{z}">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, + "comisd{z}">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + } + defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, + load, "ucomiss">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, + load, "ucomisd">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + + defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, + load, "comiss">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, + load, "comisd">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; +} + +/// avx512_unop_p - AVX-512 unops in packed form. +multiclass avx512_fp_unop_p opc, string OpcodeStr, SDNode OpNode> { + def PSZr : AVX5128I, + EVEX, EVEX_V512; + def PSZm : AVX5128I, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr : AVX5128I, + EVEX, EVEX_V512, VEX_W; + def PDZm : AVX5128I, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms. +multiclass avx512_fp_unop_p_int opc, string OpcodeStr, + Intrinsic V16F32Int, Intrinsic V8F64Int> { + def PSZr_Int : AVX5128I, + EVEX, EVEX_V512; + def PSZm_Int : AVX5128I, EVEX, + EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr_Int : AVX5128I, + EVEX, EVEX_V512, VEX_W; + def PDZm_Int : AVX5128I, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +/// avx512_fp_unop_s - AVX-512 unops in scalar form. 
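+/// Instantiated below for the VRCP14 and VRSQRT14 scalar forms (reciprocal +/// and reciprocal square-root estimates; the 14 refers to their roughly +/// 2^-14 relative error).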
+multiclass avx512_fp_unop_s opc, string OpcodeStr, + Intrinsic F32Int, Intrinsic F64Int> { + let hasSideEffects = 0 in { + def SSZr : AVX5128I, EVEX_4V; + let mayLoad = 1 in { + def SSZm : AVX5128I, EVEX_4V, EVEX_CD8<32, CD8VT1>; + def SSZm_Int : AVX5128I, + EVEX_4V, EVEX_CD8<32, CD8VT1>; + } + def SDZr : AVX5128I, + EVEX_4V, VEX_W; + let mayLoad = 1 in { + def SDZm : AVX5128I, + EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; + def SDZm_Int : AVX5128I, + EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; + } +} +} + +defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14", int_x86_avx512_rcp14_ss, + int_x86_avx512_rcp14_sd>, + avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>, + avx512_fp_unop_p_int<0x4C, "vrcp14", + int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>; + +defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14", int_x86_avx512_rsqrt14_ss, + int_x86_avx512_rsqrt14_sd>, + avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>, + avx512_fp_unop_p_int<0x4E, "vrsqrt14", + int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>; + +multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, + Intrinsic V16F32Int, Intrinsic V8F64Int, + OpndItins itins_s, OpndItins itins_d> { + def PSZrr :AVX512PSI, + EVEX, EVEX_V512; + + let mayLoad = 1 in + def PSZrm : AVX512PSI, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + + def PDZrr : AVX512PDI, + EVEX, EVEX_V512; + + let mayLoad = 1 in + def PDZrm : AVX512PDI, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; + + def PSZr_Int : AVX512PSI, + EVEX, EVEX_V512; + def PSZm_Int : AVX512PSI, EVEX, + EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr_Int : AVX512PDI, + EVEX, EVEX_V512, VEX_W; + def PDZm_Int : AVX512PDI, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +multiclass avx512_sqrt_scalar opc, string OpcodeStr, + Intrinsic F32Int, Intrinsic F64Int, + OpndItins itins_s, OpndItins itins_d> { + def SSZr : SI, XS, EVEX_4V; + def SSZr_Int : SIi8, XS, EVEX_4V; + let mayLoad = 1 in { + def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; + def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; + } + def SDZr : SI, + XD, EVEX_4V, VEX_W; + def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; + let mayLoad = 1 in { + def SDZm : SI, + XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + def SDZm_Int : SIi8, + XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + } +} + + +defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", + int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, + SSE_SQRTSS, SSE_SQRTSD>, + avx512_sqrt_packed<0x51, "vsqrt", fsqrt, + int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512, + SSE_SQRTPS, SSE_SQRTPD>; + +def : Pat<(f32 (fsqrt FR32X:$src)), + (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (fsqrt (load addr:$src))), + (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; +def : Pat<(f64 (fsqrt FR64X:$src)), + (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>; +def : Pat<(f64 (fsqrt (load addr:$src))), + (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +def : Pat<(f32 (X86frsqrt FR32X:$src)), + (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (X86frsqrt (load addr:$src))), + (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +def : Pat<(f32 (X86frcp FR32X:$src)), + (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (X86frcp (load addr:$src))), + (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +multiclass avx512_fp_unop_rm opcps, bits<8> opcpd, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + PatFrag mem_frag32, PatFrag mem_frag64, + Intrinsic V4F32Int, Intrinsic V2F64Int, + 
CD8VForm VForm> { +let ExeDomain = SSEPackedSingle in { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr : AVX512AIi8; + + // Vector intrinsic operation, mem + def PSm : AVX512AIi8, + EVEX_CD8<32, VForm>; +} // ExeDomain = SSEPackedSingle + +let ExeDomain = SSEPackedDouble in { + // Vector intrinsic operation, reg + def PDr : AVX512AIi8; + + // Vector intrinsic operation, mem + def PDm : AVX512AIi8, + EVEX_CD8<64, VForm>; +} // ExeDomain = SSEPackedDouble +} + +multiclass avx512_fp_binop_rm opcss, bits<8> opcsd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic F64Int> { +let ExeDomain = GenericDomain in { + // Operation, reg. + let hasSideEffects = 0 in + def SSr : AVX512AIi8; + + // Intrinsic operation, reg. + def SSr_Int : AVX512AIi8; + + // Intrinsic operation, mem. + def SSm : AVX512AIi8, + EVEX_CD8<32, CD8VT1>; + + // Operation, reg. + let hasSideEffects = 0 in + def SDr : AVX512AIi8, VEX_W; + + // Intrinsic operation, reg. + def SDr_Int : AVX512AIi8, + VEX_W; + + // Intrinsic operation, mem. + def SDm : AVX512AIi8, + VEX_W, EVEX_CD8<64, CD8VT1>; +} // ExeDomain = GenericDomain +} + +let Predicates = [HasAVX512] in { + defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale", + int_x86_avx512_rndscale_ss, + int_x86_avx512_rndscale_sd>, EVEX_4V; + + defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f256mem, VR512, + memopv16f32, memopv8f64, + int_x86_avx512_rndscale_ps_512, + int_x86_avx512_rndscale_pd_512, CD8VF>, + EVEX, EVEX_V512; +} + +def : Pat<(ffloor FR32X:$src), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>; +def : Pat<(f64 (ffloor FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>; +def : Pat<(f32 (fnearbyint FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>; +def : Pat<(f64 (fnearbyint FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>; +def : Pat<(f32 (fceil FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>; +def : Pat<(f64 (fceil FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>; +def : Pat<(f32 (frint FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>; +def : Pat<(f64 (frint FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>; +def : Pat<(f32 (ftrunc FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>; +def : Pat<(f64 (ftrunc FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>; + +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x3))>; + +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x3))>; + //===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations @@ -2433,3 +2860,19 @@ def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 
imm:$imm))), def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; +multiclass avx512_vpabs opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop> { + def rr : AVX5128I, + EVEX; + def rm : AVX5128I, + EVEX; +} + +defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2d7ac73..b1cfbee 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3322,30 +3322,30 @@ defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, int_x86_avx_rcp_ps_256, SSE_RCPP>; -def : Pat<(f32 (fsqrt FR32:$src)), - (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; -def : Pat<(f32 (fsqrt (load addr:$src))), - (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; -def : Pat<(f64 (fsqrt FR64:$src)), - (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>; -def : Pat<(f64 (fsqrt (load addr:$src))), - (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; - -def : Pat<(f32 (X86frsqrt FR32:$src)), - (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; -def : Pat<(f32 (X86frsqrt (load addr:$src))), - (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; - -def : Pat<(f32 (X86frcp FR32:$src)), - (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; -def : Pat<(f32 (X86frcp (load addr:$src))), - (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; +let Predicates = [UseAVX] in { + def : Pat<(f32 (fsqrt FR32:$src)), + (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; + def : Pat<(f32 (fsqrt (load addr:$src))), + (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + def : Pat<(f64 (fsqrt FR64:$src)), + (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>; + def : Pat<(f64 (fsqrt (load addr:$src))), + (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + + def : Pat<(f32 (X86frsqrt FR32:$src)), + (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; + def : Pat<(f32 (X86frsqrt (load addr:$src))), + (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + + def : Pat<(f32 (X86frcp FR32:$src)), + (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; + def : Pat<(f32 (X86frcp (load addr:$src))), + (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; -let Predicates = [HasAVX] in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS VR128:$src, FR32)), -- cgit v1.1 From 2fd3e67dc6438cee5e32e0d7d7d42891df7edd96 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 28 Aug 2013 12:04:29 +0000 Subject: [mips][msa] Added load/store intrinsics. 
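The ld/ldx intrinsics are lowered to ordinary aligned vector loads and the st/stx intrinsics to ordinary aligned vector stores (lowerMSALoadIntr/lowerMSAStoreIntr below), so the existing load/store patterns do the selection. A minimal hand-written illustration at the IR level (not part of this change; the i8* operand type is an assumption based on the lowering code):

  declare <4 x i32> @llvm.mips.ld.w(i8*, i32)
  declare void @llvm.mips.st.w(<4 x i32>, i8*, i32)

  define void @copy16(i8* %src, i8* %dst) {
    ; pointer + constant offset becomes a single 16-byte-aligned access,
    ; selectable as ld.w/st.w (or ldx.w/stx.w for a register index)
    %v = call <4 x i32> @llvm.mips.ld.w(i8* %src, i32 0)
    call void @llvm.mips.st.w(<4 x i32> %v, i8* %dst, i32 0)
    ret void
  }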
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189476 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelDAGToDAG.cpp | 6 ++ lib/Target/Mips/MipsISelDAGToDAG.h | 5 ++ lib/Target/Mips/MipsInstrInfo.td | 3 + lib/Target/Mips/MipsMSAInstrInfo.td | 109 +++++++++++++++++++++++++++------ lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 14 +++++ lib/Target/Mips/MipsSEISelDAGToDAG.h | 3 + lib/Target/Mips/MipsSEISelLowering.cpp | 62 ++++++++++++++++++- lib/Target/Mips/MipsSEISelLowering.h | 1 + 8 files changed, 182 insertions(+), 21 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 15e43d4..725d9b4 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -69,6 +69,12 @@ bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, return false; } +bool MipsDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + llvm_unreachable("Unimplemented function."); + return false; +} + bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { llvm_unreachable("Unimplemented function."); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index a137a60..e98d590 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -57,6 +57,11 @@ private: virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; + // Complex Pattern. + /// (reg + reg). + virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + /// Fall back on this function if all else fails. virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 5518b8c..9b6c857 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -376,6 +376,9 @@ def addr : def addrRegImm : ComplexPattern; +def addrRegReg : + ComplexPattern; + def addrDefault : ComplexPattern; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index eb95c78..53fceb7 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -466,10 +466,15 @@ class LD_H_ENC : MSA_I5_FMT<0b110, 0b01, 0b000111>; class LD_W_ENC : MSA_I5_FMT<0b110, 0b10, 0b000111>; class LD_D_ENC : MSA_I5_FMT<0b110, 0b11, 0b000111>; -class LDI_B_ENC : MSA_I10_FMT<0b010, 0b00, 0b001100>; -class LDI_H_ENC : MSA_I10_FMT<0b010, 0b01, 0b001100>; -class LDI_W_ENC : MSA_I10_FMT<0b010, 0b10, 0b001100>; -class LDI_D_ENC : MSA_I10_FMT<0b010, 0b11, 0b001100>; +class LDI_B_ENC : MSA_I10_FMT<0b010, 0b00, 0b001100>; +class LDI_H_ENC : MSA_I10_FMT<0b010, 0b01, 0b001100>; +class LDI_W_ENC : MSA_I10_FMT<0b010, 0b10, 0b001100>; +class LDI_D_ENC : MSA_I10_FMT<0b010, 0b11, 0b001100>; + +class LDX_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b001111>; +class LDX_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b001111>; +class LDX_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b001111>; +class LDX_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b001111>; class MADD_Q_H_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011100>; class MADD_Q_W_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011100>; @@ -688,6 +693,11 @@ class ST_H_ENC : MSA_I5_FMT<0b111, 0b01, 0b000111>; class ST_W_ENC : MSA_I5_FMT<0b111, 0b10, 0b000111>; class ST_D_ENC : MSA_I5_FMT<0b111, 0b11, 0b000111>; +class STX_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b001111>; +class STX_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b001111>; +class STX_W_ENC : 
MSA_3R_FMT<0b111, 0b10, 0b001111>; +class STX_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b001111>; + class SUBS_S_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010001>; class SUBS_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010001>; class SUBS_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010001>; @@ -1705,7 +1715,7 @@ class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d, class LD_DESC_BASE { + Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins MemOpnd:$addr); string AsmString = !strconcat(instr_asm, "\t$wd, $addr"); @@ -1727,6 +1737,21 @@ class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w, class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d, NoItinerary, MSA128D>; +class LDX_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$wd, $addr"); + list Pattern = [(set RCWD:$wd, (TyNode (OpNode Addr:$addr)))]; + InstrItinClass Itinerary = itin; +} + +class LDX_B_DESC : LDX_DESC_BASE<"ldx.b", load, v16i8, NoItinerary, MSA128B>; +class LDX_H_DESC : LDX_DESC_BASE<"ldx.h", load, v8i16, NoItinerary, MSA128H>; +class LDX_W_DESC : LDX_DESC_BASE<"ldx.w", load, v4i32, NoItinerary, MSA128W>; +class LDX_D_DESC : LDX_DESC_BASE<"ldx.d", load, v2i64, NoItinerary, MSA128D>; + class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h, NoItinerary, MSA128H, MSA128H>; class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w, @@ -2110,7 +2135,7 @@ class SRLRI_D_DESC : MSA_BIT_D_DESC_BASE<"srlri.d", int_mips_srlri_d, class ST_DESC_BASE { + Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm> { dag OutOperandList = (outs); dag InOperandList = (ins RCWD:$wd, MemOpnd:$addr); string AsmString = !strconcat(instr_asm, "\t$wd, $addr"); @@ -2118,12 +2143,26 @@ class ST_DESC_BASE; class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, NoItinerary, MSA128H>; class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, NoItinerary, MSA128W>; class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, NoItinerary, MSA128D>; +class STX_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins RCWD:$wd, MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$wd, $addr"); + list Pattern = [(OpNode (TyNode RCWD:$wd), Addr:$addr)]; + InstrItinClass Itinerary = itin; +} + +class STX_B_DESC : STX_DESC_BASE<"stx.b", store, v16i8, NoItinerary, MSA128B>; +class STX_H_DESC : STX_DESC_BASE<"stx.h", store, v8i16, NoItinerary, MSA128H>; +class STX_W_DESC : STX_DESC_BASE<"stx.w", store, v4i32, NoItinerary, MSA128W>; +class STX_D_DESC : STX_DESC_BASE<"stx.d", store, v2i64, NoItinerary, MSA128D>; + class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b, NoItinerary, MSA128B, MSA128B>; class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h, @@ -2628,6 +2667,11 @@ def LDI_B : LDI_B_ENC, LDI_B_DESC, Requires<[HasMSA]>; def LDI_H : LDI_H_ENC, LDI_H_DESC, Requires<[HasMSA]>; def LDI_W : LDI_W_ENC, LDI_W_DESC, Requires<[HasMSA]>; +def LDX_B: LDX_B_ENC, LDX_B_DESC, Requires<[HasMSA]>; +def LDX_H: LDX_H_ENC, LDX_H_DESC, Requires<[HasMSA]>; +def LDX_W: LDX_W_ENC, LDX_W_DESC, Requires<[HasMSA]>; +def LDX_D: LDX_D_ENC, LDX_D_DESC, Requires<[HasMSA]>; + def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC, Requires<[HasMSA]>; def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC, Requires<[HasMSA]>; @@ -2845,6 +2889,11 @@ def ST_H: ST_H_ENC, ST_H_DESC, Requires<[HasMSA]>; def ST_W: ST_W_ENC, ST_W_DESC, Requires<[HasMSA]>; def ST_D: ST_D_ENC, ST_D_DESC, 
Requires<[HasMSA]>; +def STX_B: STX_B_ENC, STX_B_DESC, Requires<[HasMSA]>; +def STX_H: STX_H_ENC, STX_H_DESC, Requires<[HasMSA]>; +def STX_W: STX_W_ENC, STX_W_DESC, Requires<[HasMSA]>; +def STX_D: STX_D_ENC, STX_D_DESC, Requires<[HasMSA]>; + def SUBS_S_B : SUBS_S_B_ENC, SUBS_S_B_DESC, Requires<[HasMSA]>; def SUBS_S_H : SUBS_S_H_ENC, SUBS_S_H_DESC, Requires<[HasMSA]>; def SUBS_S_W : SUBS_S_W_ENC, SUBS_S_W_DESC, Requires<[HasMSA]>; @@ -2888,19 +2937,39 @@ def XORI_B : XORI_B_ENC, XORI_B_DESC, Requires<[HasMSA]>; class MSAPat pred = [HasMSA]> : Pat, Requires; -def LD_FH : MSAPat<(v8f16 (load addr:$addr)), - (LD_H addr:$addr)>; -def LD_FW : MSAPat<(v4f32 (load addr:$addr)), - (LD_W addr:$addr)>; -def LD_FD : MSAPat<(v2f64 (load addr:$addr)), - (LD_D addr:$addr)>; - -def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addr:$addr), - (ST_H MSA128H:$ws, addr:$addr)>; -def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addr:$addr), - (ST_W MSA128W:$ws, addr:$addr)>; -def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addr:$addr), - (ST_D MSA128D:$ws, addr:$addr)>; +def : MSAPat<(v16i8 (load addr:$addr)), (LD_B addr:$addr)>; +def : MSAPat<(v8i16 (load addr:$addr)), (LD_H addr:$addr)>; +def : MSAPat<(v4i32 (load addr:$addr)), (LD_W addr:$addr)>; +def : MSAPat<(v2i64 (load addr:$addr)), (LD_D addr:$addr)>; +def : MSAPat<(v8f16 (load addr:$addr)), (LD_H addr:$addr)>; +def : MSAPat<(v4f32 (load addr:$addr)), (LD_W addr:$addr)>; +def : MSAPat<(v2f64 (load addr:$addr)), (LD_D addr:$addr)>; + +def : MSAPat<(v8f16 (load addrRegImm:$addr)), (LD_H addrRegImm:$addr)>; +def : MSAPat<(v4f32 (load addrRegImm:$addr)), (LD_W addrRegImm:$addr)>; +def : MSAPat<(v2f64 (load addrRegImm:$addr)), (LD_D addrRegImm:$addr)>; + +def : MSAPat<(store (v16i8 MSA128B:$ws), addr:$addr), + (ST_B MSA128B:$ws, addr:$addr)>; +def : MSAPat<(store (v8i16 MSA128H:$ws), addr:$addr), + (ST_H MSA128H:$ws, addr:$addr)>; +def : MSAPat<(store (v4i32 MSA128W:$ws), addr:$addr), + (ST_W MSA128W:$ws, addr:$addr)>; +def : MSAPat<(store (v2i64 MSA128D:$ws), addr:$addr), + (ST_D MSA128D:$ws, addr:$addr)>; +def : MSAPat<(store (v8f16 MSA128H:$ws), addr:$addr), + (ST_H MSA128H:$ws, addr:$addr)>; +def : MSAPat<(store (v4f32 MSA128W:$ws), addr:$addr), + (ST_W MSA128W:$ws, addr:$addr)>; +def : MSAPat<(store (v2f64 MSA128D:$ws), addr:$addr), + (ST_D MSA128D:$ws, addr:$addr)>; + +def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addrRegImm:$addr), + (ST_H MSA128H:$ws, addrRegImm:$addr)>; +def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addrRegImm:$addr), + (ST_W MSA128W:$ws, addrRegImm:$addr)>; +def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrRegImm:$addr), + (ST_D MSA128D:$ws, addrRegImm:$addr)>; class MSABitconvertPat preds = [HasMSA]> : diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 27fcaa3..7161850 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -316,6 +316,20 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, return false; } +/// ComplexPattern used on MipsInstrInfo +/// Used on Mips Load/Store instructions +bool MipsSEDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + // Operand is a result from an ADD. 
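+ // Accept any (base + index) pair here; the MSA ldx/stx patterns reach this + // code through the addrRegReg complex pattern, and other addressing shapes + // simply fail to match.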
+ if (Addr.getOpcode() == ISD::ADD) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + return false; +} + bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { Base = Addr; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 9961934..22e597e 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -43,6 +43,9 @@ private: virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; + virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index dfa16c0..5341277 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -125,6 +125,9 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + computeRegisterProperties(); } @@ -174,6 +177,7 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); } return MipsTargetLowering::LowerOperation(Op, DAG); @@ -726,9 +730,24 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, } } +static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false, + false, false, 16); +} + SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { - switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { + unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); + switch (Intr) { default: return SDValue(); case Intrinsic::mips_extp: @@ -771,6 +790,47 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); case Intrinsic::mips_dpsqx_sa_w_ph: return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); + case Intrinsic::mips_ld_b: + case Intrinsic::mips_ld_h: + case Intrinsic::mips_ld_w: + case Intrinsic::mips_ld_d: + case Intrinsic::mips_ldx_b: + case Intrinsic::mips_ldx_h: + case Intrinsic::mips_ldx_w: + case Intrinsic::mips_ldx_d: + return lowerMSALoadIntr(Op, DAG, Intr); + } +} + +static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Value = Op->getOperand(2); + SDValue Address = Op->getOperand(3); + SDValue Offset = Op->getOperand(4); + EVT PtrTy = Address->getValueType(0); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false, + false, 16); +} + +SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 
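+ // Dispatch on the intrinsic ID, mirroring lowerINTRINSIC_W_CHAIN above.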
+ switch (Intr) { + default: + return SDValue(); + case Intrinsic::mips_st_b: + case Intrinsic::mips_st_h: + case Intrinsic::mips_st_w: + case Intrinsic::mips_st_d: + case Intrinsic::mips_stx_b: + case Intrinsic::mips_stx_h: + case Intrinsic::mips_stx_w: + case Intrinsic::mips_stx_d: + return lowerMSAStoreIntr(Op, DAG, Intr); } } diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index d672677..de43092 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -63,6 +63,7 @@ namespace llvm { SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const; -- cgit v1.1 From 3c380d5e28f86984b147fcd424736c498773f37e Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 28 Aug 2013 12:14:50 +0000 Subject: [mips][msa] Added bnz.df, bnz.v, bz.df, and bz.v These intrinsics are legalized to V(ALL|ANY)_(NON)?ZERO nodes, are matched as SN?Z_[BHWDV]_PSEUDO pseudos, and emitted as a branch/mov sequence to evaluate to 0 or 1. Note: The resulting code is sub-optimal since it doesn't seem to be possible to feed the result of an intrinsic directly into a brcond. At the moment it uses (SETCC (VALL_ZERO $ws), 0, SETEQ) and similar, which unnecessarily evaluates the boolean twice. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189478 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 4 ++ lib/Target/Mips/MipsISelLowering.h | 6 ++ lib/Target/Mips/MipsMSAInstrFormats.td | 5 ++ lib/Target/Mips/MipsMSAInstrInfo.td | 95 ++++++++++++++++++++++++++- lib/Target/Mips/MipsSEISelLowering.cpp | 114 ++++++++++++++++++++++++++++++++- lib/Target/Mips/MipsSEISelLowering.h | 3 + 6 files changed, 225 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c13f53a..4d1f329 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -208,6 +208,10 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP"; case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP"; case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP"; + case MipsISD::VALL_ZERO: return "MipsISD::VALL_ZERO"; + case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO"; + case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO"; + case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 29671b0..4cc5a6a 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -152,6 +152,12 @@ namespace llvm { SETCC_DSP, SELECT_CC_DSP, + // Vector comparisons + VALL_ZERO, + VANY_ZERO, + VALL_NONZERO, + VANY_NONZERO, + // Load/Store Left/Right nodes. 
      LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LWR,
diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
index f337f9d..b011674 100644
--- a/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -119,3 +119,8 @@ class MSA_VEC_FMT<bits<5> major, bits<6> minor>: MSAInst {
   let Inst{25-21} = major;
   let Inst{5-0} = minor;
 }
+
+class MSA_VECS10_FMT<bits<5> major, bits<6> minor>: MSAInst {
+  let Inst{25-21} = major;
+  let Inst{5-0} = minor;
+}
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 53fceb7..1814b1c 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -11,6 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+
+def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>;
+def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>;
+def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>;
+def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>;
+
 def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>;
 def immSExt10: ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
 
@@ -147,7 +154,14 @@ class BNEGI_H_ENC : MSA_BIT_H_FMT<0b101, 0b001001>;
 class BNEGI_W_ENC : MSA_BIT_W_FMT<0b101, 0b001001>;
 class BNEGI_D_ENC : MSA_BIT_D_FMT<0b101, 0b001001>;
 
-class BSEL_V_ENC : MSA_VEC_FMT<0b00110, 0b011110>;
+class BNZ_B_ENC : MSA_I10_FMT<0b000, 0b00, 0b001100>;
+class BNZ_H_ENC : MSA_I10_FMT<0b000, 0b01, 0b001100>;
+class BNZ_W_ENC : MSA_I10_FMT<0b000, 0b10, 0b001100>;
+class BNZ_D_ENC : MSA_I10_FMT<0b000, 0b11, 0b001100>;
+
+class BNZ_V_ENC : MSA_VEC_FMT<0b01000, 0b011110>;
+
+class BSEL_V_ENC : MSA_VECS10_FMT<0b00110, 0b011110>;
 
 class BSELI_B_ENC : MSA_I8_FMT<0b10, 0b000001>;
 
@@ -161,6 +175,13 @@ class BSETI_H_ENC : MSA_BIT_H_FMT<0b100, 0b001001>;
 class BSETI_W_ENC : MSA_BIT_W_FMT<0b100, 0b001001>;
 class BSETI_D_ENC : MSA_BIT_D_FMT<0b100, 0b001001>;
 
+class BZ_B_ENC : MSA_I10_FMT<0b001, 0b00, 0b001100>;
+class BZ_H_ENC : MSA_I10_FMT<0b001, 0b01, 0b001100>;
+class BZ_W_ENC : MSA_I10_FMT<0b001, 0b10, 0b001100>;
+class BZ_D_ENC : MSA_I10_FMT<0b001, 0b11, 0b001100>;
+
+class BZ_V_ENC : MSA_VECS10_FMT<0b01001, 0b011110>;
+
 class CEQ_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001111>;
 class CEQ_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001111>;
 class CEQ_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001111>;
@@ -875,6 +896,18 @@ class MSA_3RF_4RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   MSA_3R_4R_DESC_BASE<instr_asm, OpNode, itin, RCWD, RCWS, RCWT>;
 
+class MSA_CBRANCH_DESC_BASE<string instr_asm, RegisterClass RCWD> {
+  dag OutOperandList = (outs);
+  dag InOperandList = (ins RCWD:$wd, brtarget:$offset);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $offset");
+  list<dag> Pattern = [];
+  InstrItinClass Itinerary = IIBranch;
+  bit isBranch = 1;
+  bit isTerminator = 1;
+  bit hasDelaySlot = 1;
+  list<Register> Defs = [AT];
+}
+
 class MSA_INSERT_DESC_BASE<string instr_asm, RegisterClass RCD,
                            RegisterClass RCWS> {
@@ -1129,6 +1162,13 @@ class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", int_mips_bnegi_w,
 class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d,
                                          NoItinerary, MSA128D, MSA128D>;
 
+class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128B>;
+class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128H>;
+class BNZ_W_DESC : MSA_CBRANCH_DESC_BASE<"bnz.w", MSA128W>;
+class BNZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bnz.d", MSA128D>;
+
+class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128B>;
+
 class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, NoItinerary,
                                       MSA128B, MSA128B>;
 
@@ -1153,6 +1193,13 @@ class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w",
int_mips_bseti_w,
 class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", int_mips_bseti_d,
                                          NoItinerary, MSA128D, MSA128D>;
 
+class BZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bz.b", MSA128B>;
+class BZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bz.h", MSA128H>;
+class BZ_W_DESC : MSA_CBRANCH_DESC_BASE<"bz.w", MSA128W>;
+class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128D>;
+
+class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128B>;
+
 class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, NoItinerary,
                                     MSA128B, MSA128B>, IsCommutable;
 class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, NoItinerary,
@@ -2344,6 +2391,13 @@ def BNEGI_H : BNEGI_H_ENC, BNEGI_H_DESC, Requires<[HasMSA]>;
 def BNEGI_W : BNEGI_W_ENC, BNEGI_W_DESC, Requires<[HasMSA]>;
 def BNEGI_D : BNEGI_D_ENC, BNEGI_D_DESC, Requires<[HasMSA]>;
 
+def BNZ_B : BNZ_B_ENC, BNZ_B_DESC, Requires<[HasMSA]>;
+def BNZ_H : BNZ_H_ENC, BNZ_H_DESC, Requires<[HasMSA]>;
+def BNZ_W : BNZ_W_ENC, BNZ_W_DESC, Requires<[HasMSA]>;
+def BNZ_D : BNZ_D_ENC, BNZ_D_DESC, Requires<[HasMSA]>;
+
+def BNZ_V : BNZ_V_ENC, BNZ_V_DESC, Requires<[HasMSA]>;
+
 def BSEL_V : BSEL_V_ENC, BSEL_V_DESC, Requires<[HasMSA]>;
 
 def BSELI_B : BSELI_B_ENC, BSELI_B_DESC, Requires<[HasMSA]>;
@@ -2358,6 +2412,13 @@ def BSETI_H : BSETI_H_ENC, BSETI_H_DESC, Requires<[HasMSA]>;
 def BSETI_W : BSETI_W_ENC, BSETI_W_DESC, Requires<[HasMSA]>;
 def BSETI_D : BSETI_D_ENC, BSETI_D_DESC, Requires<[HasMSA]>;
 
+def BZ_B : BZ_B_ENC, BZ_B_DESC, Requires<[HasMSA]>;
+def BZ_H : BZ_H_ENC, BZ_H_DESC, Requires<[HasMSA]>;
+def BZ_W : BZ_W_ENC, BZ_W_DESC, Requires<[HasMSA]>;
+def BZ_D : BZ_D_ENC, BZ_D_DESC, Requires<[HasMSA]>;
+
+def BZ_V : BZ_V_ENC, BZ_V_DESC, Requires<[HasMSA]>;
+
 def CEQ_B : CEQ_B_ENC, CEQ_B_DESC, Requires<[HasMSA]>;
 def CEQ_H : CEQ_H_ENC, CEQ_H_DESC, Requires<[HasMSA]>;
 def CEQ_W : CEQ_W_ENC, CEQ_W_DESC, Requires<[HasMSA]>;
@@ -3117,3 +3178,35 @@ def : MSABitconvertReverseHInDPat;
 def : MSABitconvertReverseHInDPat;
 def : MSABitconvertReverseWInDPat;
 def : MSABitconvertReverseWInDPat;
+
+// Pseudos used to implement BNZ.df, and BZ.df
+
+class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
+                                   RegisterClass RCWS> :
+  MipsPseudo<(outs GPR32:$dst),
+             (ins RCWS:$ws),
+             [(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> {
+  bit usesCustomInserter = 1;
+}
+
+def SNZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8,
+                                                MSA128B>;
+def SNZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v8i16,
+                                                MSA128H>;
+def SNZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v4i32,
+                                                MSA128W>;
+def SNZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v2i64,
+                                                MSA128D>;
+def SNZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyNonZero, v16i8,
+                                                MSA128B>;
+
+def SZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v16i8,
+                                               MSA128B>;
+def SZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v8i16,
+                                               MSA128H>;
+def SZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v4i32,
+                                               MSA128W>;
+def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
+                                               MSA128D>;
+def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
+                                               MSA128B>;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 5341277..9108211 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -125,6 +125,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   setTargetDAGCombine(ISD::SUBE);
   setTargetDAGCombine(ISD::MUL);
 
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
@@ -554,6 +555,26 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   case Mips::BPOSGE32_PSEUDO:
     return emitBPOSGE32(MI, BB);
+  case Mips::SNZ_B_PSEUDO:
+    return
emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
+  case Mips::SNZ_H_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
+  case Mips::SNZ_W_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
+  case Mips::SNZ_D_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
+  case Mips::SNZ_V_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
+  case Mips::SZ_B_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
+  case Mips::SZ_H_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
+  case Mips::SZ_W_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
+  case Mips::SZ_D_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
+  case Mips::SZ_V_PSEUDO:
+    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
   }
 }
 
@@ -690,6 +711,16 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
   return DAG.getMergeValues(Vals, 2, DL);
 }
 
+static SDValue lowerMSABranchIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
+  SDLoc DL(Op);
+  SDValue Value = Op->getOperand(1);
+  EVT ResTy = Op->getValueType(0);
+
+  SDValue Result = DAG.getNode(Opc, DL, ResTy, Value);
+
+  return Result;
+}
+
 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                       SelectionDAG &DAG) const {
   switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
@@ -727,6 +758,20 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
     return lowerDSPIntr(Op, DAG, MipsISD::MSub);
   case Intrinsic::mips_msubu:
     return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
+  case Intrinsic::mips_bnz_b:
+  case Intrinsic::mips_bnz_h:
+  case Intrinsic::mips_bnz_w:
+  case Intrinsic::mips_bnz_d:
+    return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_NONZERO);
+  case Intrinsic::mips_bnz_v:
+    return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_NONZERO);
+  case Intrinsic::mips_bz_b:
+  case Intrinsic::mips_bz_h:
+  case Intrinsic::mips_bz_w:
+  case Intrinsic::mips_bz_d:
+    return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_ZERO);
+  case Intrinsic::mips_bz_v:
+    return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_ZERO);
   }
 }
 
@@ -830,7 +875,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
   case Intrinsic::mips_stx_h:
   case Intrinsic::mips_stx_w:
   case Intrinsic::mips_stx_d:
-   return lowerMSAStoreIntr(Op, DAG, Intr);
+    return lowerMSAStoreIntr(Op, DAG, Intr);
   }
 }
 
@@ -896,3 +941,70 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
   MI->eraseFromParent();   // The pseudo instruction is gone now.
   return Sink;
 }
+
+MachineBasicBlock * MipsSETargetLowering::
+emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
+                     unsigned BranchOp) const{
+  // $bb:
+  //  vany_nonzero $rd, $ws
+  //  =>
+  // $bb:
+  //  bnz.b $ws, $tbb
+  //  b $fbb
+  // $fbb:
+  //  li $rd1, 0
+  //  b $sink
+  // $tbb:
+  //  li $rd2, 1
+  // $sink:
+  //  $rd = phi($rd1, $fbb, $rd2, $tbb)
+
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+  DebugLoc DL = MI->getDebugLoc();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *Sink  = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, FBB);
+  F->insert(It, TBB);
+  F->insert(It, Sink);
+
+  // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + + // Insert the real bnz.b instruction to $BB. + BuildMI(BB, DL, TII->get(BranchOp)) + .addReg(MI->getOperand(1).getReg()) + .addMBB(TBB); + + // Fill $FBB. + unsigned RD1 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) + .addReg(Mips::ZERO).addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); + + // Fill $TBB. + unsigned RD2 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) + .addReg(Mips::ZERO).addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), + MI->getOperand(0).getReg()) + .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index de43092..d1a18e1 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -67,6 +67,9 @@ namespace llvm { MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitMSACBranchPseudo(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned BranchOp) const; }; } -- cgit v1.1 From c85bb78714e8e05fb3022148320ea685d7f98d60 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 28 Aug 2013 12:15:16 +0000 Subject: ARM: add patterns for vqdmlal with separate vqdmull and vqadds The vqdmlal and vqdmlls instructions are really just a fused pair consisting of a vqdmull.sN and a vqadd.sN. This adds patterns to LLVM so that we can switch Clang's CodeGen over to generating these instead of the special vqdmlal intrinsics. 
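As a hedged illustration (a sketch, not code from this commit; the function
names are made up), the equivalence being matched can be written with the
public NEON intrinsics from <arm_neon.h>. Once these patterns are in place,
both forms below should select the same vqdmlal.s16:

    #include <arm_neon.h>

    // Fused form: one vqdmlal.s16 instruction.
    int32x4_t fused(int32x4_t acc, int16x4_t a, int16x4_t b) {
      return vqdmlal_s16(acc, a, b);
    }

    // Unfused form: vqdmull.s16 followed by vqadd.s32, which the new
    // patterns recognize and combine back into vqdmlal.s16.
    int32x4_t unfused(int32x4_t acc, int16x4_t a, int16x4_t b) {
      int32x4_t prod = vqdmull_s16(a, b);
      return vqaddq_s32(acc, prod);
    }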
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189480 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index a5d8985..cc455ad 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4143,6 +4143,25 @@ defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlal", "s", int_arm_neon_vqdmlal>; defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; + // VMLS : Vector Multiply Subtract (integer and floating-point) defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; @@ -4200,6 +4219,25 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; + // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, -- cgit v1.1 From d3128a4a5a8531a256a224422d7da178d18459eb Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 28 Aug 2013 14:33:33 +0000 Subject: ARM: remove unused v(add|sub)hn and vqdml[as]l intrinsics. Clang is now generating cleaner IR, so this removes the old variants which should be completely unused. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189481 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index cc455ad..49ae334 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -3973,8 +3973,7 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", - int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", int_arm_neon_vraddhn, 1>; @@ -4140,8 +4139,8 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlal", "s", int_arm_neon_vqdmlal>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; + "vqdmlal", "s", null_frag>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), @@ -4216,8 +4215,8 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlsl", "s", int_arm_neon_vqdmlsl>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), @@ -4301,8 +4300,7 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", - int_arm_neon_vsubhn, 0>; +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", int_arm_neon_vrsubhn, 0>; -- cgit v1.1 From bafb5f8d9f415340d9035ee9430f9480da9a50fb Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 28 Aug 2013 14:33:35 +0000 Subject: [ARMv8] Add a missing IsThumb to t2LDAEXD. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189482 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMInstrThumb2.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c8c9c9b..0c5f169 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3260,7 +3260,7 @@ def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
                          (ins addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
                          "ldaexd", "\t$Rt, $Rt2, $addr", "",
-                         [], {?, ?, ?, ?}>, Requires<[HasV8]> {
+                         [], {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
   bits<4> Rt2;
   let Inst{11-8} = Rt2;
 
-- 
cgit v1.1

From 22266c1d4817fc30355a90bb264ede08482bba3a Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Wed, 28 Aug 2013 14:39:19 +0000
Subject: ARM: Use "dmb sy" for barriers on M-class CPUs

The usual default of "dmb ish" (inner-shareable) isn't even a valid
instruction on v6M or v7M (well, it does the same thing but software is
strongly discouraged from using it) so we should emit a full-system barrier
there.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189483 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 74353c1..063f1d4 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2600,7 +2600,10 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
   AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
   unsigned Domain = ARM_MB::ISH;
-  if (Subtarget->isSwift() && Ord == Release) {
+  if (Subtarget->isMClass()) {
+    // Only a full system barrier exists in the M-class architectures.
+    Domain = ARM_MB::SY;
+  } else if (Subtarget->isSwift() && Ord == Release) {
     // Swift happens to implement ISHST barriers in a way that's compatible with
     // Release semantics but weaker than ISH so we'd be fools not to use
     // it. Beware: other processors probably don't!
-- 
cgit v1.1

From b2e5453821ef27306036a9961818cf530a3ca8cb Mon Sep 17 00:00:00 2001
From: Joey Gouly
Date: Wed, 28 Aug 2013 16:39:20 +0000
Subject: [ARMv8] Fix a few things in one swoop.

# Add some negative tests.
# Fix some formatting issues.
# Add some missing IsThumb / ARMv8
# Fix some outs / ins mistakes.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189490 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMInstrInfo.td   |  6 +++---
 lib/Target/ARM/ARMInstrThumb2.td | 28 +++++++++++++++++-----------
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 201b640..08b3ef2 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -2844,11 +2844,11 @@ multiclass AI3strT<bits<4> op, string opc> {
 
 defm STRHT : AI3strT<0b1011, "strht">;
 
-def STL : AIstrrel<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
                    NoItinerary, "stl", "\t$Rt, $addr", []>;
-def STLB : AIstrrel<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
                     NoItinerary, "stlb", "\t$Rt, $addr", []>;
-def STLH : AIstrrel<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
                     NoItinerary, "stlh", "\t$Rt, $addr", []>;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 0c5f169..5d0c484 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1396,8 +1396,10 @@ def t2LDRHT   : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
 def t2LDRSBT  : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
 def t2LDRSHT  : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
 
-class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops, string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc, asm, "", pattern> {
+class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
+               string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc, asm, "",
+            pattern>, Requires<[IsThumb, HasV8]> {
   bits<4> Rt;
   bits<4> addr;
 
@@ -1413,10 +1415,12 @@ class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops, string opc,
   let Inst{15-12} = Rt;
 }
 
-def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt), (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
-def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
-def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
-
+def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
+                     (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
+                      (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
+                      (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
 
 // Store
 defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
@@ -1561,8 +1565,10 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
                IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
                "$addr.base = $wb", []>;
 
-class T2Istrrel<bits<2> bit54, dag oops, dag iops, string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc, asm, "", pattern> {
+class T2Istrrel<bits<2> bit54, dag oops, dag iops,
+                string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc, asm, "",
+            pattern>, Requires<[IsThumb, HasV8]> {
   bits<4> Rt;
   bits<4> addr;
 
@@ -1577,11 +1583,11 @@ class T2Istrrel<bits<2> bit54, dag oops, dag iops, string opc, string asm, list<dag> pattern>
   let Inst{15-12} = Rt;
 }
 
-def t2STL : T2Istrrel<0b10, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+def t2STL  : T2Istrrel<0b10, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
                       "stl", "\t$Rt, $addr", []>;
-def t2STLB : T2Istrrel<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+def t2STLB
 : T2Istrrel<0b00, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
                       "stlb", "\t$Rt, $addr", []>;
-def t2STLH : T2Istrrel<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+def t2STLH : T2Istrrel<0b01, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
                        "stlh", "\t$Rt, $addr", []>;
 
 // T2Ipl (Preload Data/Instruction) signals the memory system of possible future
-- 
cgit v1.1

From e54726a87a49e3254696b05787f4635dc59fe750 Mon Sep 17 00:00:00 2001
From: Kevin Enderby
Date: Thu, 29 Aug 2013 00:19:03 +0000
Subject: The darwin integrated assembler for X86 in 64-bit mode is not
 rejecting 32-bit absolute addressing in instructions like this:

	mov $_f, %rsi

which is not supported in 64-bit mode.

rdar://8827134


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189543 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 6eff224..9db4cab 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -323,8 +323,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
         report_fatal_error("TLVP symbol modifier should have been rip-rel");
     } else if (Modifier != MCSymbolRefExpr::VK_None)
       report_fatal_error("unsupported symbol modifier in relocation");
-    else
+    else {
       Type = macho::RIT_X86_64_Unsigned;
+      unsigned Kind = Fixup.getKind();
+      if (Kind == X86::reloc_signed_4byte)
+        report_fatal_error("32-bit absolute addressing is not supported in "
+                           "64-bit mode");
+    }
   }
 }
-- 
cgit v1.1

From 738073c4aa474e27c9d3c991daf593bddce54718 Mon Sep 17 00:00:00 2001
From: Hal Finkel
Date: Thu, 29 Aug 2013 03:25:05 +0000
Subject: Add useAA() to TargetSubtargetInfo

There are several optional (off-by-default) features in CodeGen that can make
use of alias analysis. These features are important for generating code for
some kinds of cores (for example the (in-order) PPC A2 core).

This adds a useAA() function to TargetSubtargetInfo to allow these features to
be enabled by default on a per-subtarget basis.

Here is the first use of this function: To control the default of the
-enable-aa-sched-mi feature.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189563 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/TargetSubtargetInfo.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index af0cef6..f624c32 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -35,3 +35,7 @@ bool TargetSubtargetInfo::enablePostRAScheduler(
   return false;
 }
 
+bool TargetSubtargetInfo::useAA() const {
+  return false;
+}
+
-- 
cgit v1.1

From 4edfa2278aa34876abffe67bfb66c0f92bd597a5 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Thu, 29 Aug 2013 11:56:53 +0000
Subject: AVX-512: added extend and truncate instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189580 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp      | 144 +++++++++++++++++++++++++-------
 lib/Target/X86/X86ISelLowering.h        |   6 ++
 lib/Target/X86/X86InstrAVX512.td        | 117 ++++++++++++++++++++++++++
 lib/Target/X86/X86InstrFragmentsSIMD.td |   7 ++
 4 files changed, 245 insertions(+), 29 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f9b3f1a..81008e9 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8856,6 +8856,37 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
 }
 
+static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
+                                       SelectionDAG &DAG) {
+  MVT VT = Op->getValueType(0).getSimpleVT();
+  SDValue In = Op->getOperand(0);
+  MVT InVT = In.getValueType().getSimpleVT();
+  SDLoc DL(Op);
+  unsigned int NumElts = VT.getVectorNumElements();
+  if (NumElts != 8 && NumElts != 16)
+    return SDValue();
+
+  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+  EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Now we have only mask extension
+  assert(InVT.getVectorElementType() == MVT::i1);
+  SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
+  const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+  SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+                           MachinePointerInfo::getConstantPool(),
+                           false, false, false, Alignment);
+
+  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
+  if (VT.is512BitVector())
+    return Brcst;
+  return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
+}
+
 static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
                                SelectionDAG &DAG) {
   if (Subtarget->hasFp256()) {
@@ -8874,6 +8905,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
   SDValue In = Op.getOperand(0);
   MVT SVT = In.getSimpleValueType();
 
+  if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
+    return LowerZERO_EXTEND_AVX512(Op, DAG);
+
   if (Subtarget->hasFp256()) {
     SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
     if (Res.getNode())
@@ -8902,11 +8936,37 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
 
 SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
-  MVT VT = Op.getSimpleValueType(); 
+  MVT VT = Op.getSimpleValueType();
   SDValue In = Op.getOperand(0);
-  MVT SVT = In.getSimpleValueType();
-
-  if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+  MVT InVT = In.getSimpleValueType();
+  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
+         "Invalid TRUNCATE operation");
+
+  if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
+    if (VT.getVectorElementType().getSizeInBits() >=8)
+      return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+
+    assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+    unsigned NumElts = InVT.getVectorNumElements();
+    assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
+    if (InVT.getSizeInBits() < 512) {
+      MVT ExtVT = (NumElts == 16)?
 MVT::v16i32 : MVT::v8i64;
+      In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+      InVT = ExtVT;
+    }
+    SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
+    const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+    SDValue CP = DAG.getConstantPool(C, getPointerTy());
+    unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+    SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+                             MachinePointerInfo::getConstantPool(),
+                             false, false, false, Alignment);
+    SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
+    SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
+    return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
+  }
+
+  if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
     // On AVX2, v4i64 -> v4i32 becomes VPERMD.
     if (Subtarget->hasInt256()) {
       static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
@@ -8937,7 +8997,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
   }
 
-  if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+  if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
     // On AVX2, v8i32 -> v8i16 becomed PSHUFB.
     if (Subtarget->hasInt256()) {
       In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
@@ -8995,11 +9055,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // Handle truncation of V256 to V128 using shuffles.
-  if (!VT.is128BitVector() || !SVT.is256BitVector())
+  if (!VT.is128BitVector() || !InVT.is256BitVector())
     return SDValue();
 
-  assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
-         "Invalid op");
   assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
 
   unsigned NumElems = VT.getVectorNumElements();
@@ -10282,28 +10340,29 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
   MVT InVT = In.getSimpleValueType();
   SDLoc dl(Op);
 
-  if (InVT.getVectorElementType().getSizeInBits() >=8 &&
-      VT.getVectorElementType().getSizeInBits() >= 32)
+  unsigned int NumElts = VT.getVectorNumElements();
+  if (NumElts != 8 && NumElts != 16)
+    return SDValue();
+
+  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
 
-  if (InVT.getVectorElementType() == MVT::i1) {
-    unsigned int NumElts = InVT.getVectorNumElements();
-    assert ((NumElts == 8 || NumElts == 16) &&
-      "Unsupported SIGN_EXTEND operation");
-    if (VT.getVectorElementType().getSizeInBits() >= 32) {
-      Constant *C =
-       ConstantInt::get(*DAG.getContext(),
-                        (NumElts == 8)? APInt(64, ~0ULL): APInt(32, ~0U));
-      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-      SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
-      unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
-      SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP,
-                             MachinePointerInfo::getConstantPool(),
-                             false, false, false, Alignment);
-      return DAG.getNode(X86ISD::VBROADCASTM, dl, VT, In, Ld);
-    }
-  }
-  return SDValue();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+
+  MVT ExtVT = (NumElts == 8) ?
 MVT::v8i64 : MVT::v16i32;
+  Constant *C = ConstantInt::get(*DAG.getContext(),
+    APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
+
+  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+  SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
+                           MachinePointerInfo::getConstantPool(),
+                           false, false, false, Alignment);
+  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
+  if (VT.is512BitVector())
+    return Brcst;
+  return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
 }
 
 static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
@@ -11142,10 +11201,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
   case Intrinsic::x86_sse2_max_pd:
   case Intrinsic::x86_avx_max_ps_256:
   case Intrinsic::x86_avx_max_pd_256:
+  case Intrinsic::x86_avx512_max_ps_512:
+  case Intrinsic::x86_avx512_max_pd_512:
   case Intrinsic::x86_sse_min_ps:
   case Intrinsic::x86_sse2_min_pd:
   case Intrinsic::x86_avx_min_ps_256:
-  case Intrinsic::x86_avx_min_pd_256: {
+  case Intrinsic::x86_avx_min_pd_256:
+  case Intrinsic::x86_avx512_min_ps_512:
+  case Intrinsic::x86_avx512_min_pd_512: {
     unsigned Opcode;
     switch (IntNo) {
     default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
     case Intrinsic::x86_sse2_max_pd:
     case Intrinsic::x86_avx_max_ps_256:
     case Intrinsic::x86_avx_max_pd_256:
+    case Intrinsic::x86_avx512_max_ps_512:
+    case Intrinsic::x86_avx512_max_pd_512:
       Opcode = X86ISD::FMAX;
       break;
     case Intrinsic::x86_sse_min_ps:
     case Intrinsic::x86_sse2_min_pd:
     case Intrinsic::x86_avx_min_ps_256:
     case Intrinsic::x86_avx_min_pd_256:
+    case Intrinsic::x86_avx512_min_ps_512:
+    case Intrinsic::x86_avx512_min_pd_512:
       Opcode = X86ISD::FMIN;
       break;
     }
@@ -13375,6 +13442,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
   case X86ISD::VZEXT:              return "X86ISD::VZEXT";
   case X86ISD::VSEXT:              return "X86ISD::VSEXT";
+  case X86ISD::VTRUNC:             return "X86ISD::VTRUNC";
+  case X86ISD::VTRUNCM:            return "X86ISD::VTRUNCM";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
@@ -16274,6 +16343,23 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
   }
 
+  if (Subtarget->hasAVX512() && VT.isVector() &&
+      Cond.getValueType().getVectorElementType() == MVT::i1) {
+    // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
+    // lowering on AVX-512. In this case we convert it to
+    // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
+    // The same situation for all 128 and 256-bit vectors of i8 and i16
+    EVT OpVT = LHS.getValueType();
+    if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
+        (OpVT.getVectorElementType() == MVT::i8 ||
+         OpVT.getVectorElementType() == MVT::i16)) {
+      Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
+      DCI.AddToWorklist(Cond.getNode());
+      return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
+    }
+    else
+      return SDValue();
+  }
   // If this is a select between two integer constants, try to do some
   // optimizations.
   if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 40b2a9c..632a5b6 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -254,6 +254,12 @@ namespace llvm {
       // VSEXT - Vector integer signed-extend.
       VSEXT,
 
+      // VTRUNC - Vector integer truncate.
+      VTRUNC,
+
+      // VTRUNCM - Vector integer truncate with mask.
+      VTRUNCM,
+
       // VFPEXT - Vector FP extend.
       VFPEXT,
 
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 95b0de4..6b2f160 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2805,6 +2805,123 @@ def : Pat<(v8f64 (frint VR512:$src)),
 def : Pat<(v8f64 (ftrunc VR512:$src)),
           (VRNDSCALEZPDr VR512:$src, (i32 0x3))>;
 
+//-------------------------------------------------
+// Integer truncate and extend operations
+//-------------------------------------------------
+
+multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
+                            RegisterClass dstRC, RegisterClass srcRC,
+                            RegisterClass KRC, X86MemOperand x86memop> {
+  def rr  : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst), (ins srcRC:$src),
+               !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+               []>, EVEX;
+
+  def krr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
+               (ins KRC:$mask, srcRC:$src),
+               !strconcat(OpcodeStr,
+                 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+               []>, EVEX, EVEX_KZ;
+
+  def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
+               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+               []>, EVEX;
+}
+defm VPMOVQB    : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVSQB   : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVUSQB  : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVQW    : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVSQW   : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVUSQW  : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVQD    : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
+                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVSQD   : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
+                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVUSQD  : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
+                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVDW    : avx512_trunc_sat<0x33, "vpmovdw",   VR256X, VR512, VK16WM,
+                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVSDW   : avx512_trunc_sat<0x23, "vpmovsdw",  VR256X, VR512, VK16WM,
+                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVUSDW  : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
+                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVDB    : avx512_trunc_sat<0x31, "vpmovdb",   VR128X, VR512, VK16WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+defm VPMOVSDB   : avx512_trunc_sat<0x21, "vpmovsdb",  VR128X, VR512, VK16WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+defm VPMOVUSDB  : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
+                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+
+def : Pat<(v16i8  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQBrr  VR512:$src)>;
+def : Pat<(v8i16  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQWrr  VR512:$src)>;
+def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr  VR512:$src)>;
+def : Pat<(v16i8  (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr  VR512:$src)>;
+def : Pat<(v8i32  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQDrr  VR512:$src)>;
+
+def : Pat<(v16i8  (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
+          (VPMOVDBkrr VK16WM:$mask, VR512:$src)>;
+def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
+          (VPMOVDWkrr
 VK16WM:$mask, VR512:$src)>;
+def : Pat<(v8i16  (X86vtruncm VK8WM:$mask,  (v8i64 VR512:$src))),
+          (VPMOVQWkrr VK8WM:$mask, VR512:$src)>;
+def : Pat<(v8i32  (X86vtruncm VK8WM:$mask,  (v8i64 VR512:$src))),
+          (VPMOVQDkrr VK8WM:$mask, VR512:$src)>;
+
+
+multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass DstRC,
+                         RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag,
+                         X86MemOperand x86memop, ValueType OpVT, ValueType InVT> {
+
+  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
+  def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+              [(set DstRC:$dst,
+                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
+              EVEX;
+}
+
+defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext,
+                             memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
+                             EVEX_CD8<8, CD8VQ>;
+defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext,
+                             memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
+                             EVEX_CD8<8, CD8VO>;
+defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext,
+                             memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
+                             EVEX_CD8<16, CD8VH>;
+defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext,
+                             memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
+                             EVEX_CD8<16, CD8VQ>;
+defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext,
+                             memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
+                             EVEX_CD8<32, CD8VH>;
+
+defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext,
+                             memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
+                             EVEX_CD8<8, CD8VQ>;
+defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext,
+                             memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
+                             EVEX_CD8<8, CD8VO>;
+defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext,
+                             memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
+                             EVEX_CD8<16, CD8VH>;
+defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext,
+                             memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
+                             EVEX_CD8<16, CD8VQ>;
+defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext,
+                             memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
+                             EVEX_CD8<32, CD8VH>;
+
 //===----------------------------------------------------------------------===//
 // VSHUFPS - VSHUFPD Operations
 
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ab7c602..f504480 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -105,6 +105,13 @@ def X86vsext   : SDNode<"X86ISD::VSEXT",
                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                              SDTCisInt<0>, SDTCisInt<1>]>>;
 
+def X86vtrunc   : SDNode<"X86ISD::VTRUNC",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisInt<0>, SDTCisInt<1>]>>;
+def X86vtruncm  : SDNode<"X86ISD::VTRUNCM",
+                        SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisInt<0>, SDTCisInt<1>,
+                                             SDTCisVec<2>, SDTCisInt<2>]>>;
 def X86vfpext  : SDNode<"X86ISD::VFPEXT",
                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                              SDTCisFP<0>, SDTCisFP<1>]>>;
-- 
cgit v1.1

From 441c557708b5dbe91f1799baf790ad418c23ea70 Mon Sep 17 00:00:00 2001
From: Cameron Esfahani
Date: Thu, 29 Aug 2013 20:23:14 +0000
Subject: Clean up some usage of Triple. The base class has methods for
 determining if the target is iOS and Linux.
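As a hedged illustration (hypothetical helper function; only the Triple
predicates themselves appear in this change), the cleanup replaces
open-coded enum comparisons with the base-class queries:

    #include "llvm/ADT/Triple.h"

    // Before: TT.getOS() == llvm::Triple::Linux and
    //         TT.getOS() == llvm::Triple::IOS
    static bool isLinuxOrIOS(const llvm::Triple &TT) {
      return TT.isOSLinux() || TT.isiOS();
    }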
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189604 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64Subtarget.h | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 2 +- lib/Target/ARM/ARMSubtarget.h | 6 +++--- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 3 +-- lib/Target/TargetLibraryInfo.cpp | 4 ++-- lib/Target/X86/X86Subtarget.h | 6 ++---- 6 files changed, 10 insertions(+), 13 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 35a7c8d..4ffb605 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -46,7 +46,7 @@ public: bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } - bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } + bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool hasNEON() const { return HasNEON; } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 063f1d4..df9c78a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -421,7 +421,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // Use divmod compiler-rt calls for iOS 5.0 and later. - if (Subtarget->getTargetTriple().getOS() == Triple::IOS && + if (Subtarget->getTargetTriple().isiOS() && !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index ad7f1b3..b13ff9d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -275,10 +275,10 @@ public: const Triple &getTargetTriple() const { return TargetTriple; } - bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } + bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; } - bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } + bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } + bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetELF() const { return !isTargetDarwin(); } // ARM EABI is the bare-metal EABI described in ARM ABI documents and // can be accessed via -target arm-none-eabi. This is NOT GNUEABI. 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index a247b02..e4c3727 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -58,8 +58,7 @@ public: } bool isTargetDarwin() const { Triple TT(STI.getTargetTriple()); - Triple::OSType OS = TT.getOS(); - return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS; + return TT.isOSDarwin(); } unsigned getMachineSoImmOpValue(unsigned SoImm) const; diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 8696b57..972f7cb 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -350,7 +350,7 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, if (T.isMacOSX()) { if (T.isMacOSXVersionLT(10, 5)) TLI.setUnavailable(LibFunc::memset_pattern16); - } else if (T.getOS() == Triple::IOS) { + } else if (T.isiOS()) { if (T.isOSVersionLT(3, 0)) TLI.setUnavailable(LibFunc::memset_pattern16); } else { @@ -562,7 +562,7 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, } // The following functions are available on at least Linux: - if (T.getOS() != Triple::Linux) { + if (!T.isOSLinux()) { TLI.setUnavailable(LibFunc::dunder_strdup); TLI.setUnavailable(LibFunc::dunder_strtok_r); TLI.setUnavailable(LibFunc::dunder_isoc99_scanf); diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 14048cf..67b88eb 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -311,10 +311,8 @@ public: return (TargetTriple.getEnvironment() == Triple::ELF || TargetTriple.isOSBinFormatELF()); } - bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } - bool isTargetNaCl() const { - return TargetTriple.getOS() == Triple::NaCl; - } + bool isTargetLinux() const { return TargetTriple.isOSLinux(); } + bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; } -- cgit v1.1 From 88acef0b8e93d065aa4de164422ce4c546a7cd5f Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 29 Aug 2013 22:41:46 +0000 Subject: ARM: Improve pattern for isel mul of vector by scalar. In addition to recognizing when the multiply's second argument is coming from an explicit VDUPLANE, also look for a plain scalar f32 reference and reference it via the corresponding vector lane. 
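As a hedged illustration (a sketch, not code from this commit; the function
name is made up), C-level source like the following produces the DAG shape
the new pattern targets -- an fmul whose second operand is a NEONvdup of a
plain scalar f32:

    #include <arm_neon.h>

    float32x4_t scale(float32x4_t v, float s) {
      // vdupq_n_f32(s) reaches ISel as a NEONvdup of an f32 value; with
      // the new pattern the multiply can be selected as a by-lane
      // vmul.f32 instead of first materializing the duplicated vector.
      return vmulq_f32(v, vdupq_n_f32(s));
    }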
rdar://14870054

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189619 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMInstrNEON.td | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 49ae334..f1bd37e 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4022,6 +4022,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                              (DSubReg_i32_reg imm:$lane))),
                      (SubReg_i32_lane imm:$lane)))>;
 
+
+def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+          (VMULslfd DPR:$Rn,
+            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+            (i32 0))>;
+def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+          (VMULslfq QPR:$Rn,
+            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+            (i32 0))>;
+
+
 // VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
 defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                           IIC_VMULi16Q, IIC_VMULi32Q,
-- 
cgit v1.1

From 7248968fa529726b44d41bd25403d50c74db4bc4 Mon Sep 17 00:00:00 2001
From: Bill Schmidt
Date: Fri, 30 Aug 2013 02:29:45 +0000
Subject: [PowerPC] Add loads, stores, and related things to fast-isel.

This is the next big chunk of fast-isel code.  The primary purpose is
to implement selection of loads and stores, but there is a lot of
drag-along to support this.  The common code to analyze addresses for
both loads and stores is substantial.  It's also necessary to add the
materialization code for global values.

Related to load-store processing is the code to fold loads into
integer extends, since otherwise we generate lots of redundant
instructions.  We also need to add some overrides to some FastEmit
routines to ensure we don't assign GPR 0 to a virtual register when
this would change the meaning of an instruction.

I added handling for selection of a few binary arithmetic
instructions, to enable committing some test cases I wrote a while
back.

Finally, a couple of miscellaneous changes:

 * I cleaned up some poor style from a previous patch in
   PPCISelLowering.cpp, pointed out by David Blaikie.

 * I enlarged the Addr.Offset field to avoid sign problems with 32-bit
   offsets.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189636 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCFastISel.cpp     | 768 ++++++++++++++++++++++++++++++++-
 lib/Target/PowerPC/PPCISelLowering.cpp |   6 +-
 lib/Target/PowerPC/PPCInstr64Bit.td    |   9 +
 3 files changed, 776 insertions(+), 7 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 8db4432..a308ade 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -52,7 +52,7 @@ typedef struct Address {
     int FI;
   } Base;
 
-  int Offset;
+  long Offset;
 
   // Innocuous defaults for our address.
   Address()
@@ -90,21 +90,45 @@ class PPCFastISel : public FastISel {
                                const LoadInst *LI);
     virtual bool FastLowerArguments();
     virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
+    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+                                     const TargetRegisterClass *RC,
+                                     unsigned Op0, bool Op0IsKill,
+                                     uint64_t Imm);
+    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    unsigned Op0, bool Op0IsKill);
+    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+                                     const TargetRegisterClass *RC,
+                                     unsigned Op0, bool Op0IsKill,
+                                     unsigned Op1, bool Op1IsKill);
 
   // Instruction selection routines.
   private:
+    bool SelectLoad(const Instruction *I);
+    bool SelectStore(const Instruction *I);
     bool SelectBranch(const Instruction *I);
     bool SelectIndirectBr(const Instruction *I);
+    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
     bool SelectRet(const Instruction *I);
     bool SelectIntExt(const Instruction *I);
 
   // Utility routines.
  private:
+    bool isTypeLegal(Type *Ty, MVT &VT);
+    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                     bool isZExt, unsigned DestReg);
+    bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+                     const TargetRegisterClass *RC, bool IsZExt = true,
+                     unsigned FP64LoadOpc = PPC::LFD);
+    bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
+    bool PPCComputeAddress(const Value *Obj, Address &Addr);
+    void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+                            unsigned &IndexReg);
     bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                        unsigned DestReg, bool IsZExt);
     unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
+    unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    unsigned PPCMaterializeInt(const Constant *C, MVT VT);
     unsigned PPCMaterialize32BitInt(int64_t Imm,
                                     const TargetRegisterClass *RC);
@@ -187,6 +211,439 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
   }
 }
 
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel, and return its equivalent machine type in VT.
+// FIXME: Copied directly from ARM -- factor into base class?
+bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+  EVT Evt = TLI.getValueType(Ty, true);
+
+  // Only handle simple types.
+  if (Evt == MVT::Other || !Evt.isSimple()) return false;
+  VT = Evt.getSimpleVT();
+
+  // Handle all legal types, i.e. a register that will directly hold this
+  // value.
+  return TLI.isTypeLegal(VT);
+}
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel as a load target, and return its equivalent machine type in VT.
+bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+  if (isTypeLegal(Ty, VT)) return true;
+
+  // If this is a type than can be sign or zero-extended to a basic operation
+  // go ahead and accept it now.
+  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
+    return true;
+  }
+
+  return false;
+}
+
+// Given a value Obj, create an Address object Addr that represents its
+// address.  Return false if we can't handle it.
+bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
+  const User *U = NULL;
+  unsigned Opcode = Instruction::UserOp1;
+  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+    // Don't walk into other basic blocks unless the object is an alloca from
+    // another block, otherwise it may not have a virtual register assigned.
+    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+      Opcode = I->getOpcode();
+      U = I;
+    }
+  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+    Opcode = C->getOpcode();
+    U = C;
+  }
+
+  switch (Opcode) {
+    default:
+      break;
+    case Instruction::BitCast:
+      // Look through bitcasts.
+      return PPCComputeAddress(U->getOperand(0), Addr);
+    case Instruction::IntToPtr:
+      // Look past no-op inttoptrs.
+      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+        return PPCComputeAddress(U->getOperand(0), Addr);
+      break;
+    case Instruction::PtrToInt:
+      // Look past no-op ptrtoints.
+      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+        return PPCComputeAddress(U->getOperand(0), Addr);
+      break;
+    case Instruction::GetElementPtr: {
+      Address SavedAddr = Addr;
+      long TmpOffset = Addr.Offset;
+
+      // Iterate through the GEP folding the constants into offsets where
+      // we can.
+      gep_type_iterator GTI = gep_type_begin(U);
+      for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
+           II != IE; ++II, ++GTI) {
+        const Value *Op = *II;
+        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+          const StructLayout *SL = TD.getStructLayout(STy);
+          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+          TmpOffset += SL->getElementOffset(Idx);
+        } else {
+          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+          for (;;) {
+            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+              // Constant-offset addressing.
+              TmpOffset += CI->getSExtValue() * S;
+              break;
+            }
+            if (isa<AddOperator>(Op) &&
+                (!isa<Instruction>(Op) ||
+                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+                 == FuncInfo.MBB) &&
+                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+              // An add (in the same block) with a constant operand. Fold the
+              // constant.
+              ConstantInt *CI =
+                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+              TmpOffset += CI->getSExtValue() * S;
+              // Iterate on the other operand.
+              Op = cast<AddOperator>(Op)->getOperand(0);
+              continue;
+            }
+            // Unsupported
+            goto unsupported_gep;
+          }
+        }
+      }
+
+      // Try to grab the base operand now.
+      Addr.Offset = TmpOffset;
+      if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
+
+      // We failed, restore everything and try the other options.
+      Addr = SavedAddr;
+
+      unsupported_gep:
+      break;
+    }
+    case Instruction::Alloca: {
+      const AllocaInst *AI = cast<AllocaInst>(Obj);
+      DenseMap<const AllocaInst*, int>::iterator SI =
+        FuncInfo.StaticAllocaMap.find(AI);
+      if (SI != FuncInfo.StaticAllocaMap.end()) {
+        Addr.BaseType = Address::FrameIndexBase;
+        Addr.Base.FI = SI->second;
+        return true;
+      }
+      break;
+    }
+  }
+
+  // FIXME: References to parameters fall through to the behavior
+  // below.  They should be able to reference a frame index since
+  // they are stored to the stack, so we can get "ld rx, offset(r1)"
+  // instead of "addi ry, r1, offset / ld rx, 0(ry)".  Obj will
+  // just contain the parameter.  Try to handle this with a FI.
+
+  // Try to get this in a register if nothing else has worked.
+  if (Addr.Base.Reg == 0)
+    Addr.Base.Reg = getRegForValue(Obj);
+
+  // Prevent assignment of base register to X0, which is inappropriate
+  // for loads and stores alike.
+  if (Addr.Base.Reg != 0)
+    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+  return Addr.Base.Reg != 0;
+}
+
+// Fix up some addresses that can't be used directly.  For example, if
+// an offset won't fit in an instruction field, we may need to move it
+// into an index register.
+void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+                                     unsigned &IndexReg) {
+
+  // Check whether the offset fits in the instruction field.
+ if (!isInt<16>(Addr.Offset)) + UseOffset = false; + + // If this is a stack pointer and the offset needs to be simplified then + // put the alloca address into a register, set the base type back to + // register and continue. This should almost never happen. + if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) { + unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + ResultReg).addFrameIndex(Addr.Base.FI).addImm(0); + Addr.Base.Reg = ResultReg; + Addr.BaseType = Address::RegBase; + } + + if (!UseOffset) { + IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context) + : Type::getInt64Ty(*Context)); + const ConstantInt *Offset = + ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset)); + IndexReg = PPCMaterializeInt(Offset, MVT::i64); + assert(IndexReg && "Unexpected error in PPCMaterializeInt!"); + } +} + +// Emit a load instruction if possible, returning true if we succeeded, +// otherwise false. See commentary below for how the register class of +// the load is determined. +bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + const TargetRegisterClass *RC, + bool IsZExt, unsigned FP64LoadOpc) { + unsigned Opc; + bool UseOffset = true; + + // If ResultReg is given, it determines the register class of the load. + // Otherwise, RC is the register class to use. If the result of the + // load isn't anticipated in this block, both may be zero, in which + // case we must make a conservative guess. In particular, don't assign + // R0 or X0 to the result register, as the result may be used in a load, + // store, add-immediate, or isel that won't permit this. (Though + // perhaps the spill and reload of live-exit values would handle this?) + const TargetRegisterClass *UseRC = + (ResultReg ? MRI.getRegClass(ResultReg) : + (RC ? RC : + (VT == MVT::f64 ? &PPC::F8RCRegClass : + (VT == MVT::f32 ? &PPC::F4RCRegClass : + (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : + &PPC::GPRC_and_GPRC_NOR0RegClass))))); + + bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass); + + switch (VT.SimpleTy) { + default: // e.g., vector types not handled + return false; + case MVT::i8: + Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8; + break; + case MVT::i16: + Opc = (IsZExt ? + (Is32BitInt ? PPC::LHZ : PPC::LHZ8) : + (Is32BitInt ? PPC::LHA : PPC::LHA8)); + break; + case MVT::i32: + Opc = (IsZExt ? + (Is32BitInt ? PPC::LWZ : PPC::LWZ8) : + (Is32BitInt ? PPC::LWA_32 : PPC::LWA)); + if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0)) + UseOffset = false; + break; + case MVT::i64: + Opc = PPC::LD; + assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) && + "64-bit load with 32-bit target??"); + UseOffset = ((Addr.Offset & 3) == 0); + break; + case MVT::f32: + Opc = PPC::LFS; + break; + case MVT::f64: + Opc = FP64LoadOpc; + break; + } + + // If necessary, materialize the offset into a register and use + // the indexed form. Also handle stack pointers with special needs. + unsigned IndexReg = 0; + PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + if (ResultReg == 0) + ResultReg = createResultReg(UseRC); + + // Note: If we still have a frame index here, we know the offset is + // in range, as otherwise PPCSimplifyAddress would have converted it + // into a RegBase. 
+  if (Addr.BaseType == Address::FrameIndexBase) {
+
+    MachineMemOperand *MMO =
+      FuncInfo.MF->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
+        MFI.getObjectAlignment(Addr.Base.FI));
+
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+  // Base reg with offset in range.
+  } else if (UseOffset) {
+
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+      .addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+  // Indexed form.
+  } else {
+    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
+    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+    // is hard to get at.
+    switch (Opc) {
+      default:          llvm_unreachable("Unexpected opcode!");
+      case PPC::LBZ:    Opc = PPC::LBZX;    break;
+      case PPC::LBZ8:   Opc = PPC::LBZX8;   break;
+      case PPC::LHZ:    Opc = PPC::LHZX;    break;
+      case PPC::LHZ8:   Opc = PPC::LHZX8;   break;
+      case PPC::LHA:    Opc = PPC::LHAX;    break;
+      case PPC::LHA8:   Opc = PPC::LHAX8;   break;
+      case PPC::LWZ:    Opc = PPC::LWZX;    break;
+      case PPC::LWZ8:   Opc = PPC::LWZX8;   break;
+      case PPC::LWA:    Opc = PPC::LWAX;    break;
+      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
+      case PPC::LD:     Opc = PPC::LDX;     break;
+      case PPC::LFS:    Opc = PPC::LFSX;    break;
+      case PPC::LFD:    Opc = PPC::LFDX;    break;
+    }
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+      .addReg(Addr.Base.Reg).addReg(IndexReg);
+  }
+
+  return true;
+}
+
+// Attempt to fast-select a load instruction.
+bool PPCFastISel::SelectLoad(const Instruction *I) {
+  // FIXME: No atomic loads are supported.
+  if (cast<LoadInst>(I)->isAtomic())
+    return false;
+
+  // Verify we have a legal type before going any further.
+  MVT VT;
+  if (!isLoadTypeLegal(I->getType(), VT))
+    return false;
+
+  // See if we can handle this address.
+  Address Addr;
+  if (!PPCComputeAddress(I->getOperand(0), Addr))
+    return false;
+
+  // Look at the currently assigned register for this instruction
+  // to determine the required register class.  This is necessary
+  // to constrain RA from using R0/X0 when this is not legal.
+  unsigned AssignedReg = FuncInfo.ValueMap[I];
+  const TargetRegisterClass *RC =
+    AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+
+  unsigned ResultReg = 0;
+  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
+    return false;
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+// Emit a store instruction to store SrcReg at Addr.
+bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
+  assert(SrcReg && "Nothing to store!");
+  unsigned Opc;
+  bool UseOffset = true;
+
+  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
+  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+  switch (VT.SimpleTy) {
+    default: // e.g., vector types not handled
+      return false;
+    case MVT::i8:
+      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
+      break;
+    case MVT::i16:
+      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
+      break;
+    case MVT::i32:
+      assert(Is32BitInt && "Not GPRC for i32??");
+      Opc = PPC::STW;
+      break;
+    case MVT::i64:
+      Opc = PPC::STD;
+      UseOffset = ((Addr.Offset & 3) == 0);
+      break;
+    case MVT::f32:
+      Opc = PPC::STFS;
+      break;
+    case MVT::f64:
+      Opc = PPC::STFD;
+      break;
+  }
+
+  // If necessary, materialize the offset into a register and use
+  // the indexed form.  Also handle stack pointers with special needs.
+  unsigned IndexReg = 0;
+  PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+
+  // Note: If we still have a frame index here, we know the offset is
+  // in range, as otherwise PPCSimplifyAddress would have converted it
+  // into a RegBase.
+  if (Addr.BaseType == Address::FrameIndexBase) {
+    MachineMemOperand *MMO =
+      FuncInfo.MF->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
+        MFI.getObjectAlignment(Addr.Base.FI));
+
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg)
+      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+  // Base reg with offset in range.
+  } else if (UseOffset) {
+    if (Addr.Offset == 0 && Opc == PPC::STW8)
+      dbgs() << "Possible problem here.\n";
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+  // Indexed form.
+  } else {
+    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
+    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+    // is hard to get at.
+    switch (Opc) {
+      default:        llvm_unreachable("Unexpected opcode!");
+      case PPC::STB:  Opc = PPC::STBX;  break;
+      case PPC::STH : Opc = PPC::STHX;  break;
+      case PPC::STW : Opc = PPC::STWX;  break;
+      case PPC::STB8: Opc = PPC::STBX8; break;
+      case PPC::STH8: Opc = PPC::STHX8; break;
+      case PPC::STW8: Opc = PPC::STWX8; break;
+      case PPC::STD:  Opc = PPC::STDX;  break;
+      case PPC::STFS: Opc = PPC::STFSX; break;
+      case PPC::STFD: Opc = PPC::STFDX; break;
+    }
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+      .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+  }
+
+  return true;
+}
+
+// Attempt to fast-select a store instruction.
+bool PPCFastISel::SelectStore(const Instruction *I) {
+  Value *Op0 = I->getOperand(0);
+  unsigned SrcReg = 0;
+
+  // FIXME: No atomic stores are supported.
+  if (cast<StoreInst>(I)->isAtomic())
+    return false;
+
+  // Verify we have a legal type before going any further.
+  MVT VT;
+  if (!isLoadTypeLegal(Op0->getType(), VT))
+    return false;
+
+  // Get the value to be stored into a register.
+  SrcReg = getRegForValue(Op0);
+  if (SrcReg == 0)
+    return false;
+
+  // See if we can handle this address.
+  Address Addr;
+  if (!PPCComputeAddress(I->getOperand(1), Addr))
+    return false;
+
+  if (!PPCEmitStore(VT, SrcReg, Addr))
+    return false;
+
+  return true;
+}
+
 // Attempt to fast-select a branch instruction.
 bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
@@ -330,6 +787,109 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
  return true;
}

+// Attempt to fast-select a binary integer operation that isn't already
+// handled automatically.
+bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+  EVT DestVT = TLI.getValueType(I->getType(), true);
+
+  // We can get here in the case when we have a binary operation on a non-legal
+  // type and the target independent selector doesn't know how to handle it.
+  if (DestVT != MVT::i16 && DestVT != MVT::i8)
+    return false;
+
+  // Look at the currently assigned register for this instruction
+  // to determine the required register class.  If there is no register,
+  // make a conservative choice (don't assign R0).
+  unsigned AssignedReg = FuncInfo.ValueMap[I];
+  const TargetRegisterClass *RC =
+    (AssignedReg ?
MRI.getRegClass(AssignedReg) :
+     &PPC::GPRC_and_GPRC_NOR0RegClass);
+  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+  unsigned Opc;
+  switch (ISDOpcode) {
+    default: return false;
+    case ISD::ADD:
+      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
+      break;
+    case ISD::OR:
+      Opc = IsGPRC ? PPC::OR : PPC::OR8;
+      break;
+    case ISD::SUB:
+      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
+      break;
+  }
+
+  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
+  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+  if (SrcReg1 == 0) return false;
+
+  // Handle case of small immediate operand.
+  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+    const APInt &CIVal = ConstInt->getValue();
+    int Imm = (int)CIVal.getSExtValue();
+    bool UseImm = true;
+    if (isInt<16>(Imm)) {
+      switch (Opc) {
+        default:
+          llvm_unreachable("Missing case!");
+        case PPC::ADD4:
+          Opc = PPC::ADDI;
+          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+          break;
+        case PPC::ADD8:
+          Opc = PPC::ADDI8;
+          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+          break;
+        case PPC::OR:
+          Opc = PPC::ORI;
+          break;
+        case PPC::OR8:
+          Opc = PPC::ORI8;
+          break;
+        case PPC::SUBF:
+          if (Imm == -32768)
+            UseImm = false;
+          else {
+            Opc = PPC::ADDI;
+            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+            Imm = -Imm;
+          }
+          break;
+        case PPC::SUBF8:
+          if (Imm == -32768)
+            UseImm = false;
+          else {
+            Opc = PPC::ADDI8;
+            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+            Imm = -Imm;
+          }
+          break;
+      }
+
+      if (UseImm) {
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+          .addReg(SrcReg1).addImm(Imm);
+        UpdateValueMap(I, ResultReg);
+        return true;
+      }
+    }
+  }
+
+  // Reg-reg case.
+  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+  if (SrcReg2 == 0) return false;
+
+  // Reverse operands for subtract-from.
+  if (ISDOpcode == ISD::SUB)
+    std::swap(SrcReg1, SrcReg2);
+
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+    .addReg(SrcReg1).addReg(SrcReg2);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
 // Attempt to fast-select a return instruction.
 bool PPCFastISel::SelectRet(const Instruction *I) {

@@ -551,10 +1111,20 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
 bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {

  switch (I->getOpcode()) {
+    case Instruction::Load:
+      return SelectLoad(I);
+    case Instruction::Store:
+      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
+    case Instruction::Add:
+      return SelectBinaryIntOp(I, ISD::ADD);
+    case Instruction::Or:
+      return SelectBinaryIntOp(I, ISD::OR);
+    case Instruction::Sub:
+      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::ZExt:
@@ -611,6 +1181,68 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
  return DestReg;
}

+// Materialize the address of a global value into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
+  assert(VT == MVT::i64 && "Non-address!");
+  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
+  unsigned DestReg = createResultReg(RC);
+
+  // Global values may be plain old object addresses, TLS object
+  // addresses, constant pool entries, or jump tables.  How we generate
+  // code for these may depend on small, medium, or large code model.
+  CodeModel::Model CModel = TM.getCodeModel();
+
+  // FIXME: Jump tables are not yet required because fast-isel doesn't
+  // handle switches; if that changes, we need them as well.  For now,
+  // what follows assumes everything's a generic (or TLS) global address.
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  if (!GVar) {
+    // If GV is an alias, use the aliasee for determining thread-locality.
+    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+      GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+    assert((GVar || isa<Function>(GV)) && "Unexpected GV subclass!");
+  }
+
+  // FIXME: We don't yet handle the complexity of TLS.
+  bool IsTLS = GVar && GVar->isThreadLocal();
+  if (IsTLS)
+    return 0;
+
+  // For small code model, generate a simple TOC load.
+  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg)
+      .addGlobalAddress(GV).addReg(PPC::X2);
+  else {
+    // If the address is an externally defined symbol, a symbol with
+    // common or externally available linkage, a function address, or a
+    // jump table address (not yet needed), or if we are generating code
+    // for large code model, we generate:
+    //       LDtocL(GV, ADDIStocHA(%X2, GV))
+    // Otherwise we generate:
+    //       ADDItocL(ADDIStocHA(%X2, GV), GV)
+    // Either way, start with the ADDIStocHA:
+    unsigned HighPartReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
+
+    // !GVar implies a function address.  An external variable is one
+    // without an initializer.
+    // If/when switches are implemented, jump tables should be handled
+    // on the "if" path here.
+    if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
+        GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
+    else
+      // Otherwise generate the ADDItocL.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL),
+              DestReg).addReg(HighPartReg).addGlobalAddress(GV);
+  }
+
+  return DestReg;
+}
+
 // Materialize a 32-bit integer constant into a register, and return
 // the register number (or zero if we failed to handle it).
 unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
@@ -743,6 +1375,8 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return PPCMaterializeFP(CFP, VT);
+  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    return PPCMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return PPCMaterializeInt(C, VT);
  // TBD: Global values.
@@ -756,10 +1390,82 @@ unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  return AI && 0;
}

-// Fold loads into extends when possible. TBD.
+// Fold loads into extends when possible.
+// FIXME: We can have multiple redundant extend/trunc instructions
+// following a load.  The folding only picks up one.  Extend this
+// to check subsequent instructions for the same pattern and remove
+// them.  Thus ResultReg should be the def reg for the last redundant
+// instruction in a chain, and all intervening instructions can be
+// removed from parent.  Change test/CodeGen/PowerPC/fast-isel-fold.ll
+// to add ELF64-NOT: rldicl to the appropriate tests when this works.
 bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
-  return MI && OpNo && LI && false;
+  // Verify we have a legal type before going any further.
+ MVT VT; + if (!isLoadTypeLegal(LI->getType(), VT)) + return false; + + // Combine load followed by zero- or sign-extend. + bool IsZExt = false; + switch(MI->getOpcode()) { + default: + return false; + + case PPC::RLDICL: + case PPC::RLDICL_32_64: { + IsZExt = true; + unsigned MB = MI->getOperand(3).getImm(); + if ((VT == MVT::i8 && MB <= 56) || + (VT == MVT::i16 && MB <= 48) || + (VT == MVT::i32 && MB <= 32)) + break; + return false; + } + + case PPC::RLWINM: + case PPC::RLWINM8: { + IsZExt = true; + unsigned MB = MI->getOperand(3).getImm(); + if ((VT == MVT::i8 && MB <= 24) || + (VT == MVT::i16 && MB <= 16)) + break; + return false; + } + + case PPC::EXTSB: + case PPC::EXTSB8: + case PPC::EXTSB8_32_64: + /* There is no sign-extending load-byte instruction. */ + return false; + + case PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: { + if (VT != MVT::i16 && VT != MVT::i8) + return false; + break; + } + + case PPC::EXTSW: + case PPC::EXTSW_32_64: { + if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8) + return false; + break; + } + } + + // See if we can handle this address. + Address Addr; + if (!PPCComputeAddress(LI->getOperand(0), Addr)) + return false; + + unsigned ResultReg = MI->getOperand(0).getReg(); + + if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt)) + return false; + + MI->eraseFromParent(); + return true; } // Attempt to lower call arguments in a faster way than done by @@ -791,6 +1497,62 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { return PPCMaterialize32BitInt(Imm, RC); } +// Override for ADDI and ADDI8 to set the correct register class +// on RHS operand 0. The automatic infrastructure naively assumes +// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost +// for these cases. At the moment, none of the other automatically +// generated RI instructions require special treatment. However, once +// SelectSelect is implemented, "isel" requires similar handling. +// +// Also be conservative about the output register class. Avoid +// assigning R0 or X0 to the output register for GPRC and G8RC +// register classes, as any such result could be used in ADDI, etc., +// where those regs have another meaning. +unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + if (MachineInstOpcode == PPC::ADDI) + MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass); + else if (MachineInstOpcode == PPC::ADDI8) + MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass); + + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC, + Op0, Op0IsKill, Imm); +} + +// Override for instructions with one register operand to avoid use of +// R0/X0. The automatic infrastructure isn't aware of the context so +// we must be conservative. +unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass* RC, + unsigned Op0, bool Op0IsKill) { + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); +} + +// Override for instructions with two register operands to avoid use +// of R0/X0. 
The automatic infrastructure isn't aware of the context +// so we must be conservative. +unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass* RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, + Op1, Op1IsKill); +} + namespace llvm { // Create the fast instruction selector for PowerPC64 ELF. FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 28b9ba9..29c2270 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1813,10 +1813,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { - if (Flag == 1) - return RetCC_PPC64_ELF_FIS; /* CC_PPC64_ELF_FIS in future patch. */ - else - return RetCC_PPC64_ELF_FIS; + /* One of these will be CC_PPC64_ELF_FIS in a future patch. */ + return Flag ? RetCC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; } bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 9257904..4a8f59b 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -636,6 +636,15 @@ def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src), "lwax $rD, $src", LdStLHA, [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; +// For fast-isel: +let isCodeGenOnly = 1, mayLoad = 1 in { +def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src), + "lwa $rD, $src", LdStLWA, []>, isPPC64, + PPC970_DGroup_Cracked; +def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src), + "lwax $rD, $src", LdStLHA, []>, isPPC64, + PPC970_DGroup_Cracked; +} // end fast-isel isCodeGenOnly // Update forms. let mayLoad = 1, neverHasSideEffects = 1 in { -- cgit v1.1 From 40433e5df94f05ba93df7e96f7eb3f861915152a Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 30 Aug 2013 03:07:11 +0000 Subject: Remove bogus debug statement. Sheesh. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189638 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index a308ade..14a4b31 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -584,14 +584,12 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); // Base reg with offset in range. - } else if (UseOffset) { - if (Addr.Offset == 0 && Opc == PPC::STW8) - dbgs() << "Possible problem here.\n"; + } else if (UseOffset) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. - } else { + else { // Get the RR opcode corresponding to the RI one. 
FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
-- cgit v1.1


From e206efd39bcc00600d816b67b041820b35d023fe Mon Sep 17 00:00:00 2001
From: Bill Schmidt
Date: Fri, 30 Aug 2013 03:16:48 +0000
Subject: [PowerPC] Handle selection of compare instructions in fast-isel.

Mostly trivial patch adding support for compares.  The meat of the
work was added with the branch support.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189639 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCFastISel.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 14a4b31..aeda78b 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -108,6 +108,7 @@ class PPCFastISel : public FastISel {
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
+    bool SelectCmp(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectRet(const Instruction *I);
    bool SelectIntExt(const Instruction *I);
@@ -1065,6 +1066,23 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
  return true;
}

+// Attempt to fast-select a compare instruction.
+bool PPCFastISel::SelectCmp(const Instruction *I) {
+  const CmpInst *CI = cast<CmpInst>(I);
+  Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+  if (!OptPPCPred)
+    return false;
+
+  unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+
+  if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+                  CondReg))
+    return false;
+
+  UpdateValueMap(I, CondReg);
+  return true;
+}
+
 // Attempt to fast-select an integer extend instruction.
 bool PPCFastISel::SelectIntExt(const Instruction *I) {
  Type *DestTy = I->getType();
-- cgit v1.1


From 4c60b8a78d811a5b16ae45f6957933fb479bab58 Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Fri, 30 Aug 2013 03:49:48 +0000
Subject: mi-sched: Precompute a PressureDiff for each instruction, adjust for liveness later.

Created SUPressureDiffs array to hold the per node PDiff computed during
DAG building.

Added a getUpwardPressureDelta API that will soon replace the old one.
Compute PressureDelta here from the precomputed PressureDiffs.

Updating for liveness will come next.
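To make the new interface concrete, here is a minimal sketch of how a
consumer might fold one precomputed entry into a heuristic cost.  It
assumes only the PressureChange accessors exercised elsewhere in this
patch (isValid, getPSet, getUnitInc); the helper name and the weighting
scheme are illustrative, not part of the change:

    // Fold a single precomputed PressureChange into a heuristic cost.
    // A positive unit increment means scheduling the candidate raises
    // pressure in that register-pressure set; negative means it
    // relieves pressure.
    static int pressureCost(const PressureChange &P, int Weight) {
      if (!P.isValid())
        return 0;                     // no recorded change for this set
      return P.getUnitInc() * Weight; // penalize increases proportionally
    }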
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 10 +++++----- lib/Target/Hexagon/HexagonMachineScheduler.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 10bb3e9..22c485f 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -407,11 +407,11 @@ SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() { #ifndef NDEBUG void ConvergingVLIWScheduler::traceCandidate(const char *Label, const ReadyQueue &Q, - SUnit *SU, PressureElement P) { + SUnit *SU, PressureChange P) { dbgs() << Label << " " << Q.getName() << " "; if (P.isValid()) - dbgs() << DAG->TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease - << " "; + dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" + << P.getUnitInc() << " "; else dbgs() << " "; SU->dump(DAG); @@ -517,8 +517,8 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, ResCount += (NumNodesBlocking * ScaleTwo); // Factor in reg pressure as a heuristic. - ResCount -= (Delta.Excess.UnitIncrease*PriorityThree); - ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree); + ResCount -= (Delta.Excess.getUnitInc()*PriorityThree); + ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityThree); DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index 171193e..8ac333f 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -233,7 +233,7 @@ protected: SchedCandidate &Candidate); #ifndef NDEBUG void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, - PressureElement P = PressureElement()); + PressureChange P = PressureChange()); #endif }; -- cgit v1.1 From 5de35bc7303ac36714e9e8ad42987c86cabf9c74 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Fri, 30 Aug 2013 05:36:14 +0000 Subject: Revert "ARM: Improve pattern for isel mul of vector by scalar." This reverts commit r189619. The commit was breaking the arm_neon_intrinsic test. 
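For reference, the reverted patterns matched scalar-broadcast multiplies.
A rough sketch of the source shape involved, using the standard
<arm_neon.h> intrinsics (the function name is illustrative):

    #include <arm_neon.h>

    // Multiply a float vector by a scalar: the (fmul x, (NEONvdup y))
    // shape the reverted patterns tried to select as a lane-indexed VMUL.
    float32x4_t scale4(float32x4_t v, float s) {
      return vmulq_n_f32(v, s);  // splat s across all lanes, then multiply
    }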
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189648 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f1bd37e..49ae334 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4022,17 +4022,6 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; - -def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), - (VMULslfd DPR:$Rn, - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), - (i32 0))>; -def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), - (VMULslfq QPR:$Rn, - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), - (i32 0))>; - - // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, -- cgit v1.1 From b6ac30a15591a8dab3cff3f0891d7e1ca9476826 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 30 Aug 2013 06:52:21 +0000 Subject: Teach X86 backend to create BMI2 BZHI instructions from (and X, (add (shl 1, Y), -1)). Fixes PR17038. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189653 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 74 +++++++++++++++++++++++++++----------- lib/Target/X86/X86ISelLowering.h | 1 + lib/Target/X86/X86InstrInfo.td | 10 ++++++ 3 files changed, 64 insertions(+), 21 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 81008e9..ecbd24f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13473,6 +13473,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::BLSI: return "X86ISD::BLSI"; case X86ISD::BLSMSK: return "X86ISD::BLSMSK"; case X86ISD::BLSR: return "X86ISD::BLSR"; + case X86ISD::BZHI: return "X86ISD::BZHI"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; @@ -17279,33 +17280,64 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - // Create BLSI, and BLSR instructions + // Create BLSI, BLSR, and BZHI instructions // BLSI is X & (-X) // BLSR is X & (X-1) - if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) { + // BZHI is X & ((1 << Y) - 1) + if (VT == MVT::i32 || VT == MVT::i64) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDLoc DL(N); - // Check LHS for neg - if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 && - isZero(N0.getOperand(0))) - return DAG.getNode(X86ISD::BLSI, DL, VT, N1); - - // Check RHS for neg - if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 && - isZero(N1.getOperand(0))) - return DAG.getNode(X86ISD::BLSI, DL, VT, N0); - - // Check LHS for X-1 - if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 && - isAllOnes(N0.getOperand(1))) - return DAG.getNode(X86ISD::BLSR, DL, VT, N1); - - // Check RHS for X-1 - if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 && - isAllOnes(N1.getOperand(1))) - return DAG.getNode(X86ISD::BLSR, DL, VT, N0); + if (Subtarget->hasBMI()) { + // Check LHS for neg + if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 && + isZero(N0.getOperand(0))) + return DAG.getNode(X86ISD::BLSI, DL, VT, N1); + + // Check RHS for neg + if 
(N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
+          isZero(N1.getOperand(0)))
+        return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+
+      // Check LHS for X-1
+      if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+          isAllOnes(N0.getOperand(1)))
+        return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+
+      // Check RHS for X-1
+      if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+          isAllOnes(N1.getOperand(1)))
+        return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+    }
+
+    if (Subtarget->hasBMI2()) {
+      // Check for (and (add (shl 1, Y), -1), X)
+      if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) {
+        SDValue N00 = N0.getOperand(0);
+        if (N00.getOpcode() == ISD::SHL) {
+          SDValue N001 = N00.getOperand(1);
+          assert(N001.getValueType() == MVT::i8 && "unexpected type");
+          ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0));
+          if (C && C->getZExtValue() == 1)
+            return DAG.getNode(X86ISD::BZHI, DL, VT, N1,
+                               DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N001));
+        }
+      }
+
+      // Check for (and X, (add (shl 1, Y), -1))
+      if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) {
+        SDValue N10 = N1.getOperand(0);
+        if (N10.getOpcode() == ISD::SHL) {
+          SDValue N101 = N10.getOperand(1);
+          assert(N101.getValueType() == MVT::i8 && "unexpected type");
+          ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0));
+          if (C && C->getZExtValue() == 1)
+            return DAG.getNode(X86ISD::BZHI, DL, VT, N0,
+                               DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N101));
+        }
+      }
+    }

  return SDValue();
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 632a5b6..6a2e4d2 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -295,6 +295,7 @@ namespace llvm {
      BLSI,   // BLSI - Extract lowest set isolated bit
      BLSMSK, // BLSMSK - Get mask up to lowest set bit
      BLSR,   // BLSR - Reset lowest set bit
+      BZHI,   // BZHI - Zero high bits

      UMUL, // LOW, HI, FLAGS = umul LHS, RHS
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c2d1807..41e2c44 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -253,6 +253,7 @@ def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
 def X86blsi   : SDNode<"X86ISD::BLSI",   SDTIntUnaryOp>;
 def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
 def X86blsr   : SDNode<"X86ISD::BLSR",   SDTIntUnaryOp>;
+def X86bzhi   : SDNode<"X86ISD::BZHI",   SDTIntBinOp>;

 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -1856,6 +1857,15 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in {
                             int_x86_bmi_bzhi_64, loadi64>, VEX_W;
}

+def : Pat<(X86bzhi GR32:$src1, GR32:$src2),
+          (BZHI32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(X86bzhi (loadi32 addr:$src1), GR32:$src2),
+          (BZHI32rm addr:$src1, GR32:$src2)>;
+def : Pat<(X86bzhi GR64:$src1, GR64:$src2),
+          (BZHI64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(X86bzhi (loadi64 addr:$src1), GR64:$src2),
+          (BZHI64rm addr:$src1, GR64:$src2)>;
+
 multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
                          X86MemOperand x86memop, Intrinsic Int,
                          PatFrag ld_frag> {
-- cgit v1.1


From 74f269176fb3b3dc2d3d6417492c65ef28a8ecfa Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 30 Aug 2013 07:06:26 +0000
Subject: Remove unused X86andn_flag node.
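Context for the BZHI combine introduced above: the DAG shape
(and X, (add (shl 1, Y), -1)) is the usual source idiom for keeping the
low Y bits of X.  A hedged sketch of code that should now select to a
single BZHI on BMI2 targets (the function name is illustrative):

    // Keep the low Y bits of X: X & ((1 << Y) - 1).
    // With BMI2 available, this whole expression can be selected as
    // one BZHI instruction instead of a shift, add, and and.
    unsigned low_bits(unsigned X, unsigned Y) {
      return X & ((1u << Y) - 1);
    }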
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189654 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.td | 1 -
 1 file changed, 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 41e2c44..fca7d4c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -248,7 +248,6 @@ def X86xor_flag  : SDNode<"X86ISD::XOR",  SDTBinaryArithWithFlags,
                           [SDNPCommutative]>;
 def X86and_flag  : SDNode<"X86ISD::AND",  SDTBinaryArithWithFlags,
                           [SDNPCommutative]>;
-def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;

 def X86blsi   : SDNode<"X86ISD::BLSI",   SDTIntUnaryOp>;
 def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
-- cgit v1.1


From a9080653c22d6fcacca5878603167151d8d11ee4 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 30 Aug 2013 07:16:16 +0000
Subject: Fixup BZHI selection to remove an unneeded zero extension.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189656 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp |  6 ++----
 lib/Target/X86/X86InstrInfo.td     | 22 +++++++++++++---------
 2 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ecbd24f..73c4a1c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17320,8 +17320,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
          assert(N001.getValueType() == MVT::i8 && "unexpected type");
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0));
          if (C && C->getZExtValue() == 1)
-            return DAG.getNode(X86ISD::BZHI, DL, VT, N1,
-                               DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N001));
+            return DAG.getNode(X86ISD::BZHI, DL, VT, N1, N001);
        }
      }

@@ -17333,8 +17332,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
          assert(N101.getValueType() == MVT::i8 && "unexpected type");
          ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0));
          if (C && C->getZExtValue() == 1)
-            return DAG.getNode(X86ISD::BZHI, DL, VT, N0,
-                               DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N101));
+            return DAG.getNode(X86ISD::BZHI, DL, VT, N0, N101);
        }
      }
    }
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index fca7d4c..869b9e0 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -252,7 +252,7 @@ def X86and_flag  : SDNode<"X86ISD::AND",  SDTBinaryArithWithFlags,
 def X86blsi   : SDNode<"X86ISD::BLSI",   SDTIntUnaryOp>;
 def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
 def X86blsr   : SDNode<"X86ISD::BLSR",   SDTIntUnaryOp>;
-def X86bzhi   : SDNode<"X86ISD::BZHI",   SDTIntBinOp>;
+def X86bzhi   : SDNode<"X86ISD::BZHI",   SDTIntShiftOp>;

 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -1856,14 +1856,18 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in {
                             int_x86_bmi_bzhi_64, loadi64>, VEX_W;
}

-def : Pat<(X86bzhi GR32:$src1, GR32:$src2),
-          (BZHI32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(X86bzhi (loadi32 addr:$src1), GR32:$src2),
-          (BZHI32rm addr:$src1, GR32:$src2)>;
-def : Pat<(X86bzhi GR64:$src1, GR64:$src2),
-          (BZHI64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(X86bzhi (loadi64 addr:$src1), GR64:$src2),
-          (BZHI64rm addr:$src1, GR64:$src2)>;
+def : Pat<(X86bzhi GR32:$src1, GR8:$src2),
+          (BZHI32rr GR32:$src1,
+           (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(X86bzhi (loadi32 addr:$src1), GR8:$src2),
+          (BZHI32rm addr:$src1,
+           (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def :
Pat<(X86bzhi GR64:$src1, GR8:$src2),
+          (BZHI64rr GR64:$src1,
+           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(X86bzhi (loadi64 addr:$src1), GR8:$src2),
+          (BZHI64rm addr:$src1,
+           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;

 multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
                          X86MemOperand x86memop, Intrinsic Int,
                          PatFrag ld_frag> {
-- cgit v1.1


From: Benjamin Kramer
Date: Fri, 30 Aug 2013 14:05:32 +0000
Subject: X86: Add a description of the Intel Atom Silvermont CPU.

Currently this is just the atom model with SSE4.2 enabled.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189669 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86.td | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 2c3643d..da989ad 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -206,6 +206,15 @@ def : ProcessorModel<"atom", AtomModel,
                       FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;

+// Silvermont.
+def : ProcessorModel<"slm",  AtomModel,
+                     [ProcIntelAtom, FeatureSSE42, FeatureCMPXCHG16B,
+                      FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+                      FeatureSlowDivide,
+                      FeatureCallRegIndirect,
+                      FeatureLEAUsesAG,
+                      FeaturePadShortFunctions]>;
+
 // "Arrandale" along with corei3 and corei5
 def : ProcessorModel<"corei7", SandyBridgeModel,
                      [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
-- cgit v1.1


From 2cc396bfb06a8e4df2fae6450277e7a05ebf6e4b Mon Sep 17 00:00:00 2001
From: Andrey Churbanov
Date: Fri, 30 Aug 2013 14:40:24 +0000
Subject: Checking commit access; removed one space added in previous test checkin by Jim

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189673 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/README.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 4fbad88..b4285a0 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2039,5 +2039,5 @@ GCC:
         sbbl    %eax, %eax
         andl    $-10, %eax
         addl    $5, %eax
- 
+
 //===---------------------------------------------------------------------===//
-- cgit v1.1


From 9bc94276e796d644cb425a7c7d38cc44dbf4e9c1 Mon Sep 17 00:00:00 2001
From: Bill Schmidt
Date: Fri, 30 Aug 2013 15:18:11 +0000
Subject: [PowerPC] Add handling for conversions to fast-isel.

Yet another chunk of fast-isel code.  This one handles various
conversions involving floating-point.  (It also includes some
miscellaneous handling throughout the back end for LWA_32 and LWAX_32
that should have been part of the load-store patch.)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189677 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp |  12 +
 lib/Target/PowerPC/PPCAsmPrinter.cpp              |   1 +
 lib/Target/PowerPC/PPCFastISel.cpp                | 273 ++++++++++++++++++++++
 lib/Target/PowerPC/PPCRegisterInfo.cpp            |   2 +
 4 files changed, 288 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 08d7665..79af2ef 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
 using namespace llvm;

 #include "PPCGenAsmWriter.inc"
@@ -78,6 +79,17 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
    }
  }

+  // For fast-isel, a COPY_TO_REGCLASS may survive this long.
This is + // used when converting a 32-bit float to a 64-bit float as part of + // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()), + // as otherwise we have problems with incorrect register classes + // in machine instruction verification. For now, just avoid trying + // to print it as such an instruction has no effect (a 32-bit float + // in a register is already in 64-bit form, just with lower + // precision). FIXME: Is there a better solution? + if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS) + return; + printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index bbfad87..e4a631a 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -704,6 +704,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { break; case PPC::LD: case PPC::STD: + case PPC::LWA_32: case PPC::LWA: { // Verify alignment is legal, so we don't create relocations // that can't be supported. diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index aeda78b..3bceed4 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -109,6 +109,10 @@ class PPCFastISel : public FastISel { bool SelectBranch(const Instruction *I); bool SelectIndirectBr(const Instruction *I); bool SelectCmp(const Instruction *I); + bool SelectFPExt(const Instruction *I); + bool SelectFPTrunc(const Instruction *I); + bool SelectIToFP(const Instruction *I, bool IsSigned); + bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); bool SelectRet(const Instruction *I); bool SelectIntExt(const Instruction *I); @@ -135,6 +139,9 @@ class PPCFastISel : public FastISel { const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC); + unsigned PPCMoveToIntReg(const Instruction *I, MVT VT, + unsigned SrcReg, bool IsSigned); + unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned); // Call handling routines. private: @@ -786,6 +793,260 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return true; } +// Attempt to fast-select a floating-point extend instruction. +bool PPCFastISel::SelectFPExt(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::f32 || DestVT != MVT::f64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // No code is generated for a FP extend. + UpdateValueMap(I, SrcReg); + return true; +} + +// Attempt to fast-select a floating-point truncate instruction. +bool PPCFastISel::SelectFPTrunc(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::f64 || DestVT != MVT::f32) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // Round the result to single precision. + unsigned DestReg = createResultReg(&PPC::F4RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg) + .addReg(SrcReg); + + UpdateValueMap(I, DestReg); + return true; +} + +// Move an i32 or i64 value in a GPR to an f64 value in an FPR. 
+// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// those should be used instead of moving via a stack slot when the +// subtarget permits. +// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte +// stack slot and 4-byte store/load sequence. Or just sext the 4-byte +// case to 8 bytes which produces tighter code but wastes stack space. +unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, + bool IsSigned) { + + // If necessary, extend 32-bit int to 64-bit. + if (SrcVT == MVT::i32) { + unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); + if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned)) + return 0; + SrcReg = TmpReg; + } + + // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary. + Address Addr; + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = MFI.CreateStackObject(8, 8, false); + + // Store the value from the GPR. + if (!PPCEmitStore(MVT::i64, SrcReg, Addr)) + return 0; + + // Load the integer value into an FPR. The kind of load used depends + // on a number of conditions. + unsigned LoadOpc = PPC::LFD; + + if (SrcVT == MVT::i32) { + Addr.Offset = 4; + if (!IsSigned) + LoadOpc = PPC::LFIWZX; + else if (PPCSubTarget.hasLFIWAX()) + LoadOpc = PPC::LFIWAX; + } + + const TargetRegisterClass *RC = &PPC::F8RCRegClass; + unsigned ResultReg = 0; + if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc)) + return 0; + + return ResultReg; +} + +// Attempt to fast-select an integer-to-floating-point conversion. +bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { + MVT DstVT; + Type *DstTy = I->getType(); + if (!isTypeLegal(DstTy, DstVT)) + return false; + + if (DstVT != MVT::f32 && DstVT != MVT::f64) + return false; + + Value *Src = I->getOperand(0); + EVT SrcEVT = TLI.getValueType(Src->getType(), true); + if (!SrcEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + + if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && + SrcVT != MVT::i32 && SrcVT != MVT::i64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) + return false; + + // We can only lower an unsigned convert if we have the newer + // floating-point conversion operations. + if (!IsSigned && !PPCSubTarget.hasFPCVT()) + return false; + + // FIXME: For now we require the newer floating-point conversion operations + // (which are present only on P7 and A2 server models) when converting + // to single-precision float. Otherwise we have to generate a lot of + // fiddly code to avoid double rounding. If necessary, the fiddly code + // can be found in PPCTargetLowering::LowerINT_TO_FP(). + if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT()) + return false; + + // Extend the input if necessary. + if (SrcVT == MVT::i8 || SrcVT == MVT::i16) { + unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned)) + return false; + SrcVT = MVT::i64; + SrcReg = TmpReg; + } + + // Move the integer value to an FPR. + unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned); + if (FPReg == 0) + return false; + + // Determine the opcode for the conversion. + const TargetRegisterClass *RC = &PPC::F8RCRegClass; + unsigned DestReg = createResultReg(RC); + unsigned Opc; + + if (DstVT == MVT::f32) + Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS; + else + Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU; + + // Generate the convert. 
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+    .addReg(FPReg);
+
+  UpdateValueMap(I, DestReg);
+  return true;
+}
+
+// Move the floating-point value in SrcReg into an integer destination
+// register, and return the register (or zero if we can't handle it).
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
+                                      unsigned SrcReg, bool IsSigned) {
+  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+  // Note that if we have STFIWX available, we could use a 4-byte stack
+  // slot for i32, but this being fast-isel we'll just go with the
+  // easiest code gen possible.
+  Address Addr;
+  Addr.BaseType = Address::FrameIndexBase;
+  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+  // Store the value from the FPR.
+  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
+    return 0;
+
+  // Reload it into a GPR.  If we want an i32, modify the address
+  // to have a 4-byte offset so we load from the right place.
+  if (VT == MVT::i32)
+    Addr.Offset = 4;
+
+  // Look at the currently assigned register for this instruction
+  // to determine the required register class.
+  unsigned AssignedReg = FuncInfo.ValueMap[I];
+  const TargetRegisterClass *RC =
+    AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+
+  unsigned ResultReg = 0;
+  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
+    return 0;
+
+  return ResultReg;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
+  MVT DstVT, SrcVT;
+  Type *DstTy = I->getType();
+  if (!isTypeLegal(DstTy, DstVT))
+    return false;
+
+  if (DstVT != MVT::i32 && DstVT != MVT::i64)
+    return false;
+
+  Value *Src = I->getOperand(0);
+  Type *SrcTy = Src->getType();
+  if (!isTypeLegal(SrcTy, SrcVT))
+    return false;
+
+  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+    return false;
+
+  unsigned SrcReg = getRegForValue(Src);
+  if (SrcReg == 0)
+    return false;
+
+  // Convert f32 to f64 if necessary.  This is just a meaningless copy
+  // to get the register class right.  COPY_TO_REGCLASS is needed since
+  // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
+  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
+  if (InRC == &PPC::F4RCRegClass) {
+    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
+      .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
+    SrcReg = TmpReg;
+  }
+
+  // Determine the opcode for the conversion, which takes place
+  // entirely within FPRs.
+  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+  unsigned Opc;
+
+  if (DstVT == MVT::i32)
+    if (IsSigned)
+      Opc = PPC::FCTIWZ;
+    else
+      Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+  else
+    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+
+  // Generate the convert.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+    .addReg(SrcReg);
+
+  // Now move the integer value from a float register to an integer register.
+  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+  if (IntReg == 0)
+    return false;
+
+  UpdateValueMap(I, IntReg);
+  return true;
+}
+
 // Attempt to fast-select a binary integer operation that isn't already
 // handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
@@ -1135,6 +1396,18 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {

  switch (I->getOpcode()) {
+    case Instruction::FPExt:
+      return SelectFPExt(I);
+    case Instruction::FPTrunc:
+      return SelectFPTrunc(I);
+    case Instruction::SIToFP:
+      return SelectIToFP(I, /*IsSigned*/ true);
+    case Instruction::UIToFP:
+      return SelectIToFP(I, /*IsSigned*/ false);
+    case Instruction::FPToSI:
+      return SelectFPToI(I, /*IsSigned*/ true);
+    case Instruction::FPToUI:
+      return SelectFPToI(I, /*IsSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index adba613..aee2830 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -69,6 +69,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
  ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
  ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
  ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+  ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32;

  // 64-bit
  ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
@@ -532,6 +533,7 @@ static bool usesIXAddr(const MachineInstr &MI) {
  default:
    return false;
  case PPC::LWA:
+  case PPC::LWA_32:
  case PPC::LD:
  case PPC::STD:
    return true;
-- cgit v1.1


From c673f9c6fecb0f828845ada7ea5458f66f896283 Mon Sep 17 00:00:00 2001
From: Reed Kotler
Date: Fri, 30 Aug 2013 19:40:56 +0000
Subject: Fix a problem with dual mips16/mips32 mode.

When the underlying processor has hard float and you compile the mips32
code, you have to make sure that it knows to compile any mips32 routines
as hard float.  I need to clean up the way mips16 hard float is
specified, but first I need to think through all the details.  Mips16
always has a form of soft float, the difference being whether the
underlying hardware has floating point.  So it's not really necessary to
pass the -soft-float option to llvm, since soft-float is always true for
mips16 by virtue of the fact that it will not register the
floating-point registers.  By using this fact, I can simplify the way
this is all handled.
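To make the mechanism concrete, here is a simplified sketch of the
attribute rewrite involved, using the same IR-level attribute APIs that
appear in the patch below; the helper is an illustrative stand-in for
the pass logic, not the committed code:

    // Drop "use-soft-float" from a nomips16 function so its body is
    // compiled as hard float when the underlying hardware has
    // floating point.
    static void makeHardFloat(llvm::Function &F) {
      if (!F.hasFnAttribute("nomips16"))
        return;                       // only nomips16 bodies are affected
      llvm::AttributeSet A;
      A = A.addAttribute(F.getContext(), llvm::AttributeSet::FunctionIndex,
                         "use-soft-float", "false");
      F.removeAttributes(llvm::AttributeSet::FunctionIndex, A);
    }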
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189690 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16HardFloat.cpp | 20 ++++++++++++++++++++ lib/Target/Mips/MipsISelLowering.cpp | 8 ++++---- lib/Target/Mips/MipsSEISelLowering.cpp | 2 +- lib/Target/Mips/MipsSubtarget.cpp | 3 +++ lib/Target/Mips/MipsSubtarget.h | 2 ++ 5 files changed, 30 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 617c178..a66abb2 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -430,6 +430,21 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, new UnreachableInst(FStub->getContext(), BB); } +// +// remove the use-soft-float attribute +// +static void removeUseSoftFloat(Function &F) { + AttributeSet A; + DEBUG(errs() << "removing -use-soft-float\n"); + A = A.addAttribute(F.getContext(), AttributeSet::FunctionIndex, + "use-soft-float", "false"); + F.removeAttributes(AttributeSet::FunctionIndex, A); + if (F.hasFnAttribute("use-soft-float")) { + DEBUG(errs() << "still has -use-soft-float\n"); + } + F.addAttributes(AttributeSet::FunctionIndex, A); +} + namespace llvm { // @@ -453,6 +468,11 @@ bool Mips16HardFloat::runOnModule(Module &M) { DEBUG(errs() << "Run on Module Mips16HardFloat\n"); bool Modified = false; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->hasFnAttribute("nomips16") && + F->hasFnAttribute("use-soft-float")) { + removeUseSoftFloat(*F); + continue; + } if (F->isDeclaration() || F->hasFnAttribute("mips16_fp_stub") || F->hasFnAttribute("nomips16")) continue; Modified |= fixupFPReturnAndCall(*F, &M, Subtarget); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 4d1f329..a1706ab 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2334,7 +2334,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SpecialCallingConv); MipsCCInfo.analyzeCallOperands(Outs, IsVarArg, - getTargetMachine().Options.UseSoftFloat, + Subtarget->mipsSEUsesSoftFloat(), Callee.getNode(), CLI.Args); // Get a count of how many bytes are to be pushed on the stack. @@ -2520,7 +2520,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, getTargetMachine(), RVLocs, *DAG.getContext()); MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo); - MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat, + MipsCCInfo.analyzeCallResult(Ins, Subtarget->mipsSEUsesSoftFloat(), CallNode, RetTy); // Copy all of the result registers out of their specified physreg. @@ -2568,7 +2568,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo); Function::const_arg_iterator FuncArg = DAG.getMachineFunction().getFunction()->arg_begin(); - bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat; + bool UseSoftFloat = Subtarget->mipsSEUsesSoftFloat(); MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg); MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), @@ -2728,7 +2728,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo); // Analyze return values. 
-    MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat,
+    MipsCCInfo.analyzeReturn(Outs, Subtarget->mipsSEUsesSoftFloat(),
                              MF.getFunction()->getReturnType());

   SDValue Flag;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 9108211..fae294c 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -87,7 +87,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
     addMSAType(MVT::v2f64, &Mips::MSA128DRegClass);
   }

-  if (!TM.Options.UseSoftFloat) {
+  if (!Subtarget->mipsSEUsesSoftFloat()) {
     addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

     // When dealing with single precision only, use libcalls
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 8383904..91e2535 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -155,3 +155,6 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
   }
 }

+bool MipsSubtarget::mipsSEUsesSoftFloat() const {
+  return TM->Options.UseSoftFloat && !InMips16HardFloat;
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 03bef69..140ddde 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -192,6 +192,8 @@ public:
   bool hasStandardEncoding() const { return !inMips16Mode(); }

+  bool mipsSEUsesSoftFloat() const;
+
   /// Features related to the presence of specific instructions.
   bool hasSEInReg() const { return HasSEInReg; }
   bool hasCondMov() const { return HasCondMov; }
-- cgit v1.1

From d4b3168609d4a6dfb8a948d87dc61f83855ac604 Mon Sep 17 00:00:00 2001
From: Richard Mitton
Date: Fri, 30 Aug 2013 21:19:48 +0000
Subject: Fixed a bug where disassembling an instruction that had a prefix
 would cause LLVM to identify a 1-byte instruction, but querying it for that
 1-byte instruction would then produce an undefined opcode.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189698 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index bb195ee..b63fd5a 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -314,20 +314,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
   while (isPrefix) {
     prefixLocation = insn->readerCursor;

+    /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
     if (consumeByte(insn, &byte))
-      return -1;
+      break;

     /*
      * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
      * break and let it be disassembled as a normal "instruction".
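     * (With this change a buffer holding only prefix bytes is no longer
     * reported as a bogus one-byte instruction; the prefix itself is left
     * for the opcode reader to deal with.)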
*/ + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + break; + + uint8_t nextByte; if (insn->readerCursor - 1 == insn->startLocation - && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) { - uint8_t nextByte; - if (byte == 0xf0) - break; - if (lookAtByte(insn, &nextByte)) - return -1; + && (byte == 0xf2 || byte == 0xf3) + && !lookAtByte(insn, &nextByte)) + { /* * If the byte is 0xf2 or 0xf3, and any of the following conditions are * met: -- cgit v1.1 From 06f9db01ba294cc0834fe7d6cc8180291822fa43 Mon Sep 17 00:00:00 2001 From: Richard Mitton Date: Fri, 30 Aug 2013 21:32:42 +0000 Subject: Build fix git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189699 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index b63fd5a..20e61da 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -305,6 +305,7 @@ static int readPrefixes(struct InternalInstruction* insn) { BOOL prefixGroups[4] = { FALSE }; uint64_t prefixLocation; uint8_t byte = 0; + uint8_t nextByte; BOOL hasAdSize = FALSE; BOOL hasOpSize = FALSE; @@ -325,7 +326,6 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) break; - uint8_t nextByte; if (insn->readerCursor - 1 == insn->startLocation && (byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) -- cgit v1.1 From 11addd2a2f584571ffcfd51711972b27aef0133f Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 30 Aug 2013 22:18:55 +0000 Subject: [PowerPC] Call support for fast-isel. This patch adds fast-isel support for calls (but not intrinsic calls or varargs calls). It also removes a badly-formed assert. There are some new tests just for calls, and also for folding loads into arguments on calls to avoid extra extends. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189701 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCallingConv.td | 11 ++ lib/Target/PowerPC/PPCFastISel.cpp | 327 ++++++++++++++++++++++++++++++++- lib/Target/PowerPC/PPCISelLowering.cpp | 3 +- 3 files changed, 338 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 9c937ed..e8e7f4c 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -42,6 +42,17 @@ def RetCC_PPC : CallingConv<[ // logic. FIXME: See if the logic can be simplified with use of CCs. // This may require some extensions to current table generation. +// Simple calling convention for 64-bit ELF PowerPC fast isel. +// Only handle ints and floats. All ints are promoted to i64. +// Vector types and quadword ints are not handled. +def CC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i8], CCPromoteToType>, + CCIfType<[i16], CCPromoteToType>, + CCIfType<[i32], CCPromoteToType>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, + CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>> +]>; + // Simple return-value convention for 64-bit ELF PowerPC fast isel. // All small ints are promoted to i64. 
Vector types, quadword ints, // and multiple register returns are "supported" to avoid compile diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 3bceed4..8a88e76 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -114,6 +114,7 @@ class PPCFastISel : public FastISel { bool SelectIToFP(const Instruction *I, bool IsSigned); bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); + bool SelectCall(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectIntExt(const Instruction *I); @@ -145,6 +146,17 @@ class PPCFastISel : public FastISel { // Call handling routines. private: + bool processCallArgs(SmallVectorImpl &Args, + SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgVTs, + SmallVectorImpl &ArgFlags, + SmallVectorImpl &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes, + bool IsVarArg); + void finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes, bool IsVarArg); CCAssignFn *usePPC32CCs(unsigned Flag); private: @@ -1150,6 +1162,316 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { return true; } +// Handle arguments to a call that we're attempting to fast-select. +// Return false if the arguments are too complex for us at the moment. +bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, + SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgVTs, + SmallVectorImpl &ArgFlags, + SmallVectorImpl &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes, + bool IsVarArg) { + SmallVector ArgLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); + + // Bail out if we can't handle any of the arguments. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Skip vector arguments for now, as well as long double and + // uint128_t, and anything that isn't passed in a register. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || + !VA.isRegLoc() || VA.needsCustom()) + return false; + + // Skip bit-converted arguments for now. + if (VA.getLocInfo() == CCValAssign::BCvt) + return false; + } + + // Get a count of how many bytes are to be pushed onto the stack. + NumBytes = CCInfo.getNextStackOffset(); + + // Issue CALLSEQ_START. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TII.getCallFrameSetupOpcode())) + .addImm(NumBytes); + + // Prepare to assign register arguments. Every argument uses up a + // GPR protocol register even if it's passed in a floating-point + // register. + unsigned NextGPR = PPC::X3; + unsigned NextFPR = PPC::F1; + + // Process arguments. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + unsigned Arg = ArgRegs[VA.getValNo()]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Handle argument promotion and bitcasts. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: { + MVT DestVT = VA.getLocVT(); + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? 
&PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false)) + llvm_unreachable("Failed to emit a sext!"); + ArgVT = DestVT; + Arg = TmpReg; + break; + } + case CCValAssign::AExt: + case CCValAssign::ZExt: { + MVT DestVT = VA.getLocVT(); + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true)) + llvm_unreachable("Failed to emit a zext!"); + ArgVT = DestVT; + Arg = TmpReg; + break; + } + case CCValAssign::BCvt: { + // FIXME: Not yet handled. + llvm_unreachable("Should have bailed before getting here!"); + break; + } + } + + // Copy this argument to the appropriate register. + unsigned ArgReg; + if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { + ArgReg = NextFPR++; + ++NextGPR; + } else + ArgReg = NextGPR++; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ArgReg).addReg(Arg); + RegArgs.push_back(ArgReg); + } + + return true; +} + +// For a call that we've determined we can fast-select, finish the +// call sequence and generate a copy to obtain the return value (if any). +void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes, bool IsVarArg) { + // Issue CallSEQ_END. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TII.getCallFrameDestroyOpcode())) + .addImm(NumBytes).addImm(0); + + // Next, generate a copy to obtain the return value. + // FIXME: No multi-register return values yet, though I don't foresee + // any real difficulties there. + if (RetVT != MVT::isVoid) { + SmallVector RVLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + CCValAssign &VA = RVLocs[0]; + assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); + assert(VA.isRegLoc() && "Can only return in registers!"); + + MVT DestVT = VA.getValVT(); + MVT CopyVT = DestVT; + + // Ints smaller than a register still arrive in a full 64-bit + // register, so make sure we recognize this. + if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) + CopyVT = MVT::i64; + + unsigned SourcePhysReg = VA.getLocReg(); + unsigned ResultReg; + + if (RetVT == CopyVT) { + const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); + ResultReg = createResultReg(CpyRC); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SourcePhysReg); + + // If necessary, round the floating result to single precision. + } else if (CopyVT == MVT::f64) { + ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), + ResultReg).addReg(SourcePhysReg); + + // If only the low half of a general register is needed, generate + // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be + // used along the fast-isel path (not lowered), and downstream logic + // also doesn't like a direct subreg copy on a physical reg.) + } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) { + ResultReg = createResultReg(&PPC::GPRCRegClass); + // Convert physical register from G8RC to GPRC. 
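+      // (This relies on the generated register enums laying out X0..X31 and
+      // R0..R31 as two parallel contiguous ranges, so subtracting the
+      // distance between the ranges maps Xn onto Rn.)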
+ SourcePhysReg -= PPC::X0 - PPC::R0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SourcePhysReg); + } + + UsedRegs.push_back(SourcePhysReg); + UpdateValueMap(I, ResultReg); + } +} + +// Attempt to fast-select a call instruction. +bool PPCFastISel::SelectCall(const Instruction *I) { + const CallInst *CI = cast(I); + const Value *Callee = CI->getCalledValue(); + + // Can't handle inline asm. + if (isa(Callee)) + return false; + + // Allow SelectionDAG isel to handle tail calls. + if (CI->isTailCall()) + return false; + + // Obtain calling convention. + ImmutableCallSite CS(CI); + CallingConv::ID CC = CS.getCallingConv(); + + PointerType *PT = cast(CS.getCalledValue()->getType()); + FunctionType *FTy = cast(PT->getElementType()); + bool IsVarArg = FTy->isVarArg(); + + // Not ready for varargs yet. + if (IsVarArg) + return false; + + // Handle simple calls for now, with legal return types and + // those that can be extended. + Type *RetTy = I->getType(); + MVT RetVT; + if (RetTy->isVoidTy()) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && + RetVT != MVT::i8) + return false; + + // FIXME: No multi-register return values yet. + if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && + RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 && + RetVT != MVT::f64) { + SmallVector RVLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + if (RVLocs.size() > 1) + return false; + } + + // Bail early if more than 8 arguments, as we only currently + // handle arguments passed in registers. + unsigned NumArgs = CS.arg_size(); + if (NumArgs > 8) + return false; + + // Set up the argument vectors. + SmallVector Args; + SmallVector ArgRegs; + SmallVector ArgVTs; + SmallVector ArgFlags; + + Args.reserve(NumArgs); + ArgRegs.reserve(NumArgs); + ArgVTs.reserve(NumArgs); + ArgFlags.reserve(NumArgs); + + for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end(); + II != IE; ++II) { + // FIXME: ARM does something for intrinsic calls here, check into that. + + unsigned AttrIdx = II - CS.arg_begin() + 1; + + // Only handle easy calls for now. It would be reasonably easy + // to handle <= 8-byte structures passed ByVal in registers, but we + // have to ensure they are right-justified in the register. + if (CS.paramHasAttr(AttrIdx, Attribute::InReg) || + CS.paramHasAttr(AttrIdx, Attribute::StructRet) || + CS.paramHasAttr(AttrIdx, Attribute::Nest) || + CS.paramHasAttr(AttrIdx, Attribute::ByVal)) + return false; + + ISD::ArgFlagsTy Flags; + if (CS.paramHasAttr(AttrIdx, Attribute::SExt)) + Flags.setSExt(); + if (CS.paramHasAttr(AttrIdx, Attribute::ZExt)) + Flags.setZExt(); + + Type *ArgTy = (*II)->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) + return false; + + if (ArgVT.isVector()) + return false; + + unsigned Arg = getRegForValue(*II); + if (Arg == 0) + return false; + + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + Flags.setOrigAlign(OriginalAlignment); + + Args.push_back(*II); + ArgRegs.push_back(Arg); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Process the arguments. + SmallVector RegArgs; + unsigned NumBytes; + + if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, + RegArgs, CC, NumBytes, IsVarArg)) + return false; + + // FIXME: No handling for function pointers yet. 
This requires + // implementing the function descriptor (OPD) setup. + const GlobalValue *GV = dyn_cast(Callee); + if (!GV) + return false; + + // Build direct call with NOP for TOC restore. + // FIXME: We can and should optimize away the NOP for local calls. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(PPC::BL8_NOP)); + // Add callee. + MIB.addGlobalAddress(GV); + + // Add implicit physical register uses to the call. + for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) + MIB.addReg(RegArgs[II], RegState::Implicit); + + // Add a register mask with the call-preserved registers. Proper + // defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + + // Finish off the call including any return values. + SmallVector UsedRegs; + finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg); + + // Set all unused physregs defs as dead. + static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + + return true; +} + // Attempt to fast-select a return instruction. bool PPCFastISel::SelectRet(const Instruction *I) { @@ -1414,6 +1736,10 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { return SelectBinaryIntOp(I, ISD::OR); case Instruction::Sub: return SelectBinaryIntOp(I, ISD::SUB); + case Instruction::Call: + if (dyn_cast(I)) + return false; + return SelectCall(I); case Instruction::Ret: return SelectRet(I); case Instruction::ZExt: @@ -1490,7 +1816,6 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // If GV is an alias, use the aliasee for determining thread-locality. if (const GlobalAlias *GA = dyn_cast(GV)) GVar = dyn_cast_or_null(GA->resolveAliasedGlobal(false)); - assert((GVar || isa(GV)) && "Unexpected GV subclass!"); } // FIXME: We don't yet handle the complexity of TLS. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 29c2270..244e00d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1813,8 +1813,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { - /* One of these will be CC_PPC64_ELF_FIS in a future patch. */ - return Flag ? RetCC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; + return Flag ? 
CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; } bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, -- cgit v1.1 From 9056dd45a4402cf6266b61f219aa56651633b2c1 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 30 Aug 2013 23:25:30 +0000 Subject: Correct partially defined variable git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189705 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 8a88e76..0276668 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -1294,7 +1294,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, CopyVT = MVT::i64; unsigned SourcePhysReg = VA.getLocReg(); - unsigned ResultReg; + unsigned ResultReg = 0; if (RetVT == CopyVT) { const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); @@ -1323,6 +1323,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, .addReg(SourcePhysReg); } + assert(ResultReg && "ResultReg unset!"); UsedRegs.push_back(SourcePhysReg); UpdateValueMap(I, ResultReg); } -- cgit v1.1 From 9d2238cb0f6f67ed6883a0e9f98a835c523724da Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 30 Aug 2013 23:31:33 +0000 Subject: [PowerPC] Add integer truncation support to fast-isel. This is the last substantive patch I'm planning for fast-isel in the near future, adding fast selection of integer truncates. There are certainly more things that can be improved (many of which are called out in FIXMEs), but for now we are catching most of the important cases. I'll document some of the remaining work in a cleanup patch shortly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189706 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 0276668..0e789cc 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -116,6 +116,7 @@ class PPCFastISel : public FastISel { bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); bool SelectCall(const Instruction *I); bool SelectRet(const Instruction *I); + bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); // Utility routines. @@ -1667,6 +1668,34 @@ bool PPCFastISel::SelectCmp(const Instruction *I) { return true; } +// Attempt to fast-select an integer truncate instruction. +bool PPCFastISel::SelectTrunc(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) + return false; + + if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // The only interesting case is when we need to switch register classes. + if (SrcVT == MVT::i64) { + unsigned ResultReg = createResultReg(&PPC::GPRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(SrcReg, 0, PPC::sub_32); + SrcReg = ResultReg; + } + + UpdateValueMap(I, SrcReg); + return true; +} + // Attempt to fast-select an integer extend instruction. 
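// (Both ZExt and SExt funnel here from TargetSelectInstruction.)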
bool PPCFastISel::SelectIntExt(const Instruction *I) { Type *DestTy = I->getType(); @@ -1743,6 +1772,8 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { return SelectCall(I); case Instruction::Ret: return SelectRet(I); + case Instruction::Trunc: + return SelectTrunc(I); case Instruction::ZExt: case Instruction::SExt: return SelectIntExt(I); -- cgit v1.1 From cda04f9a0a7ef0755ca36db404239346c0edb24c Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Sat, 31 Aug 2013 02:33:40 +0000 Subject: [PowerPC] Fast-isel cleanup patch. Here are a few miscellaneous things to tidy up the PPC64 fast-isel implementation. I corrected a couple of commentary lapses, and added documentation of future opportunities. I also implemented TargetMaterializeAlloca, which I somehow forgot when I split up the original huge patch. Finally, I decided to delete SelectCmp. I hadn't previously hooked it in to TargetSelectInstruction(), and when I did I realized it wasn't serving any useful purpose. This is only useful for compares that don't feed a branch in the same block, and to handle that we would have to have logic to interpret i1 as a condition register. This could probably be done, but would require Unseemly Hackery, and honestly does not seem worth the hassle. This ends the current patch series. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189715 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 57 +++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 20 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 0e789cc..09cc21b 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -37,6 +37,25 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +//===----------------------------------------------------------------------===// +// +// TBD: +// FastLowerArguments: Handle simple cases. +// PPCMaterializeGV: Handle TLS. +// SelectCall: Handle function pointers. +// SelectCall: Handle multi-register return values. +// SelectCall: Optimize away nops for local calls. +// processCallArgs: Handle bit-converted arguments. +// finishCall: Handle multi-register return values. +// PPCComputeAddress: Handle parameter references as FrameIndex's. +// PPCEmitCmp: Handle immediate as operand 1. +// SelectCall: Handle small byval arguments. +// SelectIntrinsicCall: Implement. +// SelectSelect: Implement. +// Consider factoring isTypeLegal into the base class. +// Implement switches and jump tables. +// +//===----------------------------------------------------------------------===// using namespace llvm; namespace { @@ -1651,23 +1670,6 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { return true; } -// Attempt to fast-select a compare instruction. -bool PPCFastISel::SelectCmp(const Instruction *I) { - const CmpInst *CI = cast(I); - Optional OptPPCPred = getComparePred(CI->getPredicate()); - if (!OptPPCPred) - return false; - - unsigned CondReg = createResultReg(&PPC::CRRCRegClass); - - if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CondReg)) - return false; - - UpdateValueMap(I, CondReg); - return true; -} - // Attempt to fast-select an integer truncate instruction. 
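// (On PPC64 truncation needs no instruction of its own; at most a copy into
// the 32-bit register class, as the i64 case in SelectTrunc shows.)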
bool PPCFastISel::SelectTrunc(const Instruction *I) { Value *Src = I->getOperand(0); @@ -2025,15 +2027,30 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) { return PPCMaterializeGV(GV, VT); else if (isa(C)) return PPCMaterializeInt(C, VT); - // TBD: Global values. return 0; } // Materialize the address created by an alloca into a register, and -// return the register number (or zero if we failed to handle it). TBD. +// return the register number (or zero if we failed to handle it). unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { - return AI && 0; + // Don't handle dynamic allocas. + if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; + + MVT VT; + if (!isLoadTypeLegal(AI->getType(), VT)) return 0; + + DenseMap::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + ResultReg).addFrameIndex(SI->second).addImm(0); + return ResultReg; + } + + return 0; } // Fold loads into extends when possible. -- cgit v1.1 From f53e9b4896e98fc07318c79274d0c5ed676f4548 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 31 Aug 2013 21:20:04 +0000 Subject: Mark an unreachable code path with llvm_unreachable. Pacifies GCC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 4f78f29..46e50bc 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -328,6 +328,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : AMDGPU::SReg_512RegClassID; break; + default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } else { // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG -- cgit v1.1 From b83c66eb5e1efb8a1b2b204fc67fe7d11e938248 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sun, 1 Sep 2013 04:12:59 +0000 Subject: Make sure we don't generate stubs for any of these functions because they don't exist in libc. This is really not the right way to solve this problem; but it's not clear to me at this time exactly what is the right way. If we create stubs here, they will cause link errors because these functions do not exist in libc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189727 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16HardFloat.cpp | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index a66abb2..a73d162 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -322,10 +322,29 @@ static void assureFPCallStub(Function &F, Module *M, } // -// Functions that are inline intrinsics don't need helpers. +// Functions that are llvm intrinsics and don't need helpers. 
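+// (This list must stay sorted: isIntrinsicInline below searches it with
+// std::binary_search.)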
// static const char *IntrinsicInline[] = - {"fabs", "llvm.powi.f64"}; + {"fabs", + "llvm.ceil.f32", "llvm.ceil.f64", + "llvm.copysign.f32", "llvm.copysign.f64", + "llvm.cos.f32", "llvm.cos.f64", + "llvm.exp.f32", "llvm.exp.f64", + "llvm.exp2.f32", "llvm.exp2.f64", + "llvm.fabs.f32", "llvm.fabs.f64", + "llvm.floor.f32", "llvm.floor.f64", + "llvm.fma.f32", "llvm.fma.f64", + "llvm.log.f32", "llvm.log.f64", + "llvm.log10.f32", "llvm.log10.f64", + "llvm.nearbyint.f32", "llvm.nearbyint.f64", + "llvm.pow.f32", "llvm.pow.f64", + "llvm.powi.f32", "llvm.powi.f64", + "llvm.rint.f32", "llvm.rint.f64", + "llvm.round.f32", "llvm.round.f64", + "llvm.sin.f32", "llvm.sin.f64", + "llvm.sqrt.f32", "llvm.sqrt.f64", + "llvm.trunc.f32", "llvm.trunc.f64", + }; static bool isIntrinsicInline(Function *F) { return std::binary_search( -- cgit v1.1 From 5510728d28bb1ee04abc32da3d21b7df12948053 Mon Sep 17 00:00:00 2001 From: Charles Davis Date: Sun, 1 Sep 2013 04:28:48 +0000 Subject: Move everything depending on Object/MachOFormat.h over to Support/MachO.h. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189728 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 32 ++--- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 129 ++++++++++---------- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 6 +- .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 84 ++++++------- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 10 +- .../X86/MCTargetDesc/X86MachORelocationInfo.cpp | 24 ++-- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 133 ++++++++++----------- 7 files changed, 210 insertions(+), 208 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index b1e25d8..828442f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -25,9 +25,9 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -640,16 +640,16 @@ public: // FIXME: This should be in a separate file. 
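// (DarwinARMAsmBackend now records the Mach-O CPU subtype; createARMAsmBackend
// below picks it from the triple's arch name.)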
class DarwinARMAsmBackend : public ARMAsmBackend { public: - const object::mach::CPUSubtypeARM Subtype; + const MachO::CPUSubTypeARM Subtype; DarwinARMAsmBackend(const Target &T, const StringRef TT, - object::mach::CPUSubtypeARM st) + MachO::CPUSubTypeARM st) : ARMAsmBackend(T, TT), Subtype(st) { HasDataInCodeSupport = true; } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - object::mach::CTM_ARM, + MachO::CPU_TYPE_ARM, Subtype); } @@ -664,18 +664,18 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { - object::mach::CPUSubtypeARM CS = - StringSwitch(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T) - .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ) - .Cases("armv6", "thumbv6", object::mach::CSARM_V6) - .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M) - .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM) - .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F) - .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K) - .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M) - .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S) - .Default(object::mach::CSARM_V7); + MachO::CPUSubTypeARM CS = + StringSwitch(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) + .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) + .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) + .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) + .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) + .Cases("armv7f", "thumbv7f", MachO::CPU_SUBTYPE_ARM_V7F) + .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) + .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) + .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) + .Default(MachO::CPU_SUBTYPE_ARM_V7); return new DarwinARMAsmBackend(T, TT, CS); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index b9efe74..1f681ba 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -20,10 +20,9 @@ #include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" using namespace llvm; -using namespace llvm::object; namespace { class ARMMachObjectWriter : public MCMachObjectTargetWriter { @@ -63,7 +62,7 @@ public: static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, unsigned &Log2Size) { - RelocType = unsigned(macho::RIT_Vanilla); + RelocType = unsigned(MachO::ARM_RELOC_VANILLA); Log2Size = ~0U; switch (Kind) { @@ -92,21 +91,21 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: - RelocType = unsigned(macho::RIT_ARM_Branch24Bit); + RelocType = unsigned(MachO::ARM_RELOC_BR24); // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); return true; // Handle Thumb branches. 
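    // (The 16-bit Thumb branch reports a 2-byte fixup, while BL/BLX below
    // span two halfwords and report 4 bytes.)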
case ARM::fixup_arm_thumb_br: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); Log2Size = llvm::Log2_32(2); return true; case ARM::fixup_t2_uncondbranch: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); Log2Size = llvm::Log2_32(4); return true; @@ -121,23 +120,23 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, // 1 - thumb instructions case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 1; return true; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 3; return true; case ARM::fixup_arm_movw_lo16: case ARM::fixup_arm_movw_lo16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 0; return true; case ARM::fixup_t2_movw_lo16: case ARM::fixup_t2_movw_lo16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 2; return true; } @@ -153,7 +152,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_ARM_Half; + unsigned Type = MachO::ARM_RELOC_HALF; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -179,7 +178,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. - Type = macho::RIT_ARM_HalfDifference; + Type = MachO::ARM_RELOC_HALF_SECTDIFF; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } @@ -223,29 +222,29 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, break; } - if (Type == macho::RIT_ARM_HalfDifference) { + if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) { uint32_t OtherHalf = MovtBit ? 
(FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); - macho::RelocationEntry MRE; - MRE.Word0 = ((OtherHalf << 0) | - (macho::RIT_Pair << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + MachO::any_relocation_info MRE; + MRE.r_word0 = ((OtherHalf << 0) | + (MachO::ARM_RELOC_PAIR << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; + MachO::any_relocation_info MRE; + MRE.r_word0 = ((FixupOffset << 0) | + (Type << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -259,7 +258,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; + unsigned Type = MachO::ARM_RELOC_VANILLA; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -284,31 +283,31 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. - Type = macho::RIT_Difference; + Type = MachO::ARM_RELOC_SECTDIFF; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + if (Type == MachO::ARM_RELOC_SECTDIFF || + Type == MachO::ARM_RELOC_LOCAL_SECTDIFF) { + MachO::any_relocation_info MRE; + MRE.r_word0 = ((0 << 0) | + (MachO::ARM_RELOC_PAIR << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; + MachO::any_relocation_info MRE; + MRE.r_word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -326,13 +325,13 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, switch (RelocType) { default: return false; - case macho::RIT_ARM_Branch24Bit: + case MachO::ARM_RELOC_BR24: // PC pre-adjustment of 8 for these instructions. Value -= 8; // ARM BL/BLX has a 25-bit offset. Range = 0x1ffffff; break; - case macho::RIT_ARM_ThumbBranch22Bit: + case MachO::ARM_THUMB_RELOC_BR22: // PC pre-adjustment of 4 for these instructions. Value -= 4; // Thumb BL/BLX has a 24-bit offset. 
@@ -361,7 +360,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; - unsigned RelocType = macho::RIT_Vanilla; + unsigned RelocType = MachO::ARM_RELOC_VANILLA; if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) // If we failed to get fixup kind info, it's because there's no legal // relocation type for the fixup kind. This happens when it's a fixup that's @@ -374,7 +373,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // scattered relocation entry. Differences always require scattered // relocations. if (Target.getSymB()) { - if (RelocType == macho::RIT_ARM_Half) + if (RelocType == MachO::ARM_RELOC_HALF) return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, @@ -392,7 +391,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // // Is this right for ARM? uint32_t Offset = Target.getConstant(); - if (IsPCRel && RelocType == macho::RIT_Vanilla) + if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA) Offset += 1 << Log2Size; if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, @@ -445,17 +444,17 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. - if (Type == macho::RIT_ARM_Half) { + if (Type == MachO::ARM_RELOC_HALF) { // The other-half value only gets populated for the movt and movw // relocation entries. uint32_t Value = 0; @@ -474,11 +473,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Value = FixedValue & 0xffff; break; } - macho::RelocationEntry MREPair; - MREPair.Word0 = Value; - MREPair.Word1 = ((0xffffff) | - (Log2Size << 25) | - (macho::RIT_Pair << 28)); + MachO::any_relocation_info MREPair; + MREPair.r_word0 = Value; + MREPair.r_word1 = ((0xffffff << 0) | + (Log2Size << 25) | + (MachO::ARM_RELOC_PAIR << 28)); Writer->addRelocation(Fragment->getParent(), MREPair); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index dd61954..ffeac43 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -16,9 +16,9 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -164,8 +164,8 @@ namespace { return createPPCMachObjectWriter( OS, /*Is64Bit=*/is64, - (is64 ? object::mach::CTM_PowerPC64 : object::mach::CTM_PowerPC), - object::mach::CSPPC_ALL); + (is64 ? 
MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC), + MachO::CPU_SUBTYPE_POWERPC_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index fc9b892..bbafe2e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -16,12 +16,11 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" using namespace llvm; -using namespace llvm::object; namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { @@ -90,29 +89,29 @@ static unsigned getRelocType(const MCValue &Target, Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); // determine the type of the relocation - unsigned Type = macho::RIT_Vanilla; + unsigned Type = MachO::GENERIC_RELOC_VANILLA; if (IsPCRel) { // relative to PC switch ((unsigned)FixupKind) { default: report_fatal_error("Unimplemented fixup kind (relative)"); case PPC::fixup_ppc_br24: - Type = macho::RIT_PPC_BR24; // R_PPC_REL24 + Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24 break; case PPC::fixup_ppc_brcond14: - Type = macho::RIT_PPC_BR14; + Type = MachO::PPC_RELOC_BR14; break; case PPC::fixup_ppc_half16: switch (Modifier) { default: llvm_unreachable("Unsupported modifier for half16 fixup"); case MCSymbolRefExpr::VK_PPC_HA: - Type = macho::RIT_PPC_HA16; + Type = MachO::PPC_RELOC_HA16; break; case MCSymbolRefExpr::VK_PPC_LO: - Type = macho::RIT_PPC_LO16; + Type = MachO::PPC_RELOC_LO16; break; case MCSymbolRefExpr::VK_PPC_HI: - Type = macho::RIT_PPC_HI16; + Type = MachO::PPC_RELOC_HI16; break; } break; @@ -126,13 +125,13 @@ static unsigned getRelocType(const MCValue &Target, default: llvm_unreachable("Unsupported modifier for half16 fixup"); case MCSymbolRefExpr::VK_PPC_HA: - Type = macho::RIT_PPC_HA16_SECTDIFF; + Type = MachO::PPC_RELOC_HA16_SECTDIFF; break; case MCSymbolRefExpr::VK_PPC_LO: - Type = macho::RIT_PPC_LO16_SECTDIFF; + Type = MachO::PPC_RELOC_LO16_SECTDIFF; break; case MCSymbolRefExpr::VK_PPC_HI: - Type = macho::RIT_PPC_HI16_SECTDIFF; + Type = MachO::PPC_RELOC_HI16_SECTDIFF; break; } break; @@ -145,30 +144,33 @@ static unsigned getRelocType(const MCValue &Target, return Type; } -static void makeRelocationInfo(macho::RelocationEntry &MRE, +static void makeRelocationInfo(MachO::any_relocation_info &MRE, const uint32_t FixupOffset, const uint32_t Index, const unsigned IsPCRel, const unsigned Log2Size, const unsigned IsExtern, const unsigned Type) { - MRE.Word0 = FixupOffset; + MRE.r_word0 = FixupOffset; // The bitfield offsets that work (as determined by trial-and-error) // are different than what is documented in the mach-o manuals. - // Is this an endianness issue w/ PPC? - MRE.Word1 = ((Index << 8) | // was << 0 - (IsPCRel << 7) | // was << 24 - (Log2Size << 5) | // was << 25 - (IsExtern << 4) | // was << 27 - (Type << 0)); // was << 28 + // This appears to be an endianness issue; reversing the order of the + // documented bitfields in fixes this (but + // breaks x86/ARM assembly). 
+ MRE.r_word1 = ((Index << 8) | // was << 0 + (IsPCRel << 7) | // was << 24 + (Log2Size << 5) | // was << 25 + (IsExtern << 4) | // was << 27 + (Type << 0)); // was << 28 } static void -makeScatteredRelocationInfo(macho::RelocationEntry &MRE, const uint32_t Addr, - const unsigned Type, const unsigned Log2Size, - const unsigned IsPCRel, const uint32_t Value2) { +makeScatteredRelocationInfo(MachO::any_relocation_info &MRE, + const uint32_t Addr, const unsigned Type, + const unsigned Log2Size, const unsigned IsPCRel, + const uint32_t Value2) { // For notes on bitfield positions and endianness, see: // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry - MRE.Word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) | (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) | + (IsPCRel << 30) | MachO::R_SCATTERED); + MRE.r_word1 = Value2; } /// Compute fixup offset (address). @@ -223,18 +225,19 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); - // FIXME: is Type correct? see include/llvm/Object/MachOFormat.h + // FIXME: is Type correct? see include/llvm/Support/MachO.h Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } // FIXME: does FixedValue get used?? // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_PPC_SECTDIFF || Type == macho::RIT_PPC_HI16_SECTDIFF || - Type == macho::RIT_PPC_LO16_SECTDIFF || - Type == macho::RIT_PPC_HA16_SECTDIFF || - Type == macho::RIT_PPC_LO14_SECTDIFF || - Type == macho::RIT_PPC_LOCAL_SECTDIFF) { + if (Type == MachO::PPC_RELOC_SECTDIFF || + Type == MachO::PPC_RELOC_HI16_SECTDIFF || + Type == MachO::PPC_RELOC_LO16_SECTDIFF || + Type == MachO::PPC_RELOC_HA16_SECTDIFF || + Type == MachO::PPC_RELOC_LO14_SECTDIFF || + Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) { // X86 had this piece, but ARM does not // If the offset is too large to fit in a scattered relocation, // we're hosed. It's an unfortunate limitation of the MachO format. @@ -253,7 +256,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( // see PPCMCExpr::EvaluateAsRelocatableImpl() uint32_t other_half = 0; switch (Type) { - case macho::RIT_PPC_LO16_SECTDIFF: + case MachO::PPC_RELOC_LO16_SECTDIFF: other_half = (FixedValue >> 16) & 0xffff; // applyFixupOffset longer extracts the high part because it now assumes // this was already done. @@ -262,12 +265,12 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( // So we need to adjust FixedValue again here. FixedValue &= 0xffff; break; - case macho::RIT_PPC_HA16_SECTDIFF: + case MachO::PPC_RELOC_HA16_SECTDIFF: other_half = FixedValue & 0xffff; FixedValue = ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 
1 : 0)) & 0xffff; break; - case macho::RIT_PPC_HI16_SECTDIFF: + case MachO::PPC_RELOC_HI16_SECTDIFF: other_half = FixedValue & 0xffff; FixedValue = (FixedValue >> 16) & 0xffff; break; @@ -276,9 +279,9 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( break; } - macho::RelocationEntry MRE; - makeScatteredRelocationInfo(MRE, other_half, macho::RIT_Pair, Log2Size, - IsPCRel, Value2); + MachO::any_relocation_info MRE; + makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR, + Log2Size, IsPCRel, Value2); Writer->addRelocation(Fragment->getParent(), MRE); } else { // If the offset is more than 24-bits, it won't fit in a scattered @@ -291,7 +294,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( if (FixupOffset > 0xffffff) return false; } - macho::RelocationEntry MRE; + MachO::any_relocation_info MRE; makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value); Writer->addRelocation(Fragment->getParent(), MRE); return true; @@ -312,7 +315,8 @@ void PPCMachObjectWriter::RecordPPCRelocation( // relocations. if (Target.getSymB() && // Q: are branch targets ever scattered? - RelocType != macho::RIT_PPC_BR24 && RelocType != macho::RIT_PPC_BR14) { + RelocType != MachO::PPC_RELOC_BR24 && + RelocType != MachO::PPC_RELOC_BR14) { RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); return; @@ -370,7 +374,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( } // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; + MachO::any_relocation_info MRE; makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern, Type); Writer->addRelocation(Fragment->getParent(), MRE); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 598ddee..fc3bae3 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -19,10 +19,10 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -395,8 +395,8 @@ public: MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, - object::mach::CTM_i386, - object::mach::CSX86_ALL); + MachO::CPU_TYPE_I386, + MachO::CPU_SUBTYPE_I386_ALL); } }; @@ -409,8 +409,8 @@ public: MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, - object::mach::CTM_x86_64, - object::mach::CSX86_ALL); + MachO::CPU_TYPE_X86_64, + MachO::CPU_SUBTYPE_X86_64_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index 75b5acf..209b1d0 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -17,7 +17,7 @@ using namespace llvm; using namespace object; -using namespace macho; +using namespace MachO; namespace { class X86_64MachORelocationInfo : public MCRelocationInfo { @@ -33,7 +33,7 @@ public: StringRef SymName; SymI->getName(SymName); uint64_t SymAddr; SymI->getAddress(SymAddr); - RelocationEntry RE = 
Obj->getRelocation(Rel.getRawDataRefImpl()); + any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl()); bool isPCRel = Obj->getAnyRelocationPCRel(RE); MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); @@ -43,44 +43,44 @@ public: const MCExpr *Expr = 0; switch(RelType) { - case RIT_X86_64_TLV: + case X86_64_RELOC_TLV: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); break; - case RIT_X86_64_Signed4: + case X86_64_RELOC_SIGNED_4: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(4, Ctx), Ctx); break; - case RIT_X86_64_Signed2: + case X86_64_RELOC_SIGNED_2: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(2, Ctx), Ctx); break; - case RIT_X86_64_Signed1: + case X86_64_RELOC_SIGNED_1: Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), MCConstantExpr::Create(1, Ctx), Ctx); break; - case RIT_X86_64_GOTLoad: + case X86_64_RELOC_GOT_LOAD: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); break; - case RIT_X86_64_GOT: + case X86_64_RELOC_GOT: Expr = MCSymbolRefExpr::Create(Sym, isPCRel ? MCSymbolRefExpr::VK_GOTPCREL : MCSymbolRefExpr::VK_GOT, Ctx); break; - case RIT_X86_64_Subtractor: + case X86_64_RELOC_SUBTRACTOR: { RelocationRef RelNext; Obj->getRelocationNext(Rel.getRawDataRefImpl(), RelNext); - RelocationEntry RENext = Obj->getRelocation(RelNext.getRawDataRefImpl()); + any_relocation_info RENext = Obj->getRelocation(RelNext.getRawDataRefImpl()); // X86_64_SUBTRACTOR must be followed by a relocation of type - // X86_64_RELOC_UNSIGNED . + // X86_64_RELOC_UNSIGNED. // NOTE: Scattered relocations don't exist on x86_64. unsigned RType = Obj->getAnyRelocationType(RENext); - if (RType != RIT_X86_64_Unsigned) + if (RType != X86_64_RELOC_UNSIGNED) report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 9db4cab..6ee47c7 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -16,12 +16,11 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" using namespace llvm; -using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { @@ -132,7 +131,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. - Type = macho::RIT_X86_64_Unsigned; + Type = MachO::X86_64_RELOC_UNSIGNED; Index = 0; // FIXME: I believe this is broken, I don't think the linker can understand @@ -141,7 +140,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // is to use an absolute symbol (which we don't support yet). 
if (IsPCRel) { IsExtern = 1; - Type = macho::RIT_X86_64_Branch; + Type = MachO::X86_64_RELOC_BRANCH; } } else if (Target.getSymB()) { // A - B + constant const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -193,15 +192,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; } - Type = macho::RIT_X86_64_Unsigned; - - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + Type = MachO::X86_64_RELOC_UNSIGNED; + + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); Writer->addRelocation(Fragment->getParent(), MRE); if (B_Base) { @@ -212,7 +211,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; } - Type = macho::RIT_X86_64_Subtractor; + Type = MachO::X86_64_RELOC_SUBTRACTOR; } else { const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); MCSymbolData &SD = Asm.getSymbolData(*Symbol); @@ -272,15 +271,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // rewrite the movq to an leaq at link time if the symbol ends up in // the same linkage unit. if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) - Type = macho::RIT_X86_64_GOTLoad; + Type = MachO::X86_64_RELOC_GOT_LOAD; else - Type = macho::RIT_X86_64_GOT; + Type = MachO::X86_64_RELOC_GOT; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - Type = macho::RIT_X86_64_TLV; + Type = MachO::X86_64_RELOC_TLV; } else if (Modifier != MCSymbolRefExpr::VK_None) { report_fatal_error("unsupported symbol modifier in relocation"); } else { - Type = macho::RIT_X86_64_Signed; + Type = MachO::X86_64_RELOC_SIGNED; // The Darwin x86_64 relocation format has a problem where it cannot // encode an address (L + ) which is outside the atom @@ -297,9 +296,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // (the additional bias), but instead appear to just look at the final // offset. switch (-(Target.getConstant() + (1LL << Log2Size))) { - case 1: Type = macho::RIT_X86_64_Signed1; break; - case 2: Type = macho::RIT_X86_64_Signed2; break; - case 4: Type = macho::RIT_X86_64_Signed4; break; + case 1: Type = MachO::X86_64_RELOC_SIGNED_1; break; + case 2: Type = MachO::X86_64_RELOC_SIGNED_2; break; + case 4: Type = MachO::X86_64_RELOC_SIGNED_4; break; } } } else { @@ -307,24 +306,24 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, report_fatal_error("unsupported symbol modifier in branch " "relocation"); - Type = macho::RIT_X86_64_Branch; + Type = MachO::X86_64_RELOC_BRANCH; } } else { if (Modifier == MCSymbolRefExpr::VK_GOT) { - Type = macho::RIT_X86_64_GOT; + Type = MachO::X86_64_RELOC_GOT; } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which // case all we do is set the PCrel bit in the relocation entry; this is // used with exception handling, for example. The source is required to // include any necessary offset directly. 
- Type = macho::RIT_X86_64_GOT; + Type = MachO::X86_64_RELOC_GOT; IsPCRel = 1; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { report_fatal_error("TLVP symbol modifier should have been rip-rel"); } else if (Modifier != MCSymbolRefExpr::VK_None) report_fatal_error("unsupported symbol modifier in relocation"); else { - Type = macho::RIT_X86_64_Unsigned; + Type = MachO::X86_64_RELOC_UNSIGNED; unsigned Kind = Fixup.getKind(); if (Kind == X86::reloc_signed_4byte) report_fatal_error("32-bit absolute addressing is not supported in " @@ -337,13 +336,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, FixedValue = Value; // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); Writer->addRelocation(Fragment->getParent(), MRE); } @@ -357,7 +356,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; + unsigned Type = MachO::GENERIC_RELOC_VANILLA; // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -384,15 +383,15 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Note that there is no longer any semantic difference between these two // relocation types from the linkers point of view, this is done solely for // pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : - (unsigned)macho::RIT_Generic_LocalDifference; + Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF : + (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { + if (Type == MachO::GENERIC_RELOC_SECTDIFF || + Type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) { // If the offset is too large to fit in a scattered relocation, // we're hosed. It's an unfortunate limitation of the MachO format. 
if (FixupOffset > 0xffffff) { @@ -406,13 +405,13 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, llvm_unreachable("fatal error returned?!"); } - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + MachO::any_relocation_info MRE; + MRE.r_word0 = ((0 << 0) | // r_address + (MachO::GENERIC_RELOC_PAIR << 24) | // r_type + (Log2Size << 28) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } else { // If the offset is more than 24-bits, it won't fit in a scattered @@ -426,13 +425,13 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, return false; } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; + MachO::any_relocation_info MRE; + MRE.r_word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); return true; } @@ -474,13 +473,13 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, } // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = Value; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (1 << 27) | // Extern - (macho::RIT_Generic_TLV << 28)); // Type + MachO::any_relocation_info MRE; + MRE.r_word0 = Value; + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // r_extern + (MachO::GENERIC_RELOC_TLV << 28)); // r_type Writer->addRelocation(Fragment->getParent(), MRE); } @@ -540,7 +539,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // // FIXME: Currently, these are never generated (see code below). I cannot // find a case where they are actually emitted. - Type = macho::RIT_Vanilla; + Type = MachO::GENERIC_RELOC_VANILLA; } else { // Resolve constant variables. if (SD->getSymbol().isVariable()) { @@ -571,17 +570,17 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, if (IsPCRel) FixedValue -= Writer->getSectionAddress(Fragment->getParent()); - Type = macho::RIT_Vanilla; + Type = MachO::GENERIC_RELOC_VANILLA; } // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); Writer->addRelocation(Fragment->getParent(), MRE); } -- cgit v1.1 From 6adcd58d3c58a8eeb21bc1bfe399c7b03592f273 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 1 Sep 2013 14:24:41 +0000 Subject: AVX-512: Added GATHER and SCATTER instructions. 
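[Editorial note, not part of the original commit message] A gather loads each
enabled vector lane from base + index[i] * scale and a scatter is its
store-side dual; the mask operand is also written back, with a lane's bit
cleared as that lane completes, which is why the instruction definitions
below tie $mask to a $mask_wb result. A scalar sketch of the assumed
per-lane semantics (hypothetical helper, not LLVM API):

    // Masked gather, one lane at a time. Disabled lanes keep the
    // pass-through value; 'scale' is a byte multiplier, as in the
    // addressing mode. The mask ends up all-zero on completion.
    void gather_sketch(double *dst, const char *base, const long long *index,
                       int scale, bool *mask, const double *passthru,
                       int lanes) {
      for (int i = 0; i < lanes; ++i) {
        dst[i] = mask[i] ? *(const double *)(base + index[i] * scale)
                         : passthru[i];
        mask[i] = false;
      }
    }

The unmasked intrinsic variants are lowered the same way with an all-ones
mask and a zeroed pass-through source, as getGatherNode below shows.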
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189729 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 225 ++++++++++++++++++++++++++++++++++++- lib/Target/X86/X86InstrAVX512.td | 63 +++++++++++ 2 files changed, 285 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 73c4a1c..739c144 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1445,6 +1445,7 @@ void X86TargetLowering::resetOperationActions() { // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -11623,7 +11624,87 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } } -static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { +static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Base, SDValue Index, + SDValue ScaleOp, SDValue Chain, + const X86Subtarget * Subtarget) { + SDLoc dl(Op); + ConstantSDNode *C = dyn_cast(ScaleOp); + assert(C && "Invalid scale type"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getValueType().getVectorNumElements()); + SDValue MaskInReg = DAG.getConstant(~0, MaskVT); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); + SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; + return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl); +} + +static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Src, SDValue Mask, SDValue Base, + SDValue Index, SDValue ScaleOp, SDValue Chain, + const X86Subtarget * Subtarget) { + SDLoc dl(Op); + ConstantSDNode *C = dyn_cast(ScaleOp); + assert(C && "Invalid scale type"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getValueType().getVectorNumElements()); + SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); + SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + if (Src.getOpcode() == ISD::UNDEF) + Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); + SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; + return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl); +} + +static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Src, SDValue Base, SDValue Index, + SDValue ScaleOp, SDValue Chain) { + SDLoc dl(Op); + ConstantSDNode *C = dyn_cast(ScaleOp); + assert(C && "Invalid scale type"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue 
Segment = DAG.getRegister(0, MVT::i32); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getValueType().getVectorNumElements()); + SDValue MaskInReg = DAG.getConstant(~0, MaskVT); + SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); + SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + return SDValue(Res, 1); +} + +static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Src, SDValue Mask, SDValue Base, + SDValue Index, SDValue ScaleOp, SDValue Chain) { + SDLoc dl(Op); + ConstantSDNode *C = dyn_cast(ScaleOp); + assert(C && "Invalid scale type"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getValueType().getVectorNumElements()); + SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); + SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + return SDValue(Res, 1); +} + +static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); switch (IntNo) { @@ -11658,7 +11739,144 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, SDValue(Result.getNode(), 2)); } - + //int_gather(index, base, scale); + case Intrinsic::x86_avx512_gather_qpd_512: + case Intrinsic::x86_avx512_gather_qps_512: + case Intrinsic::x86_avx512_gather_dpd_512: + case Intrinsic::x86_avx512_gather_qpi_512: + case Intrinsic::x86_avx512_gather_qpq_512: + case Intrinsic::x86_avx512_gather_dpq_512: + case Intrinsic::x86_avx512_gather_dps_512: + case Intrinsic::x86_avx512_gather_dpi_512: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break; + case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break; + case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break; + case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break; + case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break; + case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break; + case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break; + case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break; + } + SDValue Chain = Op.getOperand(0); + SDValue Index = Op.getOperand(2); + SDValue Base = Op.getOperand(3); + SDValue Scale = Op.getOperand(4); + return getGatherNode(Opc, Op, DAG, Base, Index, Scale, Chain, Subtarget); + } + //int_gather_mask(v1, mask, index, base, scale); + case Intrinsic::x86_avx512_gather_qps_mask_512: + case Intrinsic::x86_avx512_gather_qpd_mask_512: + case Intrinsic::x86_avx512_gather_dpd_mask_512: + case Intrinsic::x86_avx512_gather_dps_mask_512: + case Intrinsic::x86_avx512_gather_qpi_mask_512: + case Intrinsic::x86_avx512_gather_qpq_mask_512: + case Intrinsic::x86_avx512_gather_dpi_mask_512: + case Intrinsic::x86_avx512_gather_dpq_mask_512: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::x86_avx512_gather_qps_mask_512: + Opc = 
X86::VGATHERQPSZrm; break; + case Intrinsic::x86_avx512_gather_qpd_mask_512: + Opc = X86::VGATHERQPDZrm; break; + case Intrinsic::x86_avx512_gather_dpd_mask_512: + Opc = X86::VGATHERDPDZrm; break; + case Intrinsic::x86_avx512_gather_dps_mask_512: + Opc = X86::VGATHERDPSZrm; break; + case Intrinsic::x86_avx512_gather_qpi_mask_512: + Opc = X86::VPGATHERQDZrm; break; + case Intrinsic::x86_avx512_gather_qpq_mask_512: + Opc = X86::VPGATHERQQZrm; break; + case Intrinsic::x86_avx512_gather_dpi_mask_512: + Opc = X86::VPGATHERDDZrm; break; + case Intrinsic::x86_avx512_gather_dpq_mask_512: + Opc = X86::VPGATHERDQZrm; break; + } + SDValue Chain = Op.getOperand(0); + SDValue Src = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue Index = Op.getOperand(4); + SDValue Base = Op.getOperand(5); + SDValue Scale = Op.getOperand(6); + return getMGatherNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain, + Subtarget); + } + //int_scatter(base, index, v1, scale); + case Intrinsic::x86_avx512_scatter_qpd_512: + case Intrinsic::x86_avx512_scatter_qps_512: + case Intrinsic::x86_avx512_scatter_dpd_512: + case Intrinsic::x86_avx512_scatter_qpi_512: + case Intrinsic::x86_avx512_scatter_qpq_512: + case Intrinsic::x86_avx512_scatter_dpq_512: + case Intrinsic::x86_avx512_scatter_dps_512: + case Intrinsic::x86_avx512_scatter_dpi_512: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::x86_avx512_scatter_qpd_512: + Opc = X86::VSCATTERQPDZmr; break; + case Intrinsic::x86_avx512_scatter_qps_512: + Opc = X86::VSCATTERQPSZmr; break; + case Intrinsic::x86_avx512_scatter_dpd_512: + Opc = X86::VSCATTERDPDZmr; break; + case Intrinsic::x86_avx512_scatter_dps_512: + Opc = X86::VSCATTERDPSZmr; break; + case Intrinsic::x86_avx512_scatter_qpi_512: + Opc = X86::VPSCATTERQDZmr; break; + case Intrinsic::x86_avx512_scatter_qpq_512: + Opc = X86::VPSCATTERQQZmr; break; + case Intrinsic::x86_avx512_scatter_dpq_512: + Opc = X86::VPSCATTERDQZmr; break; + case Intrinsic::x86_avx512_scatter_dpi_512: + Opc = X86::VPSCATTERDDZmr; break; + } + SDValue Chain = Op.getOperand(0); + SDValue Base = Op.getOperand(2); + SDValue Index = Op.getOperand(3); + SDValue Src = Op.getOperand(4); + SDValue Scale = Op.getOperand(5); + return getScatterNode(Opc, Op, DAG, Src, Base, Index, Scale, Chain); + } + //int_scatter_mask(base, mask, index, v1, scale); + case Intrinsic::x86_avx512_scatter_qps_mask_512: + case Intrinsic::x86_avx512_scatter_qpd_mask_512: + case Intrinsic::x86_avx512_scatter_dpd_mask_512: + case Intrinsic::x86_avx512_scatter_dps_mask_512: + case Intrinsic::x86_avx512_scatter_qpi_mask_512: + case Intrinsic::x86_avx512_scatter_qpq_mask_512: + case Intrinsic::x86_avx512_scatter_dpi_mask_512: + case Intrinsic::x86_avx512_scatter_dpq_mask_512: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::x86_avx512_scatter_qpd_mask_512: + Opc = X86::VSCATTERQPDZmr; break; + case Intrinsic::x86_avx512_scatter_qps_mask_512: + Opc = X86::VSCATTERQPSZmr; break; + case Intrinsic::x86_avx512_scatter_dpd_mask_512: + Opc = X86::VSCATTERDPDZmr; break; + case Intrinsic::x86_avx512_scatter_dps_mask_512: + Opc = X86::VSCATTERDPSZmr; break; + case Intrinsic::x86_avx512_scatter_qpi_mask_512: + Opc = X86::VPSCATTERQDZmr; break; + case Intrinsic::x86_avx512_scatter_qpq_mask_512: + Opc = X86::VPSCATTERQQZmr; break; + case Intrinsic::x86_avx512_scatter_dpq_mask_512: + Opc = X86::VPSCATTERDQZmr; break; + case Intrinsic::x86_avx512_scatter_dpi_mask_512: 
+ Opc = X86::VPSCATTERDDZmr; break; + } + SDValue Chain = Op.getOperand(0); + SDValue Base = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue Index = Op.getOperand(4); + SDValue Src = Op.getOperand(5); + SDValue Scale = Op.getOperand(6); + return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain); + } // XTEST intrinsics. case Intrinsic::x86_xtest: { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); @@ -13093,7 +13311,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::FRAME_TO_ARGS_OFFSET: diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 6b2f160..ea3a4e1 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2923,6 +2923,69 @@ defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext, EVEX_CD8<32, CD8VH>; //===----------------------------------------------------------------------===// +// GATHER - SCATTER Operations + +multiclass avx512_gather opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayLoad = 1, + Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in + def rm : AVX5128I, EVEX, EVEX_K; +} +defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +multiclass avx512_scatter opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayStore = 1, Constraints = "$mask = $mask_wb" in + def mr : AVX5128I, EVEX, EVEX_K; +} + +defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>, + 
EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +//===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations multiclass avx512_shufp Date: Mon, 2 Sep 2013 07:12:29 +0000 Subject: AVX-512: gather-scatter tests; added foldable instructions; Specify GATHER/SCATTER as heavy instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189736 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 46 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index c4c090b..c9942c5 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -375,7 +375,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, - { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE } + { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }, + // AVX-512 foldable instructions + { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE } }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { @@ -581,6 +583,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::TZCNT16rr, X86::TZCNT16rm, 0 }, { X86::TZCNT32rr, X86::TZCNT32rm, 0 }, { X86::TZCNT64rr, X86::TZCNT64rm, 0 }, + + // AVX-512 foldable instructions + { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, + { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, + { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 }, + { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 }, + { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 }, + { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -1180,12 +1190,35 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PEXT64rr, X86::PEXT64rm, 0 }, // AVX-512 foldable instructions + { X86::VPADDDZrr, X86::VPADDDZrm, 0 }, + { X86::VPADDQZrr, X86::VPADDQZrm, 0 }, + { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, + { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, + { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 }, + { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 }, + { X86::VMULPSZrr, X86::VMULPSZrm, 0 }, + { X86::VMULPDZrr, X86::VMULPDZrm, 0 }, + { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 }, + { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 }, + { X86::VMINPSZrr, X86::VMINPSZrm, 0 }, + { X86::VMINPDZrr, X86::VMINPDZrm, 0 }, + { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 }, + { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 }, { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, + { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 }, + { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 }, + { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 }, + { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 }, + { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 }, + { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 }, + { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, + { X86::VALIGNQrri, X86::VALIGNQrmi, 0 }, + { X86::VALIGNDrri, X86::VALIGNDrmi, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; 
++i) { @@ -4010,6 +4043,8 @@ static bool hasPartialRegUpdate(unsigned Opcode) { case X86::Int_VCVTSD2SSrr: case X86::VCVTSS2SDrr: case X86::Int_VCVTSS2SDrr: + case X86::VCVTSD2SSZrr: + case X86::VCVTSS2SDZrr: case X86::VRCPSSr: case X86::VROUNDSDr: case X86::VROUNDSDr_Int: @@ -5064,6 +5099,15 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VSQRTSSm: case X86::VSQRTSSm_Int: case X86::VSQRTSSr: + + case X86::VGATHERQPSZrm: + case X86::VGATHERQPDZrm: + case X86::VGATHERDPDZrm: + case X86::VGATHERDPSZrm: + case X86::VPGATHERQDZrm: + case X86::VPGATHERQQZrm: + case X86::VPGATHERDDZrm: + case X86::VPGATHERDQZrm: return true; } } -- cgit v1.1 From da0ce6eb8b11083b5cc1849b625509b87a7d9db9 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 2 Sep 2013 07:41:01 +0000 Subject: AVX-512: updated the list of high-latency instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189740 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index c9942c5..9b02b07 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -5099,6 +5099,20 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VSQRTSSm: case X86::VSQRTSSm_Int: case X86::VSQRTSSr: + case X86::VSQRTPDZrm: + case X86::VSQRTPDZrr: + case X86::VSQRTPSZrm: + case X86::VSQRTPSZrr: + case X86::VSQRTSDZm: + case X86::VSQRTSDZm_Int: + case X86::VSQRTSDZr: + case X86::VSQRTSSZm_Int: + case X86::VSQRTSSZr: + case X86::VSQRTSSZm: + case X86::VDIVSDZrm: + case X86::VDIVSDZrr: + case X86::VDIVSSZrm: + case X86::VDIVSSZrr: case X86::VGATHERQPSZrm: case X86::VGATHERQPDZrm: @@ -5107,7 +5121,15 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VPGATHERQDZrm: case X86::VPGATHERQQZrm: case X86::VPGATHERDDZrm: - case X86::VPGATHERDQZrm: + case X86::VPGATHERDQZrm: + case X86::VSCATTERQPDZmr: + case X86::VSCATTERQPSZmr: + case X86::VSCATTERDPDZmr: + case X86::VSCATTERDPSZmr: + case X86::VPSCATTERQDZmr: + case X86::VPSCATTERQQZmr: + case X86::VPSCATTERDDZmr: + case X86::VPSCATTERDQZmr: return true; } } -- cgit v1.1 From 69c474ffa8a4b0186067e39d338a165d4165af2b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 2 Sep 2013 07:53:17 +0000 Subject: Create BEXTR instructions for (and ((sra or srl) x, imm), (2**size - 1)). Fixes PR17028. 
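[Editorial note, not part of the original commit message] BEXTR's second
operand packs the field descriptor: bits 7:0 hold the start position and
bits 15:8 the length, which is why the combine builds its constant as
Shift | (MaskSize << 8). A scalar sketch of what the matched pattern, and
hence the instruction, computes:

    #include <stdint.h>

    // Extract 'len' bits of 'x' starting at bit 'start'. Assumes start < 64
    // and start + len <= 64, mirroring the combine's
    // Shift + MaskSize <= VT.getSizeInBits() guard.
    uint64_t bextr_sketch(uint64_t x, unsigned start, unsigned len) {
      uint64_t mask = (len >= 64) ? ~0ULL : ((1ULL << len) - 1);
      return (x >> start) & mask;
    }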
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189742 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++++++++ lib/Target/X86/X86ISelLowering.h | 1 + lib/Target/X86/X86InstrInfo.td | 10 ++++++++++ 3 files changed, 29 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 739c144..ba6c9c0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13693,6 +13693,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::BLSMSK: return "X86ISD::BLSMSK"; case X86ISD::BLSR: return "X86ISD::BLSR"; case X86ISD::BZHI: return "X86ISD::BZHI"; + case X86ISD::BEXTR: return "X86ISD::BEXTR"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; @@ -17503,6 +17504,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, // BLSI is X & (-X) // BLSR is X & (X-1) // BZHI is X & ((1 << Y) - 1) + // BEXTR is ((X >> imm) & (2**size-1)) if (VT == MVT::i32 || VT == MVT::i64) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -17528,6 +17530,22 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 && isAllOnes(N1.getOperand(1))) return DAG.getNode(X86ISD::BLSR, DL, VT, N0); + + // Check for BEXTR + if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL) { + ConstantSDNode *MaskNode = dyn_cast(N1); + ConstantSDNode *ShiftNode = dyn_cast(N0.getOperand(1)); + if (MaskNode && ShiftNode) { + uint64_t Mask = MaskNode->getZExtValue(); + uint64_t Shift = ShiftNode->getZExtValue(); + if (isMask_64(Mask)) { + uint64_t MaskSize = CountPopulation_64(Mask); + if (Shift + MaskSize <= VT.getSizeInBits()) + return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0), + DAG.getConstant(Shift | (MaskSize << 8), VT)); + } + } + } } if (Subtarget->hasBMI2()) { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6a2e4d2..642df94 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -296,6 +296,7 @@ namespace llvm { BLSMSK, // BLSMSK - Get mask up to lowest set bit BLSR, // BLSR - Reset lowest set bit BZHI, // BZHI - Zero high bits + BEXTR, // BEXTR - Bit field extract UMUL, // LOW, HI, FLAGS = umul LHS, RHS diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 869b9e0..9aa75ff 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -253,6 +253,7 @@ def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>; def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>; def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>; def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntShiftOp>; +def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntShiftOp>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -1869,6 +1870,15 @@ def : Pat<(X86bzhi (loadi64 addr:$src1), GR8:$src2), (BZHI64rm addr:$src1, (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; +def : Pat<(X86bextr GR32:$src1, GR32:$src2), + (BEXTR32rr GR32:$src1, GR32:$src2)>; +def : Pat<(X86bextr (loadi32 addr:$src1), GR32:$src2), + (BEXTR32rm addr:$src1, GR32:$src2)>; +def : Pat<(X86bextr GR64:$src1, GR64:$src2), + (BEXTR64rr GR64:$src1, GR64:$src2)>; +def : Pat<(X86bextr (loadi64 addr:$src1), GR64:$src2), + (BEXTR64rm addr:$src1, GR64:$src2)>; + multiclass bmi_pdep_pext { -- cgit v1.1 From 
024e76b69bc46a20e96eba22f2655d249c495d00 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Mon, 2 Sep 2013 12:01:58 +0000 Subject: ARM: Default to Swift when compiling for iOS 6 or later. Test cases adjusted accordingly. This fixes rdar://14871821. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189756 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 3111f5e..a8e62e5 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -133,8 +133,13 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { - if (CPUString.empty()) - CPUString = "generic"; + if (CPUString.empty()) { + if (isTargetIOS() && !getTargetTriple().isOSVersionLT(6)) + // Default to Swift for iOS 6 or later versions. + CPUString = "swift"; + else + CPUString = "generic"; + } // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied -- cgit v1.1 From 5bed440eb13b4104b64fa9c557954f335aac2aab Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Mon, 2 Sep 2013 15:48:17 +0000 Subject: Revert 189756 for now, it doesn't match what rdar://14871821 really wants. What we really want is to enable Swift by default for *v7s triples (and there already seems to be some logic which attempts to do that). In that case the iOS version doesn't matter. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189763 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index a8e62e5..3111f5e 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -133,13 +133,8 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { - if (CPUString.empty()) { - if (isTargetIOS() && !getTargetTriple().isOSVersionLT(6)) - // Default to Swift for iOS 6 or later versions. - CPUString = "swift"; - else - CPUString = "generic"; - } + if (CPUString.empty()) + CPUString = "generic"; // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied -- cgit v1.1 From 195dd8a1ce38970e3463ee1425647280373b60a7 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Mon, 2 Sep 2013 17:09:01 +0000 Subject: ARM: Default to the Swift CPU when targeting armv7s/thumbv7s. Test cases adjusted accordingly. This fixes rdar://14871821. 
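[Editorial note, not part of the original commit message] Unlike r189756,
which keyed on the iOS version, this keys on the arch component of the
triple, so the default comes out as (illustrative, assuming the usual triple
spellings):

    armv7s-apple-ios    ->  arch name "armv7s"    ->  CPU "swift"
    thumbv7s-apple-ios  ->  arch name "thumbv7s"  ->  CPU "swift"
    armv7-apple-ios     ->  arch name "armv7"     ->  CPU "generic"

An explicitly requested CPU is untouched, since the default applies only
when CPUString is empty.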
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189766 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 3111f5e..e9254c3 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -133,8 +133,13 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { - if (CPUString.empty()) - CPUString = "generic"; + if (CPUString.empty()) { + if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s")) + // Default to the Swift CPU when targeting armv7s/thumbv7s. + CPUString = "swift"; + else + CPUString = "generic"; + } // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied -- cgit v1.1 From 6ee0857bd74dd68e46e970a6bcf756ee03ed8e99 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Mon, 2 Sep 2013 18:32:45 +0000 Subject: [Sparc] Implement spill and load for long double(f128) registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189768 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcInstrInfo.cpp | 82 ++++++++++++++++++++++++++-------- lib/Target/Sparc/SparcRegisterInfo.cpp | 77 +++++++++++++++++++++++-------- 2 files changed, 123 insertions(+), 36 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 6c14bc9..0342db4 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -44,7 +44,8 @@ unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, if (MI->getOpcode() == SP::LDri || MI->getOpcode() == SP::LDXri || MI->getOpcode() == SP::LDFri || - MI->getOpcode() == SP::LDDFri) { + MI->getOpcode() == SP::LDDFri || + MI->getOpcode() == SP::LDQFri) { if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -64,7 +65,8 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI, if (MI->getOpcode() == SP::STri || MI->getOpcode() == SP::STXri || MI->getOpcode() == SP::STFri || - MI->getOpcode() == SP::STDFri) { + MI->getOpcode() == SP::STDFri || + MI->getOpcode() == SP::STQFri) { if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) { FrameIndex = MI->getOperand(0).getIndex(); @@ -273,6 +275,16 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { + unsigned numSubRegs = 0; + unsigned movOpc = 0; + const unsigned *subRegIdx = 0; + + const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; + const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 }; + const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd, + SP::sub_odd64_then_sub_even, + SP::sub_odd64_then_sub_odd }; + if (SP::IntRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -285,23 +297,47 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } else { // Use two FMOVS instructions. 
- const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineInstr *MovMI = 0; - unsigned subRegIdx[] = {SP::sub_even, SP::sub_odd}; - for (unsigned i = 0; i != 2; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]); - unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); - assert(Dst && Src && "Bad sub-register"); - - MovMI = BuildMI(MBB, I, DL, get(SP::FMOVS), Dst).addReg(Src); + subRegIdx = DFP_FP_SubRegsIdx; + numSubRegs = 2; + movOpc = SP::FMOVS; + } + } else if (SP::QFPRegsRegClass.contains(DestReg, SrcReg)) { + if (Subtarget.isV9()) { + if (Subtarget.hasHardQuad()) { + BuildMI(MBB, I, DL, get(SP::FMOVQ), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + // Use two FMOVD instructions. + subRegIdx = QFP_DFP_SubRegsIdx; + numSubRegs = 2; + movOpc = SP::FMOVD; } - // Add implicit super-register defs and kills to the last MovMI. - MovMI->addRegisterDefined(DestReg, TRI); - if (KillSrc) - MovMI->addRegisterKilled(SrcReg, TRI); + } else { + // Use four FMOVS instructions. + subRegIdx = QFP_FP_SubRegsIdx; + numSubRegs = 4; + movOpc = SP::FMOVS; } } else llvm_unreachable("Impossible reg-to-reg copy"); + + if (numSubRegs == 0 || subRegIdx == 0 || movOpc == 0) + return; + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstr *MovMI = 0; + + for (unsigned i = 0; i != numSubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]); + unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); + assert(Dst && Src && "Bad sub-register"); + + MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src); + } + // Add implicit super-register defs and kills to the last MovMI. + MovMI->addRegisterDefined(DestReg, TRI); + if (KillSrc) + MovMI->addRegisterKilled(SrcReg, TRI); } void SparcInstrInfo:: @@ -321,7 +357,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectAlignment(FI)); // On the order of operands here: think "[FrameIdx + 0] = SrcReg". - if (RC == &SP::I64RegsRegClass) + if (RC == &SP::I64RegsRegClass) BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::IntRegsRegClass) @@ -330,9 +366,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); - else if (RC == &SP::DFPRegsRegClass) + else if (SP::DFPRegsRegClass.hasSubClassEq(RC)) BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + else if (SP::QFPRegsRegClass.hasSubClassEq(RC)) + // Use STQFri irrespective of its legality. If STQ is not legal, it will be + // lowered into two STDs in eliminateFrameIndex. + BuildMI(MBB, I, DL, get(SP::STQFri)).addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else llvm_unreachable("Can't store this register to stack slot"); } @@ -362,9 +403,14 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); - else if (RC == &SP::DFPRegsRegClass) + else if (SP::DFPRegsRegClass.hasSubClassEq(RC)) BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); + else if (SP::QFPRegsRegClass.hasSubClassEq(RC)) + // Use LDQFri irrespective of its legality. 
If LDQ is not legal, it will be + // lowered into two LDDs in eliminateFrameIndex. + BuildMI(MBB, I, DL, get(SP::LDQFri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else llvm_unreachable("Can't load this register from stack slot"); } diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index d940ae6..09f52fb 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -87,6 +87,35 @@ SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF, return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass; } +static void replaceFI(MachineFunction &MF, + MachineBasicBlock::iterator II, + MachineInstr &MI, + DebugLoc dl, + unsigned FIOperandNum, int Offset, + unsigned FramePtr) +{ + // Replace frame index with a frame pointer reference. + if (Offset >= -4096 && Offset <= 4095) { + // If the offset is small enough to fit in the immediate field, directly + // encode it. + MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + } else { + // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to + // scavenge a register here instead of reserving G1 all of the time. + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + unsigned OffHi = (unsigned)Offset >> 10U; + BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); + // Emit G1 = G1 + I6 + BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) + .addReg(FramePtr); + // Insert: G1+%lo(offset) into the user. + MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1)); + } +} + + void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, @@ -111,25 +140,37 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ; } - // Replace frame index with a frame pointer reference. - if (Offset >= -4096 && Offset <= 4095) { - // If the offset is small enough to fit in the immediate field, directly - // encode it. - MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - } else { - // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to - // scavenge a register here instead of reserving G1 all of the time. - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - unsigned OffHi = (unsigned)Offset >> 10U; - BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); - // Emit G1 = G1 + I6 - BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) - .addReg(FramePtr); - // Insert: G1+%lo(offset) into the user. 
-    MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
-    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
+  if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
+    if (MI.getOpcode() == SP::STQFri) {
+      const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+      unsigned SrcReg = MI.getOperand(2).getReg();
+      unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
+      unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
+      MachineInstr *StMI =
+        BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
+        .addReg(FramePtr).addImm(0).addReg(SrcEvenReg);
+      replaceFI(MF, II, *StMI, dl, 0, Offset, FramePtr);
+      MI.setDesc(TII.get(SP::STDFri));
+      MI.getOperand(2).setReg(SrcOddReg);
+      Offset += 8;
+    } else if (MI.getOpcode() == SP::LDQFri) {
+      const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+      unsigned DestReg = MI.getOperand(0).getReg();
+      unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
+      unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
+      MachineInstr *StMI =
+        BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
+        .addReg(FramePtr).addImm(0);
+      replaceFI(MF, II, *StMI, dl, 1, Offset, FramePtr);
+
+      MI.setDesc(TII.get(SP::LDDFri));
+      MI.getOperand(0).setReg(DestOddReg);
+      Offset += 8;
+    }
+  }
+
+  replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FramePtr);
+
 }

 unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
--
cgit v1.1


From 4ccfbedaedc35920dc437aec501ec2bc4f8a4a2a Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 3 Sep 2013 03:56:17 +0000
Subject: Add hasSideEffects=0 to some instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189779 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrControl.td | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 0e69651..e4ccc06 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -49,10 +49,12 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
 let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
   def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), "jmp\t$dst",
               [(br bb:$dst)], IIC_JMP_REL>;
+  let hasSideEffects = 0 in
   def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), "jmp\t$dst",
               [], IIC_JMP_REL>;
   // FIXME : Intel syntax for JMP64pcrel32 such that it is not ambiguious
   // with JMP_1.
+  let hasSideEffects = 0 in
   def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
                        "jmpq\t$dst", [], IIC_JMP_REL>;
 }
@@ -60,6 +62,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
 // Conditional Branches.
 let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
   multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+    let hasSideEffects = 0 in
     def _1 : Ii8PCRel ;
     def _4 : Ii32PCRel;
 defm JG  : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;

 // jcx/jecx/jrcx instructions.
-let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
   // These are the 32-bit versions of this instruction for the asmparser. In
   // 32-bit mode, the address size prefix is jcxz and the unprefixed version is
   // jecxz.
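// [Editorial note] None of the forms touched above carry a selection
// pattern, and an instruction without a pattern cannot have its side
// effects inferred from one, so it is conservatively treated as having
// unmodeled side effects. Writing hasSideEffects = 0 records explicitly
// that these jumps do nothing beyond their modeled control flow.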
-- cgit v1.1 From 75ddb2bb34f96c2bda48d0e86331fb52b55b8d03 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Tue, 3 Sep 2013 04:11:59 +0000 Subject: [Sparc] Add support for soft long double (fp128). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189780 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcISelLowering.cpp | 414 +++++++++++++++++++++++++++++++-- lib/Target/Sparc/SparcISelLowering.h | 17 ++ lib/Target/Sparc/SparcInstrInfo.td | 9 + 3 files changed, 422 insertions(+), 18 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 654a03a..3250537 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -915,6 +915,22 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, return Chain; } +// This functions returns true if CalleeName is a ABI function that returns +// a long double (fp128). +static bool isFP128ABICall(const char *CalleeName) +{ + static const char *const ABICalls[] = + { "_Q_add", "_Q_sub", "_Q_mul", "_Q_div", + "_Q_sqrt", "_Q_neg", + "_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq", + 0 + }; + for (const char * const *I = ABICalls; I != 0; ++I) + if (strcmp(CalleeName, *I) == 0) + return true; + return false; +} + unsigned SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const { @@ -925,7 +941,10 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const dyn_cast(Callee)) { const Function *Fn = DAG.getMachineFunction().getFunction(); const Module *M = Fn->getParent(); - CalleeFn = M->getFunction(E->getSymbol()); + const char *CalleeName = E->getSymbol(); + CalleeFn = M->getFunction(CalleeName); + if (!CalleeFn && isFP128ABICall(CalleeName)) + return 16; // Return sizeof(fp128) } if (!CalleeFn) @@ -1428,6 +1447,47 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FNEG, MVT::f128, Custom); setOperationAction(ISD::FABS, MVT::f128, Custom); } + } else { + // Custom legalize f128 operations. + + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FSQRT, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Custom); + setOperationAction(ISD::FABS, MVT::f128, Custom); + + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + + // Setup Runtime library names. 
+  if (Subtarget->is64Bit()) {
+    setLibcallName(RTLIB::ADD_F128, "_Qp_add");
+    setLibcallName(RTLIB::SUB_F128, "_Qp_sub");
+    setLibcallName(RTLIB::MUL_F128, "_Qp_mul");
+    setLibcallName(RTLIB::DIV_F128, "_Qp_div");
+    setLibcallName(RTLIB::SQRT_F128, "_Qp_sqrt");
+    setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Qp_qtoi");
+    setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Qp_itoq");
+    setLibcallName(RTLIB::FPEXT_F32_F128, "_Qp_stoq");
+    setLibcallName(RTLIB::FPEXT_F64_F128, "_Qp_dtoq");
+    setLibcallName(RTLIB::FPROUND_F128_F32, "_Qp_qtos");
+    setLibcallName(RTLIB::FPROUND_F128_F64, "_Qp_qtod");
+  } else {
+    setLibcallName(RTLIB::ADD_F128, "_Q_add");
+    setLibcallName(RTLIB::SUB_F128, "_Q_sub");
+    setLibcallName(RTLIB::MUL_F128, "_Q_mul");
+    setLibcallName(RTLIB::DIV_F128, "_Q_div");
+    setLibcallName(RTLIB::SQRT_F128, "_Q_sqrt");
+    setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Q_qtoi");
+    setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Q_itoq");
+    setLibcallName(RTLIB::FPEXT_F32_F128, "_Q_stoq");
+    setLibcallName(RTLIB::FPEXT_F64_F128, "_Q_dtoq");
+    setLibcallName(RTLIB::FPROUND_F128_F32, "_Q_qtos");
+    setLibcallName(RTLIB::FPROUND_F128_F64, "_Q_qtod");
+  }
 }

 setMinFunctionAlignment(2);
@@ -1605,23 +1665,264 @@ SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op,
   return makeAddress(Op, DAG);
 }

-static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+SDValue
+SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args,
+                                          SDValue Arg, SDLoc DL,
+                                          SelectionDAG &DAG) const {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  EVT ArgVT = Arg.getValueType();
+  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+  ArgListEntry Entry;
+  Entry.Node = Arg;
+  Entry.Ty = ArgTy;
+
+  if (ArgTy->isFP128Ty()) {
+    // Create a stack object and pass the pointer to the library function.
+    int FI = MFI->CreateStackObject(16, 8, false);
+    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+    Chain = DAG.getStore(Chain,
+                         DL,
+                         Entry.Node,
+                         FIPtr,
+                         MachinePointerInfo(),
+                         false,
+                         false,
+                         8);
+
+    Entry.Node = FIPtr;
+    Entry.Ty = PointerType::getUnqual(ArgTy);
+  }
+  Args.push_back(Entry);
+  return Chain;
+}
+
+SDValue
+SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
+                                 const char *LibFuncName,
+                                 unsigned numArgs) const {
+
+  ArgListTy Args;
+
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+  SDValue Callee = DAG.getExternalSymbol(LibFuncName, getPointerTy());
+  Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+  Type *RetTyABI = RetTy;
+  SDValue Chain = DAG.getEntryNode();
+  SDValue RetPtr;
+
+  if (RetTy->isFP128Ty()) {
+    // Create a Stack Object to receive the return value of type f128.
+    ArgListEntry Entry;
+    int RetFI = MFI->CreateStackObject(16, 8, false);
+    RetPtr = DAG.getFrameIndex(RetFI, getPointerTy());
+    Entry.Node = RetPtr;
+    Entry.Ty = PointerType::getUnqual(RetTy);
+    if (!Subtarget->is64Bit())
+      Entry.isSRet = true;
+    Entry.isReturned = false;
+    Args.push_back(Entry);
+    RetTyABI = Type::getVoidTy(*DAG.getContext());
+  }
+
+  assert(Op->getNumOperands() >= numArgs && "Not enough operands!");
+  for (unsigned i = 0, e = numArgs; i != e; ++i) {
+    Chain = LowerF128_LibCallArg(Chain, Args, Op.getOperand(i), SDLoc(Op), DAG);
+  }
+  TargetLowering::
+    CallLoweringInfo CLI(Chain,
+                         RetTyABI,
+                         false, false, false, false,
+                         0, CallingConv::C,
+                         false, false, true,
+                         Callee, Args, DAG, SDLoc(Op));
+  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+  // chain is in second result.
+  if (RetTyABI == RetTy)
+    return CallInfo.first;
+
+  assert (RetTy->isFP128Ty() && "Unexpected return type!");
+
+  Chain = CallInfo.second;
+
+  // Load RetPtr to get the return value.
+  return DAG.getLoad(Op.getValueType(),
+                     SDLoc(Op),
+                     Chain,
+                     RetPtr,
+                     MachinePointerInfo(),
+                     false, false, false, 8);
+}
+
+SDValue
+SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
+                                      unsigned &SPCC,
+                                      SDLoc DL,
+                                      SelectionDAG &DAG) const {
+
+  const char *LibCall = 0;
+  bool is64Bit = Subtarget->is64Bit();
+  switch(SPCC) {
+  default: llvm_unreachable("Unhandled conditional code!");
+  case SPCC::FCC_E : LibCall = is64Bit? "_Qp_feq" : "_Q_feq"; break;
+  case SPCC::FCC_NE : LibCall = is64Bit? "_Qp_fne" : "_Q_fne"; break;
+  case SPCC::FCC_L : LibCall = is64Bit? "_Qp_flt" : "_Q_flt"; break;
+  case SPCC::FCC_G : LibCall = is64Bit? "_Qp_fgt" : "_Q_fgt"; break;
+  case SPCC::FCC_LE : LibCall = is64Bit? "_Qp_fle" : "_Q_fle"; break;
+  case SPCC::FCC_GE : LibCall = is64Bit? "_Qp_fge" : "_Q_fge"; break;
+  case SPCC::FCC_UL :
+  case SPCC::FCC_ULE:
+  case SPCC::FCC_UG :
+  case SPCC::FCC_UGE:
+  case SPCC::FCC_U :
+  case SPCC::FCC_O :
+  case SPCC::FCC_LG :
+  case SPCC::FCC_UE : LibCall = is64Bit? "_Qp_cmp" : "_Q_cmp"; break;
+  }
+
+  SDValue Callee = DAG.getExternalSymbol(LibCall, getPointerTy());
+  Type *RetTy = Type::getInt32Ty(*DAG.getContext());
+  ArgListTy Args;
+  SDValue Chain = DAG.getEntryNode();
+  Chain = LowerF128_LibCallArg(Chain, Args, LHS, DL, DAG);
+  Chain = LowerF128_LibCallArg(Chain, Args, RHS, DL, DAG);
+
+  TargetLowering::
+    CallLoweringInfo CLI(Chain,
+                         RetTy,
+                         false, false, false, false,
+                         0, CallingConv::C,
+                         false, false, true,
+                         Callee, Args, DAG, DL);
+
+  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+  // result is in first, and chain is in second result.
+ SDValue Result = CallInfo.first; + + switch(SPCC) { + default: { + SDValue RHS = DAG.getTargetConstant(0, Result.getValueType()); + SPCC = SPCC::ICC_NE; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + case SPCC::FCC_UL : { + SDValue Mask = DAG.getTargetConstant(1, Result.getValueType()); + Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); + SDValue RHS = DAG.getTargetConstant(0, Result.getValueType()); + SPCC = SPCC::ICC_NE; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + case SPCC::FCC_ULE: { + SDValue RHS = DAG.getTargetConstant(2, LHS.getValueType()); + SPCC = SPCC::ICC_NE; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + case SPCC::FCC_UG : { + SDValue RHS = DAG.getTargetConstant(1, Result.getValueType()); + SPCC = SPCC::ICC_G; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + case SPCC::FCC_UGE: { + SDValue RHS = DAG.getTargetConstant(1, Result.getValueType()); + SPCC = SPCC::ICC_NE; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + + case SPCC::FCC_U : { + SDValue RHS = DAG.getTargetConstant(3, Result.getValueType()); + SPCC = SPCC::ICC_E; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + case SPCC::FCC_O : { + SDValue RHS = DAG.getTargetConstant(3, Result.getValueType()); + SPCC = SPCC::ICC_NE; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + case SPCC::FCC_LG : { + SDValue Mask = DAG.getTargetConstant(3, Result.getValueType()); + Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); + SDValue RHS = DAG.getTargetConstant(0, Result.getValueType()); + SPCC = SPCC::ICC_NE; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + case SPCC::FCC_UE : { + SDValue Mask = DAG.getTargetConstant(3, Result.getValueType()); + Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); + SDValue RHS = DAG.getTargetConstant(0, Result.getValueType()); + SPCC = SPCC::ICC_E; + return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); + } + } +} + +static SDValue +LowerF128_FPEXTEND(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI) { + + if (Op.getOperand(0).getValueType() == MVT::f64) + return TLI.LowerF128Op(Op, DAG, + TLI.getLibcallName(RTLIB::FPEXT_F64_F128), 1); + + if (Op.getOperand(0).getValueType() == MVT::f32) + return TLI.LowerF128Op(Op, DAG, + TLI.getLibcallName(RTLIB::FPEXT_F32_F128), 1); + + llvm_unreachable("fpextend with non-float operand!"); + return SDValue(0, 0); +} + +static SDValue +LowerF128_FPROUND(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI) { + // FP_ROUND on f64 and f32 are legal. + if (Op.getOperand(0).getValueType() != MVT::f128) + return Op; + + if (Op.getValueType() == MVT::f64) + return TLI.LowerF128Op(Op, DAG, + TLI.getLibcallName(RTLIB::FPROUND_F128_F64), 1); + if (Op.getValueType() == MVT::f32) + return TLI.LowerF128Op(Op, DAG, + TLI.getLibcallName(RTLIB::FPROUND_F128_F32), 1); + + llvm_unreachable("fpround to non-float!"); + return SDValue(0, 0); +} + +static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI, + bool hasHardQuad) { SDLoc dl(Op); // Convert the fp value to integer in an FP register. 
assert(Op.getValueType() == MVT::i32); + + if (Op.getOperand(0).getValueType() == MVT::f128 && !hasHardQuad) + return TLI.LowerF128Op(Op, DAG, + TLI.getLibcallName(RTLIB::FPTOSINT_F128_I32), 1); + Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0)); return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); } -static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI, + bool hasHardQuad) { SDLoc dl(Op); assert(Op.getOperand(0).getValueType() == MVT::i32); SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); // Convert the int value to FP in an FP register. + if (Op.getValueType() == MVT::f128 && hasHardQuad) + return TLI.LowerF128Op(Op, DAG, + TLI.getLibcallName(RTLIB::SINTTOFP_I32_F128), 1); return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp); } -static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI, + bool hasHardQuad) { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -1642,15 +1943,23 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { // 32-bit compares use the icc flags, 64-bit uses the xcc flags. Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC; } else { - CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS); - if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); - Opc = SPISD::BRFCC; + if (!hasHardQuad && LHS.getValueType() == MVT::f128) { + if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); + CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG); + Opc = SPISD::BRICC; + } else { + CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS); + if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); + Opc = SPISD::BRFCC; + } } return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest, DAG.getConstant(SPCC, MVT::i32), CompareFlag); } -static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI, + bool hasHardQuad) { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(4))->get(); @@ -1670,9 +1979,15 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { SPISD::SELECT_ICC : SPISD::SELECT_XCC; if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC); } else { - CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS); - Opc = SPISD::SELECT_FCC; - if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); + if (!hasHardQuad && LHS.getValueType() == MVT::f128) { + if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); + CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG); + Opc = SPISD::SELECT_ICC; + } else { + CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS); + Opc = SPISD::SELECT_FCC; + if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); + } } return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal, DAG.getConstant(SPCC, MVT::i32), CompareFlag); @@ -1931,14 +2246,57 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { &OutChains[0], 2); } +static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI, + bool is64Bit) { + if (Op.getValueType() == MVT::f64) + return LowerF64Op(Op, DAG); + if (Op.getValueType() == MVT::f128) + return TLI.LowerF128Op(Op, DAG, ((is64Bit) ? 
"_Qp_neg" : "_Q_neg"), 1); + return Op; +} + +static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG, bool isV9) { + if (Op.getValueType() == MVT::f64) + return LowerF64Op(Op, DAG); + if (Op.getValueType() != MVT::f128) + return Op; + + // Lower fabs on f128 to fabs on f64 + // fabs f128 => fabs f64:sub_even64, fmov f64:sub_odd64 + + SDLoc dl(Op); + SDValue SrcReg128 = Op.getOperand(0); + SDValue Hi64 = DAG.getTargetExtractSubreg(SP::sub_even64, dl, MVT::f64, + SrcReg128); + SDValue Lo64 = DAG.getTargetExtractSubreg(SP::sub_odd64, dl, MVT::f64, + SrcReg128); + if (isV9) + Hi64 = DAG.getNode(Op.getOpcode(), dl, MVT::f64, Hi64); + else + Hi64 = LowerF64Op(Op, DAG); + + SDValue DstReg128 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, MVT::f128), 0); + DstReg128 = DAG.getTargetInsertSubreg(SP::sub_even64, dl, MVT::f128, + DstReg128, Hi64); + DstReg128 = DAG.getTargetInsertSubreg(SP::sub_odd64, dl, MVT::f128, + DstReg128, Lo64); + return DstReg128; +} + + + SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { + + bool hasHardQuad = Subtarget->hasHardQuad(); + bool is64Bit = Subtarget->is64Bit(); + bool isV9 = Subtarget->isV9(); + switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); - case ISD::FNEG: - case ISD::FABS: return LowerF64Op(Op, DAG); - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG, *this); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: @@ -1946,16 +2304,34 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); - case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); - case ISD::BR_CC: return LowerBR_CC(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG, *this, + hasHardQuad); + case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG, *this, + hasHardQuad); + case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this, + hasHardQuad); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this, + hasHardQuad); case ISD::VASTART: return LowerVASTART(Op, DAG, *this); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::LOAD: return LowerF128Load(Op, DAG); case ISD::STORE: return LowerF128Store(Op, DAG); + case ISD::FADD: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::ADD_F128), 2); + case ISD::FSUB: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::SUB_F128), 2); + case ISD::FMUL: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::MUL_F128), 2); + case ISD::FDIV: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::DIV_F128), 2); + case ISD::FSQRT: return LowerF128Op(Op, DAG, + getLibcallName(RTLIB::SQRT_F128),1); + case ISD::FNEG: return LowerFNEG(Op, DAG, *this, is64Bit); + case ISD::FABS: return LowerFABS(Op, DAG, isV9); + case ISD::FP_EXTEND: return LowerF128_FPEXTEND(Op, DAG, *this); + case ISD::FP_ROUND: return LowerF128_FPROUND(Op, DAG, *this); } } @@ -1972,11 +2348,13 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case SP::SELECT_CC_Int_ICC: case SP::SELECT_CC_FP_ICC: case SP::SELECT_CC_DFP_ICC: + case SP::SELECT_CC_QFP_ICC: BROpcode = SP::BCOND; break; case SP::SELECT_CC_Int_FCC: case SP::SELECT_CC_FP_FCC: case SP::SELECT_CC_DFP_FCC: + case SP::SELECT_CC_QFP_FCC: 
BROpcode = SP::FBCOND; break; } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 261c25a..64c688d 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -127,6 +127,23 @@ namespace llvm { SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, SelectionDAG &DAG) const; SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args, + SDValue Arg, SDLoc DL, + SelectionDAG &DAG) const; + SDValue LowerF128Op(SDValue Op, SelectionDAG &DAG, + const char *LibFuncName, + unsigned numArgs) const; + SDValue LowerF128Compare(SDValue LHS, SDValue RHS, + unsigned &SPCC, + SDLoc DL, + SelectionDAG &DAG) const; + + bool ShouldShrinkFPConstant(EVT VT) const { + // Do not shrink FP constpool if VT == MVT::f128. + // (ldd, call _Q_fdtoq) is more expensive than two ldds. + return VT != MVT::f128; + } }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index a08d1cb..695be33 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -267,6 +267,11 @@ let Uses = [ICC], usesCustomInserter = 1 in { : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_ICC PSEUDO!", [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>; + + def SELECT_CC_QFP_ICC + : Pseudo<(outs QFPRegs:$dst), (ins QFPRegs:$T, QFPRegs:$F, i32imm:$Cond), + "; SELECT_CC_QFP_ICC PSEUDO!", + [(set f128:$dst, (SPselecticc f128:$T, f128:$F, imm:$Cond))]>; } let usesCustomInserter = 1, Uses = [FCC] in { @@ -284,6 +289,10 @@ let usesCustomInserter = 1, Uses = [FCC] in { : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_FCC PSEUDO!", [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>; + def SELECT_CC_QFP_FCC + : Pseudo<(outs QFPRegs:$dst), (ins QFPRegs:$T, QFPRegs:$F, i32imm:$Cond), + "; SELECT_CC_QFP_FCC PSEUDO!", + [(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>; } -- cgit v1.1 From 8bce43648be1156fdced590beb81aed3915762f1 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 3 Sep 2013 15:38:35 +0000 Subject: [SystemZ] Add support for TMHH, TMHL, TMLH and TMLL For now this just handles simple comparisons of an ANDed value with zero. The CC value provides enough information to do any comparison for a 2-bit mask, and some nonzero comparisons with more populated masks, but that's all future work. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189819 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZ.h | 4 ++ lib/Target/SystemZ/SystemZISelLowering.cpp | 111 ++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index 051ba1d..1647d93 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -62,6 +62,10 @@ namespace llvm { const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1; const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2; const unsigned CCMASK_TM_ALL_1 = CCMASK_3; + const unsigned CCMASK_TM_SOME_0 = CCMASK_TM_ALL_1 ^ CCMASK_ANY; + const unsigned CCMASK_TM_SOME_1 = CCMASK_TM_ALL_0 ^ CCMASK_ANY; + const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1; + const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; const unsigned CCMASK_TM = CCMASK_ANY; // Mask assignments for PFD. 
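
For reference, the four derived masks follow from how TEST UNDER MASK sets the condition code: CC0 means all selected bits were zero, CC1 and CC2 mean a mixed result whose leftmost selected bit is zero or one respectively, and CC3 means all selected bits were one. A minimal standalone sketch of that mapping (illustrative names and values only, not part of this patch):

    #include <cassert>
    #include <cstdint>

    // One bit per condition-code value, mirroring the CCMASK_* layout above
    // (CC0 is the most significant of the four bits). Illustrative only.
    enum : unsigned {
      CC0 = 1u << 3, // all selected bits zero
      CC1 = 1u << 2, // mixed, leftmost selected bit zero
      CC2 = 1u << 1, // mixed, leftmost selected bit one
      CC3 = 1u << 0, // all selected bits one
      ANY = CC0 | CC1 | CC2 | CC3
    };

    // CC value a TEST UNDER MASK of (Val & Mask) would produce; Mask != 0.
    static unsigned tmResult(uint64_t Val, uint64_t Mask) {
      uint64_t Bits = Val & Mask;
      if (Bits == 0)
        return CC0;
      if (Bits == Mask)
        return CC3;
      // Highest set bit of the mask (GCC/Clang builtin).
      uint64_t MSB = 1ull << (63 - __builtin_clzll(Mask));
      return (Bits & MSB) ? CC2 : CC1;
    }

    int main() {
      const unsigned ALL_0 = CC0, SOME_1 = ALL_0 ^ ANY; // as in the hunk above
      // (x & 0xC0) != 0 holds exactly when TM produced one of SOME_1.
      assert(tmResult(0x80, 0xC0) & SOME_1);
      assert(!(tmResult(0x00, 0xC0) & SOME_1));
      return 0;
    }
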
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 1ab1ef4..13d5604 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1176,6 +1176,98 @@ static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1, return false; } +// Check whether the CC value produced by TEST UNDER MASK is descriptive +// enough to handle an AND with Mask followed by a comparison of type Opcode +// with CmpVal. CCMask says which comparison result is being tested and +// BitSize is the number of bits in the operands. Return the CC mask that +// should be used for the TEST UNDER MASK result, or 0 if the condition is +// too complex. +static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned Opcode, + unsigned CCMask, uint64_t Mask, + uint64_t CmpVal) { + assert(Mask != 0 && "ANDs with zero should have been removed by now"); + + // Work out the masks for the lowest and highest bits. + unsigned HighShift = 63 - countLeadingZeros(Mask); + uint64_t High = uint64_t(1) << HighShift; + uint64_t Low = uint64_t(1) << countTrailingZeros(Mask); + + // Signed ordered comparisons are effectively unsigned if the sign + // bit is dropped. + bool EffectivelyUnsigned = (Opcode == SystemZISD::UCMP + || HighShift < BitSize - 1); + + // Check for equality comparisons with 0, or the equivalent. + if (CmpVal == 0) { + if (CCMask == SystemZ::CCMASK_CMP_EQ) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_NE) + return SystemZ::CCMASK_TM_SOME_1; + } + if (EffectivelyUnsigned && CmpVal <= Low) { + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_SOME_1; + } + if (EffectivelyUnsigned && CmpVal < Low) { + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_ALL_0; + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_SOME_1; + } + + // Check for equality comparisons with the mask, or the equivalent. + if (CmpVal == Mask) { + if (CCMask == SystemZ::CCMASK_CMP_EQ) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_NE) + return SystemZ::CCMASK_TM_SOME_0; + } + if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_SOME_0; + } + if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_ALL_1; + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_SOME_0; + } + + // Check for ordered comparisons with the top bit. + if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { + if (CCMask == SystemZ::CCMASK_CMP_LE) + return SystemZ::CCMASK_TM_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_GT) + return SystemZ::CCMASK_TM_MSB_1; + } + if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { + if (CCMask == SystemZ::CCMASK_CMP_LT) + return SystemZ::CCMASK_TM_MSB_0; + if (CCMask == SystemZ::CCMASK_CMP_GE) + return SystemZ::CCMASK_TM_MSB_1; + } + + // If there are just two bits, we can do equality checks for Low and High + // as well. 
+  if (Mask == Low + High) {
+    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
+      return SystemZ::CCMASK_TM_MIXED_MSB_0;
+    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
+      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
+    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
+      return SystemZ::CCMASK_TM_MIXED_MSB_1;
+    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
+      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
+  }
+
+  // Looks like we've exhausted our options.
+  return 0;
+}
+
 // See whether the comparison (Opcode CmpOp0, CmpOp1) can be implemented
 // as a TEST UNDER MASK instruction when the condition being tested is
 // as described by CCValid and CCMask.  Update the arguments with the
@@ -1183,12 +1275,9 @@ static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1,
 static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0,
                                    SDValue &CmpOp1, unsigned &CCValid,
                                    unsigned &CCMask) {
-  // For now we just handle equality and inequality with zero.
-  if (CCMask != SystemZ::CCMASK_CMP_EQ &&
-      (CCMask ^ CCValid) != SystemZ::CCMASK_CMP_EQ)
-    return;
+  // Check that we have a comparison with a constant.
   ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1);
-  if (!ConstCmpOp1 || ConstCmpOp1->getZExtValue() != 0)
+  if (!ConstCmpOp1)
     return;
 
   // Check whether the nonconstant input is an AND with a constant mask.
@@ -1206,14 +1295,20 @@ static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0,
       !SystemZ::isImmHL(MaskVal) && !SystemZ::isImmHH(MaskVal))
     return;
 
+  // Check whether the combination of mask, comparison value and comparison
+  // type are suitable.
+  unsigned BitSize = CmpOp0.getValueType().getSizeInBits();
+  unsigned NewCCMask = getTestUnderMaskCond(BitSize, Opcode, CCMask, MaskVal,
+                                            ConstCmpOp1->getZExtValue());
+  if (!NewCCMask)
+    return;
+
   // Go ahead and make the change.
   Opcode = SystemZISD::TM;
   CmpOp0 = AndOp0;
   CmpOp1 = AndOp1;
   CCValid = SystemZ::CCMASK_TM;
-  CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ?
-            SystemZ::CCMASK_TM_ALL_0 :
-            SystemZ::CCMASK_TM_ALL_0 ^ CCValid);
+  CCMask = NewCCMask;
 }
 
 // Return a target node that compares CmpOp0 with CmpOp1 and stores a
-- 
cgit v1.1

From b81d7a89129ff1621fa609144b400df3500542d6 Mon Sep 17 00:00:00 2001
From: Jim Grosbach
Date: Tue, 3 Sep 2013 20:08:17 +0000
Subject: Revert "Revert "ARM: Improve pattern for isel mul of vector by scalar.""

This reverts commit r189648.

Fixes for the previously failing clang-side arm_neon_intrinsics test cases
will be checked in separately.
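
For reference, the restored patterns fire on a multiply of a whole vector by a duplicated scalar, which is the shape the NEON intrinsic vmulq_n_f32 produces. A minimal sketch of source that should now select the by-lane form (assumes a NEON-enabled ARM target; the function name is illustrative, not part of this commit):

    #include <arm_neon.h>

    // vmulq_n_f32 lowers to an fmul of %v by a splat of %s; with these
    // patterns restored, isel matches that to VMULslfq against lane 0 of
    // the scalar instead of first duplicating %s across a vector register.
    float32x4_t scale(float32x4_t v, float s) {
      return vmulq_n_f32(v, s);
    }
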
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189841 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 49ae334..f1bd37e 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4022,6 +4022,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfd DPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; +def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfq QPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; + + // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, -- cgit v1.1 From b9daaa02a24afb2027dc999258dcdb6ec4fbbc9a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 3 Sep 2013 20:59:07 +0000 Subject: WIP: Refactor some code so that it can be called by more than just one method. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189849 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 131 +++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 60 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index ecf7b73..f44a893 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -483,13 +483,78 @@ encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], return RegEnc; } +static uint32_t +doCompactUnwindEncoding(unsigned SavedRegs[CU_NUM_SAVED_REGS], + unsigned StackSize, unsigned StackAdjust, + unsigned SubtractInstrIdx, unsigned SavedRegIdx, + bool Is64Bit, bool HasFP) { + // Encode that we are using EBP/RBP as the frame pointer. + unsigned StackDivide = (Is64Bit ? 8 : 4); + uint32_t CompactUnwindEncoding = 0; + + StackAdjust /= StackDivide; + + if (HasFP) { + if ((StackAdjust & 0xFF) != StackAdjust) + // Offset was too big for compact encoding. + return CU::UNWIND_MODE_DWARF; + + // Get the encoding of the saved registers when we have a frame pointer. + uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); + if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; + + CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; + CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; + CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; + } else { + ++StackAdjust; + uint32_t TotalStackSize = StackAdjust + StackSize; + if ((TotalStackSize & 0xFF) == TotalStackSize) { + // Frameless stack with a small stack size. + CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; + + // Encode the stack size. + CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16; + } else { + if ((StackAdjust & 0x7) != StackAdjust) + // The extra stack adjustments are too big for us to handle. + return CU::UNWIND_MODE_DWARF; + + // Frameless stack with an offset too large for us to encode compactly. + CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; + + // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' + // instruction. 
+ CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; + + // Encode any extra stack stack adjustments (done via push instructions). + CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; + } + + // Encode the number of registers saved. + CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; + + // Get the encoding of the saved registers when we don't have a frame + // pointer. + uint32_t RegEnc = + encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx, + Is64Bit); + if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; + + // Encode the register encoding. + CompactUnwindEncoding |= + RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; + } + + return CompactUnwindEncoding; +} + uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); bool Is64Bit = STI.is64Bit(); - bool HasFP = hasFP(MF); unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 }; unsigned SavedRegIdx = 0; @@ -508,10 +573,9 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { unsigned StackAdjust = 0; unsigned StackSize = 0; - MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB. bool ExpectEnd = false; - for (MachineBasicBlock::iterator - MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) { + for (MachineBasicBlock::iterator MBBI = MF.front().begin(), + MBBE = MF.front().end(); MBBI != MBBE; ++MBBI) { MachineInstr &MI = *MBBI; unsigned Opc = MI.getOpcode(); if (Opc == X86::PROLOG_LABEL) continue; @@ -564,62 +628,9 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { } } - // Encode that we are using EBP/RBP as the frame pointer. - uint32_t CompactUnwindEncoding = 0; - StackAdjust /= StackDivide; - if (HasFP) { - if ((StackAdjust & 0xFF) != StackAdjust) - // Offset was too big for compact encoding. - return CU::UNWIND_MODE_DWARF; - - // Get the encoding of the saved registers when we have a frame pointer. - uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); - if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; - - CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; - CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; - CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; - } else { - ++StackAdjust; - uint32_t TotalStackSize = StackAdjust + StackSize; - if ((TotalStackSize & 0xFF) == TotalStackSize) { - // Frameless stack with a small stack size. - CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; - - // Encode the stack size. - CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16; - } else { - if ((StackAdjust & 0x7) != StackAdjust) - // The extra stack adjustments are too big for us to handle. - return CU::UNWIND_MODE_DWARF; - - // Frameless stack with an offset too large for us to encode compactly. - CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; - - // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' - // instruction. - CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; - - // Encode any extra stack stack adjustments (done via push instructions). - CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; - } - - // Encode the number of registers saved. - CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; - - // Get the encoding of the saved registers when we don't have a frame - // pointer. 
- uint32_t RegEnc = - encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx, - Is64Bit); - if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; - - // Encode the register encoding. - CompactUnwindEncoding |= - RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; - } - - return CompactUnwindEncoding; + return doCompactUnwindEncoding(SavedRegs, StackSize, StackAdjust, + SubtractInstrIdx, SavedRegIdx, + Is64Bit, hasFP(MF)); } /// usesTheStack - This function checks if any of the users of EFLAGS -- cgit v1.1 From 36b00f394fb03bd276d9343592bd718ddf4be789 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 3 Sep 2013 23:02:00 +0000 Subject: X86: Mark non-crashing report_fatal_errors() as such. Previously, the clang crash handling code would kick in and give a crash report for these, even though they're not that sort of error. rdar://14882264 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189878 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 32 +++++++++++++--------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 6ee47c7..b6f10ba 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -154,12 +154,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // Neither symbol can be modified. if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported relocation of modified symbol"); + report_fatal_error("unsupported relocation of modified symbol", false); // We don't support PCrel relocations of differences. Darwin 'as' doesn't // implement most of these correctly. if (IsPCRel) - report_fatal_error("unsupported pc-relative relocation of difference"); + report_fatal_error("unsupported pc-relative relocation of difference", + false); // The support for the situation where one or both of the symbols would // require a local relocation is handled just like if the symbols were @@ -172,12 +173,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, // single SIGNED relocation); reject it for now. Except the case where both // symbols don't have a base, equal but both NULL. if (A_Base == B_Base && A_Base) - report_fatal_error("unsupported relocation with identical base"); + report_fatal_error("unsupported relocation with identical base", false); // A subtraction expression where both symbols are undefined is a // non-relocatable expression. if (A->isUndefined() && B->isUndefined()) - report_fatal_error("unsupported relocation with subtraction expression"); + report_fatal_error("unsupported relocation with subtraction expression", + false); Value += Writer->getSymbolAddress(&A_SD, Layout) - (A_Base == NULL ? 
0 : Writer->getSymbolAddress(A_Base, Layout)); @@ -256,11 +258,11 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, return; } else { report_fatal_error("unsupported relocation of variable '" + - Symbol->getName() + "'"); + Symbol->getName() + "'", false); } } else { report_fatal_error("unsupported relocation of undefined symbol '" + - Symbol->getName() + "'"); + Symbol->getName() + "'", false); } MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); @@ -277,7 +279,8 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { Type = MachO::X86_64_RELOC_TLV; } else if (Modifier != MCSymbolRefExpr::VK_None) { - report_fatal_error("unsupported symbol modifier in relocation"); + report_fatal_error("unsupported symbol modifier in relocation", + false); } else { Type = MachO::X86_64_RELOC_SIGNED; @@ -304,7 +307,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, } else { if (Modifier != MCSymbolRefExpr::VK_None) report_fatal_error("unsupported symbol modifier in branch " - "relocation"); + "relocation", false); Type = MachO::X86_64_RELOC_BRANCH; } @@ -319,15 +322,16 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Type = MachO::X86_64_RELOC_GOT; IsPCRel = 1; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - report_fatal_error("TLVP symbol modifier should have been rip-rel"); + report_fatal_error("TLVP symbol modifier should have been rip-rel", + false); } else if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in relocation"); + report_fatal_error("unsupported symbol modifier in relocation", false); else { Type = MachO::X86_64_RELOC_UNSIGNED; unsigned Kind = Fixup.getKind(); if (Kind == X86::reloc_signed_4byte) report_fatal_error("32-bit absolute addressing is not supported in " - "64-bit mode"); + "64-bit mode", false); } } } @@ -364,7 +368,8 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, if (!A_SD->getFragment()) report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); + "' can not be undefined in a subtraction expression", + false); uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent()); @@ -376,7 +381,8 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, if (!B_SD->getFragment()) report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression"); + "' can not be undefined in a subtraction expression", + false); // Select the appropriate difference relocation type. // -- cgit v1.1 From 30955e9afb6548c79e426e5971e06bdfd3799ee0 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 4 Sep 2013 04:19:01 +0000 Subject: Use llvm::next() instead of incrementing begin iterators of std::vector. Iterator of std::vector may be implemented as a raw pointer. In this case begin iterators are rvalues and cannot be incremented. For example, this is the case with STDCXX implementation of vector. Patch by Konstantin Tokarev . 
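
For reference, the failure mode is narrow: when the iterator type is a plain pointer, begin() returns a pointer rvalue, and the built-in ++ operator requires a modifiable lvalue, so ++v.begin() does not compile. A minimal standalone sketch (std::next stands in for llvm::next here; this is not LLVM code, and it assumes v has at least two elements):

    #include <iterator>
    #include <vector>

    int secondElement(const std::vector<int> &v) {
      // return *(++v.begin());      // ill-formed when the iterator is a raw
      //                             // pointer: begin() yields an rvalue and
      //                             // the built-in ++ needs an lvalue. A
      //                             // class-type iterator happens to accept
      //                             // it, which is why the bug went unseen.
      return *std::next(v.begin()); // well-formed for any iterator type
    }
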
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189911 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDILCFGStructurizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index 687eadb..481b5a7 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -1233,7 +1233,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
   numClonedBlock += Num;
 
   Num += serialPatternMatch(*HeadMBB->succ_begin());
-  Num += serialPatternMatch(*(++HeadMBB->succ_begin()));
+  Num += serialPatternMatch(*next(HeadMBB->succ_begin()));
   Num += ifPatternMatch(HeadMBB);
   assert(Num > 0);
 
@@ -1713,7 +1713,7 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
   if (MBB->succ_size() != 2)
     return;
   MachineBasicBlock *MBB1 = *MBB->succ_begin();
-  MachineBasicBlock *MBB2 = *(++MBB->succ_begin());
+  MachineBasicBlock *MBB2 = *next(MBB->succ_begin());
   if (MBB1 != MBB2)
     return;
 
-- 
cgit v1.1

From c15c64fbebee11d6aeb12f17ac36473ae8397e77 Mon Sep 17 00:00:00 2001
From: Michael Gottesman
Date: Wed, 4 Sep 2013 04:26:09 +0000
Subject: Add llvm namespace to llvm::next.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189912 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDILCFGStructurizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index 481b5a7..80190c9 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -1233,7 +1233,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
   numClonedBlock += Num;
 
   Num += serialPatternMatch(*HeadMBB->succ_begin());
-  Num += serialPatternMatch(*next(HeadMBB->succ_begin()));
+  Num += serialPatternMatch(*llvm::next(HeadMBB->succ_begin()));
   Num += ifPatternMatch(HeadMBB);
   assert(Num > 0);
 
@@ -1713,7 +1713,7 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
   if (MBB->succ_size() != 2)
     return;
   MachineBasicBlock *MBB1 = *MBB->succ_begin();
-  MachineBasicBlock *MBB2 = *next(MBB->succ_begin());
+  MachineBasicBlock *MBB2 = *llvm::next(MBB->succ_begin());
   if (MBB1 != MBB2)
     return;
 
-- 
cgit v1.1

From 19fdc268c316b3b0bdcb2b558449819f4f402d6a Mon Sep 17 00:00:00 2001
From: Hao Liu
Date: Wed, 4 Sep 2013 09:28:24 +0000
Subject: Implement AArch64 NEON instructions in AdvSIMD (shift).

About 24 shift instructions: sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshrun,sqshrn,uqshr,sqrshrn,uqrshrn,sshll,ushll and 4 convert instructions: scvtf,ucvtf,fcvtzs,fcvtzu git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189925 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 90 ++- lib/Target/AArch64/AArch64ISelLowering.h | 8 +- lib/Target/AArch64/AArch64InstrNEON.td | 649 ++++++++++++++++++++- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 31 + .../AArch64/Disassembler/AArch64Disassembler.cpp | 38 ++ .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 27 + 6 files changed, 805 insertions(+), 38 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index d12302e..b68c43a 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -77,8 +77,11 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + // AArch64 does not have i1 loads, or much of anything for i1 really. setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); @@ -283,6 +286,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); + setOperationAction(ISD::SETCC, MVT::v8i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i8, Custom); setOperationAction(ISD::SETCC, MVT::v4i16, Custom); @@ -834,6 +839,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { return "AArch64ISD::NEON_CMPZ"; case AArch64ISD::NEON_TST: return "AArch64ISD::NEON_TST"; + case AArch64ISD::NEON_DUPIMM: + return "AArch64ISD::NEON_DUPIMM"; + case AArch64ISD::NEON_QSHLs: + return "AArch64ISD::NEON_QSHLs"; + case AArch64ISD::NEON_QSHLu: + return "AArch64ISD::NEON_QSHLu"; default: return NULL; } @@ -3257,7 +3268,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { /// Check if this is a valid build_vector for the immediate operand of /// a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits for a left shift +/// 0 <= Value < ElementBits static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); @@ -3266,10 +3277,25 @@ static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { return (Cnt >= 0 && Cnt < ElementBits); } -static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, +/// Check if this is a valid build_vector for the immediate operand of a +/// vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits +static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (!getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 1 && Cnt <= ElementBits); +} + +/// Checks for immediate versions of vector shifts and lowers them. 
+static SDValue PerformShiftCombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
                                  const AArch64Subtarget *ST) {
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
+  if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
+    return PerformSRACombine(N, DCI);
 
   // Nothing to be done for scalar shifts.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -3278,10 +3304,54 @@ static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI
   assert(ST->hasNEON() && "unexpected vector shift");
 
   int64_t Cnt;
-  if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
-    SDValue RHS = DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(0)),
-                              VT, DAG.getConstant(Cnt, MVT::i32));
-    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
+
+  switch (N->getOpcode()) {
+  default:
+    llvm_unreachable("unexpected shift opcode");
+
+  case ISD::SHL:
+    if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
+      SDValue RHS =
+          DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
+                      DAG.getConstant(Cnt, MVT::i32));
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
+    }
+    break;
+
+  case ISD::SRA:
+  case ISD::SRL:
+    if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
+      SDValue RHS =
+          DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
+                      DAG.getConstant(Cnt, MVT::i32));
+      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
+    }
+    break;
+  }
+
+  return SDValue();
+}
+
+/// ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+
+  switch (IntNo) {
+  default:
+    // Don't do anything for most intrinsics.
+    break;
+
+  case Intrinsic::arm_neon_vqshifts:
+  case Intrinsic::arm_neon_vqshiftu:
+    EVT VT = N->getOperand(1).getValueType();
+    int64_t Cnt;
+    if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
+      break;
+    unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
+                             ?
AArch64ISD::NEON_QSHLs + : AArch64ISD::NEON_QSHLu; + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), + N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); } return SDValue(); @@ -3294,8 +3364,12 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::AND: return PerformANDCombine(N, DCI); case ISD::OR: return PerformORCombine(N, DCI, getSubtarget()); - case ISD::SRA: return PerformSRACombine(N, DCI); - case ISD::SHL: return PerformSHLCombine(N, DCI, getSubtarget()); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return PerformShiftCombine(N, DCI, getSubtarget()); + case ISD::INTRINSIC_WO_CHAIN: + return PerformIntrinsicCombine(N, DCI.DAG); } return SDValue(); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index c9795b2..7c7d038 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -19,7 +19,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" - +#include "llvm/IR/Intrinsics.h" namespace llvm { namespace AArch64ISD { @@ -135,7 +135,11 @@ namespace AArch64ISD { NEON_TST, // Operation for the immediate in vector shift - NEON_DUPIMM + NEON_DUPIMM, + + // Vector saturating shift + NEON_QSHLs, + NEON_QSHLu }; } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index fb6d654..9712a5a 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -44,6 +44,12 @@ def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>>; +def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; +def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; + + //===----------------------------------------------------------------------===// // Multiclasses //===----------------------------------------------------------------------===// @@ -1413,58 +1419,133 @@ def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; } // Vector Shift (Immediate) - +// Immediate in [0, 63] def imm0_63 : Operand { let ParserMatchClass = uimm6_asmoperand; } -class N2VShiftLeft opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy> +// Shift Right Immediate - A shift right immediate is encoded differently from +// other shift immediates. 
The immh:immb field is encoded like so: +// +// Offset Encoding +// 8 immh:immb<6:3> = '0001xxx', is encoded in immh:immb<2:0> +// 16 immh:immb<6:4> = '001xxxx', is encoded in immh:immb<3:0> +// 32 immh:immb<6:5> = '01xxxxx', is encoded in immh:immb<4:0> +// 64 immh:immb<6> = '1xxxxxx', is encoded in immh:immb<5:0> +class shr_imm_asmoperands : AsmOperandClass { + let Name = "ShrImm" # OFFSET; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "ShrImm" # OFFSET; +} + +class shr_imm : Operand { + let EncoderMethod = "getShiftRightImm" # OFFSET; + let DecoderMethod = "DecodeShiftRightImm" # OFFSET; + let ParserMatchClass = + !cast("shr_imm" # OFFSET # "_asmoperand"); +} + +def shr_imm8_asmoperand : shr_imm_asmoperands<"8">; +def shr_imm16_asmoperand : shr_imm_asmoperands<"16">; +def shr_imm32_asmoperand : shr_imm_asmoperands<"32">; +def shr_imm64_asmoperand : shr_imm_asmoperands<"64">; + +def shr_imm8 : shr_imm<"8">; +def shr_imm16 : shr_imm<"16">; +def shr_imm32 : shr_imm<"32">; +def shr_imm64 : shr_imm<"64">; + +class N2VShift opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> : NeonI_2VShiftImm; multiclass NeonI_N2VShL opcode, string asmop> { // 64-bit vector types. - def _8B : N2VShiftLeft<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3> { + def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } - def _4H : N2VShiftLeft<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4> { + def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } - def _2S : N2VShiftLeft<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5> { + def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } // 128-bit vector types. 
- def _16B : N2VShiftLeft<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3> { + def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } - def _8H : N2VShiftLeft<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4> { + def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } - def _4S : N2VShiftLeft<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5> { + def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } - def _2D : N2VShiftLeft<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63> { + def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> { let Inst{22} = 0b1; // immh:immb = 1xxxxxx } } -def Neon_top16B : PatFrag<(ops node:$in), +multiclass NeonI_N2VShR opcode, string asmop, SDNode OpNode> { + def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Shift left +defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; + +// Shift right +defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>; +defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>; + +def Neon_top16B : PatFrag<(ops node:$in), (extract_subvector (v16i8 node:$in), (iPTR 8))>; -def Neon_top8H : PatFrag<(ops node:$in), +def Neon_top8H : PatFrag<(ops node:$in), (extract_subvector (v8i16 node:$in), (iPTR 4))>; -def Neon_top4S : PatFrag<(ops node:$in), +def Neon_top4S : PatFrag<(ops node:$in), (extract_subvector (v4i32 node:$in), (iPTR 2))>; class N2VShiftLong opcode, string asmop, string DestT, @@ -1474,21 +1555,21 @@ class N2VShiftLong opcode, string asmop, string DestT, (ins VPR64:$Rn, ImmTy:$Imm), asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", [(set (DestTy VPR128:$Rd), - (DestTy (shl + (DestTy (shl (DestTy (ExtOp (SrcTy VPR64:$Rn))), (DestTy (Neon_dupImm (i32 imm:$Imm))))))], NoItinerary>; class N2VShiftLongHigh opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, + string SrcT, ValueType DestTy, ValueType SrcTy, int StartIndex, Operand ImmTy, SDPatternOperator ExtOp, PatFrag getTop> : NeonI_2VShiftImm; @@ -1497,33 +1578,33 @@ multiclass NeonI_N2VShLL opcode, string asmop, SDNode ExtOp> { // 64-bit vector types. 
def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, - uimm3, ExtOp>{ + uimm3, ExtOp> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, - uimm4, ExtOp>{ + uimm4, ExtOp> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, - uimm5, ExtOp>{ + uimm5, ExtOp> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } // 128-bit vector types def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", - v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B>{ + v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", - v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H>{ + v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", - v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S>{ + v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } @@ -1547,13 +1628,521 @@ multiclass NeonI_N2VShLL opcode, string asmop, (!cast(prefix # "_4S") VPR128:$Rn, 0)>; } -// Shift left immediate -defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; - -// Shift left long immediate +// Shift left long defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; +// Rounding/Saturating shift +class N2VShift_RQ opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm; + +// shift right (vector by immediate) +multiclass NeonI_N2VShR_RQ opcode, string asmop, + SDPatternOperator OpNode> { + def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +multiclass NeonI_N2VShL_Q opcode, string asmop, + SDPatternOperator OpNode> { + // 64-bit vector types. + def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, + OpNode> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types. 
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Rounding shift right +defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr", + int_aarch64_neon_vsrshr>; +defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr", + int_aarch64_neon_vurshr>; + +// Saturating shift left unsigned +defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>; + +// Saturating shift left +defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>; +defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; + +class N2VShiftAdd opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDNode OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// Shift Right accumulate +multiclass NeonI_N2VShRAdd opcode, string asmop, SDNode OpNode> { + def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Shift right and accumulate +defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>; +defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>; + +// Rounding shift accumulate +class N2VShiftAdd_R opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_N2VShRAdd_R opcode, string asmop, + SDPatternOperator OpNode> { + def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Rounding shift right and accumulate +defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", 
int_aarch64_neon_vsrshr>; +defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>; + +// Shift insert by immediate +class N2VShiftIns opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// shift left insert (vector by immediate) +multiclass NeonI_N2VShLIns opcode, string asmop> { + def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, + int_aarch64_neon_vsli> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, + int_aarch64_neon_vsli> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, + int_aarch64_neon_vsli> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types + def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, + int_aarch64_neon_vsli> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, + int_aarch64_neon_vsli> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, + int_aarch64_neon_vsli> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, + int_aarch64_neon_vsli> { + let Inst{22} = 0b1; + } +} + +// shift right insert (vector by immediate) +multiclass NeonI_N2VShRIns opcode, string asmop> { + // 64-bit vector types. + def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + int_aarch64_neon_vsri> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + int_aarch64_neon_vsri> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + int_aarch64_neon_vsri> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types + def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + int_aarch64_neon_vsri> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + int_aarch64_neon_vsri> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + int_aarch64_neon_vsri> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + int_aarch64_neon_vsri> { + let Inst{22} = 0b1; + } +} + +// Shift left and insert +defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">; + +// Shift right and insert +defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">; + +class N2VShR_Narrow opcode, string asmop, string DestT, + string SrcT, Operand ImmTy> + : NeonI_2VShiftImm; + +class N2VShR_Narrow_Hi opcode, string asmop, string DestT, + string SrcT, Operand ImmTy> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// left long shift by immediate +multiclass NeonI_N2VShR_Narrow opcode, string asmop> { + def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> { + let Inst{22-21} = 0b01; + } + + // Shift Narrow High + def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h", + shr_imm8> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", 
"4s", + shr_imm16> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d", + shr_imm32> { + let Inst{22-21} = 0b01; + } +} + +// Shift right narrow +defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">; + +// Shift right narrow (prefix Q is saturating, prefix R is rounding) +defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">; +defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">; +defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">; +defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">; +defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">; +defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">; +defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">; + +def Neon_combine : PatFrag<(ops node:$Rm, node:$Rn), + (v2i64 (concat_vectors (v1i64 node:$Rm), + (v1i64 node:$Rn)))>; + +def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs), + (v8i16 (srl (v8i16 node:$lhs), + (v8i16 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs), + (v4i32 (srl (v4i32 node:$lhs), + (v4i32 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs), + (v2i64 (srl (v2i64 node:$lhs), + (v2i64 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs), + (v8i16 (sra (v8i16 node:$lhs), + (v8i16 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs), + (v4i32 (sra (v4i32 node:$lhs), + (v4i32 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs), + (v2i64 (sra (v2i64 node:$lhs), + (v2i64 (Neon_dupImm (i32 node:$rhs)))))>; + +// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors) +multiclass Neon_shiftNarrow_patterns { + def : Pat<(v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>; + def : Pat<(v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>; + def : Pat<(v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>; + + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>; +} + +multiclass Neon_shiftNarrow_QR_patterns { + def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)), + (!cast(prefix # "_8B") VPR128:$Rn, imm:$Imm)>; + def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)), + (!cast(prefix # "_4H") VPR128:$Rn, imm:$Imm)>; + def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)), + (!cast(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; + + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))), + (!cast(prefix # "_16B") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))), + (!cast(prefix # "_8H") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 
(bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))), + (!cast(prefix # "_4S") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; +} + +defm : Neon_shiftNarrow_patterns<"lshr">; +defm : Neon_shiftNarrow_patterns<"ashr">; + +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; +defm : Neon_shiftNarrow_QR_patterns; + +// Convert fix-point and float-pointing +class N2VCvt_Fx opcode, string asmop, string T, + RegisterClass VPRC, ValueType DestTy, ValueType SrcTy, + Operand ImmTy, SDPatternOperator IntOp> + : NeonI_2VShiftImm; + +multiclass NeonI_N2VCvt_Fx2fp opcode, string asmop, + SDPatternOperator IntOp> { + def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64, + shr_imm64, IntOp> { + let Inst{22} = 0b1; + } +} + +multiclass NeonI_N2VCvt_Fp2fx opcode, string asmop, + SDPatternOperator IntOp> { + def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64, + shr_imm64, IntOp> { + let Inst{22} = 0b1; + } +} + +// Convert fixed-point to floating-point +defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf", + int_arm_neon_vcvtfxs2fp>; +defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf", + int_arm_neon_vcvtfxu2fp>; + +// Convert floating-point to fixed-point +defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs", + int_arm_neon_vcvtfp2fxs>; +defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu", + int_arm_neon_vcvtfp2fxu>; + // Scalar Arithmetic class NeonI_Scalar3Same_D_size opcode, string asmop> @@ -1726,6 +2315,10 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; // ...and scalar bitcasts... 
+def : Pat<(v8i8  (bitconvert (v1i64 VPR64:$src))), (v8i8  VPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
+
 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))),
           (f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>;
 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))),
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 43e91ac..68d4be4 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -664,6 +664,25 @@ public:
     return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
   }
 
+  // Return true if 0 < value <= w.
+  bool isShrFixedWidth(int w) const {
+    if (!isImm())
+      return false;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE)
+      return false;
+    int64_t Value = CE->getValue();
+    return Value > 0 && Value <= w;
+  }
+
+  bool isShrImm8() const { return isShrFixedWidth(8); }
+
+  bool isShrImm16() const { return isShrFixedWidth(16); }
+
+  bool isShrImm32() const { return isShrFixedWidth(32); }
+
+  bool isShrImm64() const { return isShrFixedWidth(64); }
+
   bool isNeonMovImmShiftLSL() const {
     if (!isShiftOrExtend())
       return false;
@@ -2240,6 +2259,18 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_Width64:
     return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
                  "expected integer in range [<lsb>, 63]");
+  case Match_ShrImm8:
+    return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+                 "expected integer in range [1, 8]");
+  case Match_ShrImm16:
+    return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+                 "expected integer in range [1, 16]");
+  case Match_ShrImm32:
+    return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+                 "expected integer in range [1, 32]");
+  case Match_ShrImm64:
+    return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+                 "expected integer in range [1, 64]");
   }
 
   llvm_unreachable("Implement any new match types added!");
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index a88a8e8..5b57b50 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -113,6 +113,18 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
                                         uint64_t Address,
                                         const void *Decoder);
 
+static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
+                                         uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder);
+static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder);
+static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder);
+
 template<int RegWidth>
 static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
                                              unsigned FullImm,
@@ -413,7 +425,33 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
+                                         uint64_t Address,
+                                         const void *Decoder) {
+  Inst.addOperand(MCOperand::CreateImm(8 - Val));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  Inst.addOperand(MCOperand::CreateImm(16 - Val));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  Inst.addOperand(MCOperand::CreateImm(32 - Val));
+  return MCDisassembler::Success;
+}
+static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  Inst.addOperand(MCOperand::CreateImm(64 - Val));
+  return MCDisassembler::Success;
+}
 
 template<int RegWidth>
 static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index b9770b3..7bfaecc 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -59,6 +59,14 @@ public:
   unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
                                    SmallVectorImpl<MCFixup> &Fixups) const;
 
+  unsigned getShiftRightImm8(const MCInst &MI, unsigned Op,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getShiftRightImm16(const MCInst &MI, unsigned Op,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getShiftRightImm32(const MCInst &MI, unsigned Op,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getShiftRightImm64(const MCInst &MI, unsigned Op,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
 
   // Labels are handled mostly the same way: a symbol is needed, and
   // just gets some fixup attached.
@@ -310,6 +318,25 @@ AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
   return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
 }
 
+unsigned AArch64MCCodeEmitter::getShiftRightImm8(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm16(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm32(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm64(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return 64 - MI.getOperand(Op).getImm();
+}
 
 template<AArch64::Fixups fixupDesired> unsigned
 AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
--
cgit v1.1


From bf34f346420dbcdb3f9376967bde701682471a79 Mon Sep 17 00:00:00 2001
From: Venkatraman Govindaraju
Date: Wed, 4 Sep 2013 15:15:20 +0000
Subject: [Sparc] Fix an assertion failure while lowering fcmp on long double.

This assertion is triggered because an integer constant is created with the
wrong type.
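Concretely: LowerF128Compare lowers the f128 comparison to a libcall whose Result is an integer value, and the FCC_ULE case then compares that integer against the constant 2. Taking the constant's type from the f128 operand LHS is what fired the assertion; it has to come from Result instead. A minimal sketch of the fixed shape (variable names follow the diff below; the integer result type is an assumption):

    case SPCC::FCC_ULE: {
      // Result is the integer return value of the f128 comparison libcall;
      // LHS/RHS were the original f128 operands. Using Result.getValueType()
      // keeps getTargetConstant from being asked to build an integer
      // constant with a floating-point type.
      SDValue RHS = DAG.getTargetConstant(2, Result.getValueType());
      SPCC = SPCC::ICC_NE;
      return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
    }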
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189948 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 3250537..e646521 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1816,7 +1816,7 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); } case SPCC::FCC_ULE: { - SDValue RHS = DAG.getTargetConstant(2, LHS.getValueType()); + SDValue RHS = DAG.getTargetConstant(2, Result.getValueType()); SPCC = SPCC::ICC_NE; return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); } -- cgit v1.1 From 87b120690b64f41c5b2367653e542ae2cfaa27ba Mon Sep 17 00:00:00 2001 From: Silviu Baranga Date: Wed, 4 Sep 2013 17:05:18 +0000 Subject: Fix scheduling for vldm/vstm instructions that load/store more than 32 bytes on Cortex-A9. This also makes the existing code more compact. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189958 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMScheduleA9.td | 132 ++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 78 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 74ee50b..603e775 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1879,6 +1879,10 @@ def CortexA9Itineraries : ProcessorItineraries< // The following definitions describe the simpler per-operand machine model. // This works with MachineScheduler and will eventually replace itineraries. +class A9WriteLMOpsListType writes> { + list Writes = writes; + SchedMachineModel SchedModel = ?; +} // Cortex-A9 machine model for scheduling and other instruction cost heuristics. def CortexA9Model : SchedMachineModel { @@ -2011,7 +2015,7 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>; // Define a predicate to select the LDM based on number of memory addresses. def A9LMAdr#NumAddr#Pred : - SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>; } // foreach NumAddr @@ -2054,48 +2058,30 @@ def A9WriteL#NumAddr#Hi : WriteSequence< //===----------------------------------------------------------------------===// // LDM: Load multiple into 32-bit integer registers. +def A9WriteLMOpsList : A9WriteLMOpsListType< + [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi, + A9WriteL5, A9WriteL5Hi, + A9WriteL6, A9WriteL6Hi, + A9WriteL7, A9WriteL7Hi, + A9WriteL8, A9WriteL8Hi]>; + // A9WriteLM variants expand into a pair of writes for each 64-bit // value loaded. When the number of registers is odd, the last // A9WriteLnHi is naturally ignored because the instruction has no // following def operands. These variants take no issue resource, so // they may need to be part of a WriteSequence that includes A9WriteIssue. def A9WriteLM : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, // For unknown LDMs, define the maximum number of writes, but only // make the first two consume resources. 
SchedVar; // 21-22 + def A9WriteLMfpPostRA : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, // For unknown LDMs, define the maximum number of writes, but only - // make the first two consume resources. - SchedVar Date: Wed, 4 Sep 2013 17:41:16 +0000 Subject: Swift: Only build vldm/vstm with q register aligned register lists Unaligned vldm/vstm need more uops and therefore are slower in general on swift. radar://14522102 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189961 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 1803a8a..237adb9 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -489,7 +489,10 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, if (Reg != ARM::SP && NewOffset == Offset + (int)Size && ((isNotVFP && RegNum > PRegNum) || - ((Count < Limit) && RegNum == PRegNum+1))) { + ((Count < Limit) && RegNum == PRegNum+1)) && + // On Swift we don't want vldm/vstm to start with a odd register num + // because Q register unaligned vldm/vstm need more uops. + (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { Offset += Size; PRegNum = RegNum; ++Count; -- cgit v1.1 From 1bfa80359edc004bc8a91999de7c8e16708d2206 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 4 Sep 2013 19:08:44 +0000 Subject: ARM: Teach A15 SDOptimizer to properly handle D-reg by-lane. These instructions, such as vmul.f32, require the second source operand to be in D0-D15 rather than the full D0-D31. When optimizing, make sure to account for that by constraining the register class of a replacement virtual register to be compatible with the virtual register(s) it's replacing. I've been unsuccessful in creating a non-fragile regression test. This issue was detected by the LLVM nightly test suite running on an A15 (Bullet). PR17093: http://llvm.org/bugs/show_bug.cgi?id=17093 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189972 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/A15SDOptimizer.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index e8c2f7c..ff585b4 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -657,6 +657,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { Modified = true; for (SmallVectorImpl::const_iterator I = Uses.begin(), E = Uses.end(); I != E; ++I) { + // Make sure to constrain the register class of the new register to + // match what we're replacing. Otherwise we can optimize a DPR_VFP2 + // reference into a plain DPR, and that will end poorly. NewReg is + // always virtual here, so there will always be a matching subclass + // to find. 
+ MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg())); + DEBUG(dbgs() << "Replacing operand " << **I << " with " << PrintReg(NewReg) << "\n"); -- cgit v1.1 From b3df27d4402d8c8fc81d5acec812035360806cdc Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Wed, 4 Sep 2013 19:53:30 +0000 Subject: R600: Use SchedModel enum for is{Trans,Vector}Only functions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189979 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrFormats.td | 2 -- lib/Target/R600/R600InstrInfo.cpp | 12 +++++++++++- lib/Target/R600/R600InstrInfo.h | 2 ++ lib/Target/R600/R600Instructions.td | 26 ++++++-------------------- 4 files changed, 19 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td index ae3046d..9428bab 100644 --- a/lib/Target/R600/R600InstrFormats.td +++ b/lib/Target/R600/R600InstrFormats.td @@ -16,7 +16,6 @@ class InstR600 pattern, : AMDGPUInst { field bits<64> Inst; - bit TransOnly = 0; bit Trig = 0; bit Op3 = 0; bit isVector = 0; @@ -39,7 +38,6 @@ class InstR600 pattern, let Pattern = pattern; let Itinerary = itin; - let TSFlags{0} = TransOnly; let TSFlags{4} = Trig; let TSFlags{5} = Op3; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 4e0607f..212463f 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -154,13 +154,23 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { } bool R600InstrInfo::isTransOnly(unsigned Opcode) const { - return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY); + if (ST.hasCaymanISA()) + return false; + return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); } bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { return isTransOnly(MI->getOpcode()); } +bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { + return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); +} + +bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const { + return isVectorOnly(MI->getOpcode()); +} + bool R600InstrInfo::isExport(unsigned Opcode) const { return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT); } diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index e28d771..d083fb8 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -68,6 +68,8 @@ namespace llvm { bool isTransOnly(unsigned Opcode) const; bool isTransOnly(const MachineInstr *MI) const; + bool isVectorOnly(unsigned Opcode) const; + bool isVectorOnly(const MachineInstr *MI) const; bool isExport(unsigned Opcode) const; bool usesVertexCache(unsigned Opcode) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index f5c0266..99ffa14 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -926,12 +926,16 @@ class CNDE_Common inst> : R600_3OP < class CNDGT_Common inst> : R600_3OP < inst, "CNDGT", [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))] ->; +> { + let Itinerary = VecALU; +} class CNDGE_Common inst> : R600_3OP < inst, "CNDGE", [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))] ->; +> { + let Itinerary = VecALU; +} let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { @@ -1001,35 +1005,30 @@ multiclass CUBE_Common inst> { class EXP_IEEE_Common inst> : R600_1OP_Helper < inst, "EXP_IEEE", fexp2 > { - let TransOnly = 1; let Itinerary = TransALU; } class 
FLT_TO_INT_Common inst> : R600_1OP_Helper < inst, "FLT_TO_INT", fp_to_sint > { - let TransOnly = 1; let Itinerary = TransALU; } class INT_TO_FLT_Common inst> : R600_1OP_Helper < inst, "INT_TO_FLT", sint_to_fp > { - let TransOnly = 1; let Itinerary = TransALU; } class FLT_TO_UINT_Common inst> : R600_1OP_Helper < inst, "FLT_TO_UINT", fp_to_uint > { - let TransOnly = 1; let Itinerary = TransALU; } class UINT_TO_FLT_Common inst> : R600_1OP_Helper < inst, "UINT_TO_FLT", uint_to_fp > { - let TransOnly = 1; let Itinerary = TransALU; } @@ -1040,7 +1039,6 @@ class LOG_CLAMPED_Common inst> : R600_1OP < class LOG_IEEE_Common inst> : R600_1OP_Helper < inst, "LOG_IEEE", flog2 > { - let TransOnly = 1; let Itinerary = TransALU; } @@ -1050,72 +1048,61 @@ class ASHR_Common inst> : R600_2OP_Helper ; class MULHI_INT_Common inst> : R600_2OP_Helper < inst, "MULHI_INT", mulhs > { - let TransOnly = 1; let Itinerary = TransALU; } class MULHI_UINT_Common inst> : R600_2OP_Helper < inst, "MULHI", mulhu > { - let TransOnly = 1; let Itinerary = TransALU; } class MULLO_INT_Common inst> : R600_2OP_Helper < inst, "MULLO_INT", mul > { - let TransOnly = 1; let Itinerary = TransALU; } class MULLO_UINT_Common inst> : R600_2OP { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_CLAMPED_Common inst> : R600_1OP < inst, "RECIP_CLAMPED", [] > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_IEEE_Common inst> : R600_1OP < inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_UINT_Common inst> : R600_1OP_Helper < inst, "RECIP_UINT", AMDGPUurecip > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIPSQRT_CLAMPED_Common inst> : R600_1OP_Helper < inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIPSQRT_IEEE_Common inst> : R600_1OP < inst, "RECIPSQRT_IEEE", [] > { - let TransOnly = 1; let Itinerary = TransALU; } class SIN_Common inst> : R600_1OP < inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{ let Trig = 1; - let TransOnly = 1; let Itinerary = TransALU; } class COS_Common inst> : R600_1OP < inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> { let Trig = 1; - let TransOnly = 1; let Itinerary = TransALU; } @@ -1548,7 +1535,6 @@ let hasSideEffects = 1 in { def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { let Pattern = []; - let TransOnly = 0; let Itinerary = AnyALU; } -- cgit v1.1 From bb25a01d232257b134f1f6a5810116cbb04b95b1 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Wed, 4 Sep 2013 19:53:46 +0000 Subject: R600: Non vector only instruction can be scheduled on trans unit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189980 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrInfo.cpp | 3 +++ lib/Target/R600/R600MachineScheduler.cpp | 33 +++++++++++++++++---------- lib/Target/R600/R600MachineScheduler.h | 5 +++-- lib/Target/R600/R600Packetizer.cpp | 38 +++++++++++++++++++++++++++----- 4 files changed, 60 insertions(+), 19 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 212463f..e3cb71b 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -477,6 +477,9 @@ static bool isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, const std::vector > &TransOps, unsigned ConstCount) { + // TransALU can't read 3 constants + if (ConstCount > 2) + return false; for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { const std::pair &Src = 
TransOps[i]; unsigned Cycle = getTransSwizzle(TransSwz, i); diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 0dc0365..0499dd5 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -9,7 +9,6 @@ // /// \file /// \brief R600 Machine Scheduler interface -// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot // //===----------------------------------------------------------------------===// @@ -29,6 +28,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { DAG = dag; TII = static_cast(DAG->TII); TRI = static_cast(DAG->TRI); + VLIW5 = !DAG->MF.getTarget().getSubtarget().hasCaymanISA(); MRI = &DAG->MRI; CurInstKind = IDOther; CurEmitted = 0; @@ -342,14 +342,16 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { } } -SUnit *R600SchedStrategy::PopInst(std::vector &Q) { +SUnit *R600SchedStrategy::PopInst(std::vector &Q, bool AnyALU) { if (Q.empty()) return NULL; for (std::vector::reverse_iterator It = Q.rbegin(), E = Q.rend(); It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); - if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) { + if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) + && (!AnyALU || !TII->isVectorOnly(SU->getInstr())) + ) { InstructionsGroupCandidate.pop_back(); Q.erase((It + 1).base()); return SU; @@ -373,6 +375,8 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; +// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS) +// OccupedSlotsMask |= 16; InstructionsGroupCandidate.clear(); LoadAlu(); } @@ -409,12 +413,12 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { } } -SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) { +SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) { static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; - SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]); + SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu); if (SlotedSU) return SlotedSU; - SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); + SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu); if (UnslotedSU) AssignSlot(UnslotedSU->getInstr(), Slot); return UnslotedSU; @@ -434,30 +438,35 @@ SUnit* R600SchedStrategy::pickAlu() { // Bottom up scheduling : predX must comes first if (!AvailableAlus[AluPredX].empty()) { OccupedSlotsMask |= 31; - return PopInst(AvailableAlus[AluPredX]); + return PopInst(AvailableAlus[AluPredX], false); } // Flush physical reg copies (RA will discard them) if (!AvailableAlus[AluDiscarded].empty()) { OccupedSlotsMask |= 31; - return PopInst(AvailableAlus[AluDiscarded]); + return PopInst(AvailableAlus[AluDiscarded], false); } // If there is a T_XYZW alu available, use it if (!AvailableAlus[AluT_XYZW].empty()) { OccupedSlotsMask |= 15; - return PopInst(AvailableAlus[AluT_XYZW]); + return PopInst(AvailableAlus[AluT_XYZW], false); } } bool TransSlotOccuped = OccupedSlotsMask & 16; - if (!TransSlotOccuped) { + if (!TransSlotOccuped && VLIW5) { if (!AvailableAlus[AluTrans].empty()) { OccupedSlotsMask |= 16; - return PopInst(AvailableAlus[AluTrans]); + return PopInst(AvailableAlus[AluTrans], false); + } + SUnit *SU = AttemptFillSlot(3, true); + if (SU) { + OccupedSlotsMask |= 16; + return SU; } } for (int Chan = 3; Chan > -1; --Chan) { bool isOccupied = OccupedSlotsMask & (1 << Chan); if (!isOccupied) { - 
SUnit *SU = AttemptFillSlot(Chan); + SUnit *SU = AttemptFillSlot(Chan, false); if (SU) { OccupedSlotsMask |= (1 << Chan); InstructionsGroupCandidate.push_back(SU->getInstr()); diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index f8965d8..0a6f120 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -84,15 +84,16 @@ public: private: std::vector InstructionsGroupCandidate; + bool VLIW5; int getInstKind(SUnit *SU); bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; AluKind getAluKind(SUnit *SU) const; void LoadAlu(); unsigned AvailablesAluCount() const; - SUnit *AttemptFillSlot (unsigned Slot); + SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu); void PrepareNextSlot(); - SUnit *PopInst(std::vector &Q); + SUnit *PopInst(std::vector &Q, bool AnyALU); void AssignSlot(MachineInstr *MI, unsigned Slot); SUnit* pickAlu(); diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 5cf1fd3..6c70052 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -58,6 +58,8 @@ class R600PacketizerList : public VLIWPacketizerList { private: const R600InstrInfo *TII; const R600RegisterInfo &TRI; + bool VLIW5; + bool ConsideredInstUsesAlreadyWrittenVectorElement; unsigned getSlot(const MachineInstr *MI) const { return TRI.getHWRegChan(MI->getOperand(0).getReg()); @@ -74,7 +76,13 @@ private: MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); if (I->isBundle()) BI++; + int LastDstChan = -1; do { + bool isTrans = false; + int BISlot = getSlot(BI); + if (LastDstChan >= BISlot) + isTrans = true; + LastDstChan = BISlot; if (TII->isPredicated(BI)) continue; int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); @@ -85,7 +93,7 @@ private: continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); - if (TII->isTransOnly(BI)) { + if (isTrans || TII->isTransOnly(BI)) { Result[Dst] = AMDGPU::PS; continue; } @@ -142,10 +150,14 @@ public: MachineDominatorTree &MDT) : VLIWPacketizerList(MF, MLI, MDT, true), TII (static_cast(MF.getTarget().getInstrInfo())), - TRI(TII->getRegisterInfo()) { } + TRI(TII->getRegisterInfo()) { + VLIW5 = !MF.getTarget().getSubtarget().hasCaymanISA(); + } // initPacketizerState - initialize some internal flags. - void initPacketizerState() { } + void initPacketizerState() { + ConsideredInstUsesAlreadyWrittenVectorElement = false; + } // ignorePseudoInstruction - Ignore bundling of pseudo instructions. bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) { @@ -172,8 +184,8 @@ public: // together. bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); - if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII)) - return false; + if (getSlot(MII) == getSlot(MIJ)) + ConsideredInstUsesAlreadyWrittenVectorElement = true; // Does MII and MIJ share the same pred_sel ? int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); @@ -211,6 +223,20 @@ public: std::vector &BS, bool &isTransSlot) { isTransSlot = TII->isTransOnly(MI); + assert (!isTransSlot || VLIW5); + + // Is the dst reg sequence legal ? 
+ if (!isTransSlot && !CurrentPacketMIs.empty()) { + if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) { + if (ConsideredInstUsesAlreadyWrittenVectorElement && + !TII->isVectorOnly(MI) && VLIW5) { + isTransSlot = true; + DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump();); + } + else + return false; + } + } // Are the Constants limitations met ? CurrentPacketMIs.push_back(MI); @@ -278,6 +304,8 @@ public: return It; } endPacket(MI->getParent(), MI); + if (TII->isTransOnly(MI)) + return MI; return VLIWPacketizerList::addToPacket(MI); } }; -- cgit v1.1 From f94eea9e112a75d0d328f799dd889681094cee97 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Wed, 4 Sep 2013 19:53:54 +0000 Subject: R600: Use shared op optimization when checking cycle compatibility git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189981 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrInfo.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index e3cb71b..0e7cfb4 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -336,6 +336,8 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI, static std::vector > Swizzle(std::vector > Src, R600InstrInfo::BankSwizzle Swz) { + if (Src[0] == Src[1]) + Src[1].first = -1; switch (Swz) { case R600InstrInfo::ALU_VEC_012_SCL_210: break; -- cgit v1.1 From 1b41835f02f77c04a93323f722cf158cc566acae Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Thu, 5 Sep 2013 05:32:16 +0000 Subject: [Sparc] Correctly handle call to functions with ReturnsTwice attribute. In sparc, setjmp stores only the registers %fp, %sp, %i7 and %o7. longjmp restores the stack, and the callee-saved registers (all local/in registers: %i0-%i7, %l0-%l7) using the stored %fp and register windows. However, this does not guarantee that the longjmp will restore the registers, as they were when the setjmp was called. This is because these registers may be clobbered after returning from setjmp, but before calling longjmp. This patch prevents the registers %i0-%i5, %l0-l7 to live across the setjmp call using the register mask. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190033 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcCallingConv.td | 5 +++++ lib/Target/Sparc/SparcISelLowering.cpp | 38 ++++++++++++++++++++++++++++++---- lib/Target/Sparc/SparcRegisterInfo.cpp | 5 +++++ lib/Target/Sparc/SparcRegisterInfo.h | 2 ++ 4 files changed, 46 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td index 181165d..acd4ec2 100644 --- a/lib/Target/Sparc/SparcCallingConv.td +++ b/lib/Target/Sparc/SparcCallingConv.td @@ -123,3 +123,8 @@ def CSR : CalleeSavedRegs<(add)> { let OtherPreserved = (add (sequence "I%u", 0, 7), (sequence "L%u", 0, 7)); } + +// Callee-saved registers for calls with ReturnsTwice attribute. 
+def RTCSR : CalleeSavedRegs<(add)> {
+  let OtherPreserved = (add I6, I7);
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index e646521..641ab6c 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -14,6 +14,7 @@
 #include "SparcISelLowering.h"
 #include "SparcMachineFunctionInfo.h"
+#include "SparcRegisterInfo.h"
 #include "SparcTargetMachine.h"
 #include "MCTargetDesc/SparcBaseInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -648,6 +649,27 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   return LowerCall_32(CLI, InVals);
 }
 
+static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
+                                ImmutableCallSite *CS) {
+  if (CS)
+    return CS->hasFnAttr(Attribute::ReturnsTwice);
+
+  const Function *CalleeFn = 0;
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    CalleeFn = dyn_cast<Function>(G->getGlobal());
+  } else if (ExternalSymbolSDNode *E =
+             dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    const Function *Fn = DAG.getMachineFunction().getFunction();
+    const Module *M = Fn->getParent();
+    const char *CalleeName = E->getSymbol();
+    CalleeFn = M->getFunction(CalleeName);
+  }
+
+  if (!CalleeFn)
+    return false;
+  return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice);
+}
+
 // Lower a call for the 32-bit ABI.
 SDValue
 SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
@@ -861,6 +883,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   }
 
   unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
+  bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
 
   // If the callee is a GlobalAddress node (quite common, every direct call is)
   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
@@ -882,8 +905,11 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
                                   RegsToPass[i].second.getValueType()));
 
   // Add a register mask operand representing the call-preserved registers.
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
-  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+  const SparcRegisterInfo *TRI =
+    ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
+  const uint32_t *Mask = ((hasReturnsTwice)
+                          ? TRI->getRTCallPreservedMask(CallConv)
+                          : TRI->getCallPreservedMask(CallConv));
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));
 
@@ -1125,6 +1151,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
   // Likewise ExternalSymbol -> TargetExternalSymbol.
   SDValue Callee = CLI.Callee;
+  bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
   else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
@@ -1139,8 +1166,11 @@
                                   RegsToPass[i].second.getValueType()));
 
   // Add a register mask operand representing the call-preserved registers.
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
-  const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+  const SparcRegisterInfo *TRI =
+    ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
+  const uint32_t *Mask = ((hasReturnsTwice)
+                          ?
TRI->getRTCallPreservedMask(CLI.CallConv) + : TRI->getCallPreservedMask(CLI.CallConv)); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 09f52fb..8e14dd9 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -48,6 +48,11 @@ SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return CSR_RegMask; } +const uint32_t* +SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const { + return RTCSR_RegMask; +} + BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // FIXME: G1 reserved for now for large imm generation by frame code. diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index ae056cd..503229d 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -34,6 +34,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t* getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const; + BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, -- cgit v1.1 From 16277c4698f36a756c540fae326874774156aaed Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 5 Sep 2013 10:36:45 +0000 Subject: [SystemZ] Add NC, OC and XC For now these are just used to handle scalar ANDs, ORs and XORs in which all operands are memory. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190041 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 69 ++++++++++++++++++++++-------- lib/Target/SystemZ/SystemZISelLowering.cpp | 15 +++++++ lib/Target/SystemZ/SystemZISelLowering.h | 8 ++++ lib/Target/SystemZ/SystemZInstrFP.td | 6 +-- lib/Target/SystemZ/SystemZInstrInfo.td | 42 ++++++++++++------ lib/Target/SystemZ/SystemZOperators.td | 37 ++++++++++++++++ lib/Target/SystemZ/SystemZPatterns.td | 39 ++++++++++++----- 7 files changed, 170 insertions(+), 46 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index e0d5437..9d71c39 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -290,8 +290,15 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + // N is a (store (load Y), X) pattern. Return true if it can use an MVC + // from Y to X. bool storeLoadCanUseMVC(SDNode *N) const; + // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true + // if A[1 - I] == X and if N can use a block operation like NC from A[I] + // to X. + bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const; + public: SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel), @@ -925,34 +932,27 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, return Or.getNode(); } -// N is a (store (load ...), ...) pattern. Return true if it can use MVC. 
-bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { - StoreSDNode *Store = cast(N); - LoadSDNode *Load = cast(Store->getValue().getNode()); +// Return true if Load and Store: +// - are loads and stores of the same size; +// - do not partially overlap; and +// - can be decomposed into what are logically individual character accesses +// without changing the semantics. +static bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load, + AliasAnalysis *AA) { + // Check that the two memory operands have the same size. + if (Load->getMemoryVT() != Store->getMemoryVT()) + return false; - // MVC is logically a bytewise copy, so can't be used for volatile accesses. + // Volatility stops an access from being decomposed. if (Load->isVolatile() || Store->isVolatile()) return false; - // Prefer not to use MVC if either address can use ... RELATIVE LONG - // instructions. - assert(Load->getMemoryVT() == Store->getMemoryVT() && - "Should already have checked that the types match"); - uint64_t Size = Load->getMemoryVT().getStoreSize(); - if (Size > 1 && Size <= 8) { - // Prefer LHRL, LRL and LGRL. - if (Load->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) - return false; - // Prefer STHRL, STRL and STGRL. - if (Store->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) - return false; - } - // There's no chance of overlap if the load is invariant. if (Load->isInvariant()) return true; // If both operands are aligned, they must be equal or not overlap. + uint64_t Size = Load->getMemoryVT().getStoreSize(); if (Load->getAlignment() >= Size && Store->getAlignment() >= Size) return true; @@ -968,6 +968,37 @@ bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { AliasAnalysis::Location(V2, End2, Store->getTBAAInfo())); } +bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { + StoreSDNode *Store = cast(N); + LoadSDNode *Load = cast(Store->getValue()); + + // Prefer not to use MVC if either address can use ... RELATIVE LONG + // instructions. + uint64_t Size = Load->getMemoryVT().getStoreSize(); + if (Size > 1 && Size <= 8) { + // Prefer LHRL, LRL and LGRL. + if (Load->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) + return false; + // Prefer STHRL, STRL and STGRL. 
+ if (Store->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) + return false; + } + + return canUseBlockOperation(Store, Load, AA); +} + +bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, + unsigned I) const { + StoreSDNode *StoreA = cast(N); + LoadSDNode *LoadA = cast(StoreA->getValue().getOperand(1 - I)); + LoadSDNode *LoadB = cast(StoreA->getValue().getOperand(I)); + if (LoadA->isVolatile() || + LoadA->getMemoryVT() != StoreA->getMemoryVT() || + LoadA->getBasePtr() != StoreA->getBasePtr()) + return false; + return canUseBlockOperation(StoreA, LoadB, AA); +} + SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 13d5604..180c631 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2054,6 +2054,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(UDIVREM64); OPCODE(MVC); OPCODE(MVC_LOOP); + OPCODE(NC); + OPCODE(NC_LOOP); + OPCODE(OC); + OPCODE(OC_LOOP); + OPCODE(XC); + OPCODE(XC_LOOP); OPCODE(CLC); OPCODE(CLC_LOOP); OPCODE(STRCMP); @@ -3077,6 +3083,15 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::MVCSequence: case SystemZ::MVCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::MVC); + case SystemZ::NCSequence: + case SystemZ::NCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::NC); + case SystemZ::OCSequence: + case SystemZ::OCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::OC); + case SystemZ::XCSequence: + case SystemZ::XCLoop: + return emitMemMemWrapper(MI, MBB, SystemZ::XC); case SystemZ::CLCSequence: case SystemZ::CLCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::CLC); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index ec5051f..8b2d19a 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -93,6 +93,14 @@ namespace SystemZISD { // The value of X is passed as an additional operand. MVC_LOOP, + // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR). + NC, + NC_LOOP, + OC, + OC_LOOP, + XC, + XC_LOOP, + // Use CLC to compare two blocks of memory, with the same comments // as for MVC and MVC_LOOP. 
CLC, diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 8d0dcb6..24adaee 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -86,9 +86,9 @@ def : CopySign128; -defm LoadStoreF32 : MVCLoadStore; -defm LoadStoreF64 : MVCLoadStore; -defm LoadStoreF128 : MVCLoadStore; +defm LoadStoreF32 : MVCLoadStore; +defm LoadStoreF64 : MVCLoadStore; +defm LoadStoreF128 : MVCLoadStore; //===----------------------------------------------------------------------===// // Load instructions diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 7793818..abe28a5 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -350,20 +350,6 @@ let mayLoad = 1, mayStore = 1 in let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0W] in defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; -defm LoadStore8_32 : MVCLoadStore; -defm LoadStore16_32 : MVCLoadStore; -defm LoadStore32_32 : MVCLoadStore; - -defm LoadStore8 : MVCLoadStore; -defm LoadStore16 : MVCLoadStore; -defm LoadStore32 : MVCLoadStore; -defm LoadStore64 : MVCLoadStore; - //===----------------------------------------------------------------------===// // Sign extensions //===----------------------------------------------------------------------===// @@ -770,6 +756,10 @@ let Defs = [CC] in { // AND to memory defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; + + // Block AND. + let mayLoad = 1, mayStore = 1 in + defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>; } defm : RMWIByte; defm : RMWIByte; @@ -812,6 +802,10 @@ let Defs = [CC] in { // OR to memory defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; + + // Block OR. + let mayLoad = 1, mayStore = 1 in + defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>; } defm : RMWIByte; defm : RMWIByte; @@ -843,6 +837,10 @@ let Defs = [CC] in { // XOR to memory defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; + + // Block XOR. + let mayLoad = 1, mayStore = 1 in + defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>; } defm : RMWIByte; defm : RMWIByte; @@ -1246,3 +1244,19 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, (i32 63)), (i32 63)), (Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; + +// Peepholes for turning scalar operations into block operations. 
+defm : BlockLoadStore; +defm : BlockLoadStore; +defm : BlockLoadStore; +defm : BlockLoadStore; +defm : BlockLoadStore; +defm : BlockLoadStore; +defm : BlockLoadStore; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 822195c..c89e315 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -131,6 +131,18 @@ def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop, @@ -224,6 +236,31 @@ def nonvolatile_truncstorei8 : NonvolatileStore; def nonvolatile_truncstorei16 : NonvolatileStore; def nonvolatile_truncstorei32 : NonvolatileStore; +// A store of a load that can be implemented using MVC. +def mvc_store : PatFrag<(ops node:$value, node:$addr), + (unindexedstore node:$value, node:$addr), + [{ return storeLoadCanUseMVC(N); }]>; + +// Binary read-modify-write operations on memory in which the other +// operand is also memory and for which block operations like NC can +// be used. There are two patterns for each operator, depending on +// which operand contains the "other" load. +multiclass block_op { + def "1" : PatFrag<(ops node:$value, node:$addr), + (unindexedstore (operator node:$value, + (unindexedload node:$addr)), + node:$addr), + [{ return storeLoadCanUseBlockBinary(N, 0); }]>; + def "2" : PatFrag<(ops node:$value, node:$addr), + (unindexedstore (operator (unindexedload node:$addr), + node:$value), + node:$addr), + [{ return storeLoadCanUseBlockBinary(N, 1); }]>; +} +defm block_and : block_op; +defm block_or : block_op; +defm block_xor : block_op; + // Insertions. def inserti8 : PatFrag<(ops node:$src1, node:$src2), (or (and node:$src1, -256), node:$src2)>; diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index c442ae0..bc3775f 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -66,20 +66,39 @@ multiclass InsertMem; } -// Use MVC instruction INSN for a load of type LOAD followed by a store -// of type STORE. VT is the type of the intermediate register and LENGTH -// is the number of bytes to copy (which may be smaller than VT). -multiclass MVCLoadStore length> { - def Pat : PatFrag<(ops node:$dest, node:$src), - (store (vt (load node:$src)), node:$dest), - [{ return storeLoadCanUseMVC(N); }]>; +// Try to use MVC instruction INSN for a load of type LOAD followed by a store +// of the same size. VT is the type of the intermediate (legalized) value and +// LENGTH is the number of bytes loaded by LOAD. 
+multiclass MVCLoadStore length> { + def : Pat<(mvc_store (vt (load bdaddr12only:$src)), bdaddr12only:$dest), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} - def : Pat<(!cast(NAME##"Pat") bdaddr12only:$dest, - bdaddr12only:$src), +// Use NC-like instruction INSN for block_op operation OPERATOR. +// The other operand is a load of type LOAD, which accesses LENGTH bytes. +// VT is the intermediate legalized type in which the binary operation +// is actually done. +multiclass BinaryLoadStore length> { + def : Pat<(operator (vt (load bdaddr12only:$src)), bdaddr12only:$dest), (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; } +// A convenient way of generating all block peepholes for a particular +// LOAD/VT/LENGTH combination. +multiclass BlockLoadStore length> { + defm : MVCLoadStore; + defm : BinaryLoadStore; + defm : BinaryLoadStore; + defm : BinaryLoadStore; + defm : BinaryLoadStore; + defm : BinaryLoadStore; + defm : BinaryLoadStore; +} + // Record that INSN is a LOAD AND TEST that can be used to compare // registers in CLS against zero. The instruction has separate R1 and R2 // operands, but they must be the same when the instruction is used like this. -- cgit v1.1 From 10b5086e6e945b830ff909821240eff5c4a42bfc Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Thu, 5 Sep 2013 11:10:31 +0000 Subject: ARM: Add GPR register class excluding LR for use with the ADR instruction. This improves code generation for jump tables by avoiding the emission of "mov pc, lr" which could fool the processor into believing this is a return from a function causing mispredicts. The code generation logic for jump tables uses ADR to materialize the address of the jump target. Patch by Daniel Stewart! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190043 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 2 +- lib/Target/ARM/ARMRegisterInfo.td | 8 ++++++++ lib/Target/ARM/Thumb2InstrInfo.cpp | 16 ++++++++++++++-- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 5d0c484..2f3fa63 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1233,7 +1233,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; let hasSideEffects = 1 in -def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), +def t2LEApcrelJT : t2PseudoInst<(outs jtGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 90c6a96..51ecaf7 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -240,6 +240,14 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { }]; } +// jtGPR - Jump Table General Purpose Registers. +// Used by the Thumb2 instructions to prevent Thumb2 jump tables +// from using the LR. The implementation of the jump table uses a mov pc, rA +// type instruction to jump into the table. Use of the LR register (as in +// mov pc, lr) can cause the ARM branch predictor to think it is returning +// from a function instead. This causes a mispredict and a pipe flush. +def jtGPR : RegisterClass<"ARM", [i32], 32, (sub rGPR, LR)>; + // Thumb registers are R0-R7 normally. Some instructions can still use // the general GPR register class above (MOV, e.g.) 
def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 286eaa0..7c51c70 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -152,7 +152,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). MachineRegisterInfo *MRI = &MF.getRegInfo(); - MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC, + &ARM::rGPRRegClass, + ARM::gsub_1); + assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass"); + const TargetRegisterClass* ConstrainedClass = + MRI->constrainRegClass(SrcReg, TargetClass); + assert(ConstrainedClass && "Couldn't constrain the register class"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); @@ -193,7 +199,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). MachineRegisterInfo *MRI = &MF.getRegInfo(); - MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC, + &ARM::rGPRRegClass, + ARM::gsub_1); + assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass"); + const TargetRegisterClass* ConstrainedClass = + MRI->constrainRegClass(DestReg, TargetClass); + assert(ConstrainedClass && "Couldn't constrain the register class"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); -- cgit v1.1 From 8f3d54d057007552d0abc37c87a50ef34a7ab9ef Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Thu, 5 Sep 2013 11:59:43 +0000 Subject: Reverting 190043 for now. Solution is not sufficient to prevent 'mov pc, lr' being emitted for jump table code. Test case doesn't trigger the added functionality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190047 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 2 +- lib/Target/ARM/ARMRegisterInfo.td | 8 -------- lib/Target/ARM/Thumb2InstrInfo.cpp | 16 ++-------------- 3 files changed, 3 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 2f3fa63..5d0c484 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1233,7 +1233,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; let hasSideEffects = 1 in -def t2LEApcrelJT : t2PseudoInst<(outs jtGPR:$Rd), +def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 51ecaf7..90c6a96 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -240,14 +240,6 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { }]; } -// jtGPR - Jump Table General Purpose Registers. -// Used by the Thumb2 instructions to prevent Thumb2 jump tables -// from using the LR. 
The implementation of the jump table uses a mov pc, rA -// type instruction to jump into the table. Use of the LR register (as in -// mov pc, lr) can cause the ARM branch predictor to think it is returning -// from a function instead. This causes a mispredict and a pipe flush. -def jtGPR : RegisterClass<"ARM", [i32], 32, (sub rGPR, LR)>; - // Thumb registers are R0-R7 normally. Some instructions can still use // the general GPR register class above (MOV, e.g.) def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 7c51c70..286eaa0 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -152,13 +152,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC, - &ARM::rGPRRegClass, - ARM::gsub_1); - assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass"); - const TargetRegisterClass* ConstrainedClass = - MRI->constrainRegClass(SrcReg, TargetClass); - assert(ConstrainedClass && "Couldn't constrain the register class"); + MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); @@ -199,13 +193,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC, - &ARM::rGPRRegClass, - ARM::gsub_1); - assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass"); - const TargetRegisterClass* ConstrainedClass = - MRI->constrainRegClass(DestReg, TargetClass); - assert(ConstrainedClass && "Couldn't constrain the register class"); + MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); -- cgit v1.1 From b5523ce1bb50e86942ad5273e3a89872c4d26b73 Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Thu, 5 Sep 2013 14:14:19 +0000 Subject: Add AArch32 DCPS{1,2,3} and HLT instructions. These were pretty straightforward instructions, with some assembly support required for HLT. The ARM assembler is keen to split the instruction mnemonic into a (non-existent) 'H' instruction with the LT condition code. An exception for HLT is needed. HLT follows the same rules as BKPT when in IT blocks, so the special BKPT hadling code has been adapted to handle HLT also. Regression tests added including diagnostic tests for out of range immediates and illegal condition codes, as well as negative tests for pre-ARMv8. 
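The hazard here is mechanical: the ARM asm parser carves a condition code off the end of a mnemonic before matching, so "hlt" would otherwise be read as an "h" instruction predicated on LT. A minimal sketch of the kind of whitelist test involved (helper name hypothetical; the real change simply adds "hlt" to the exception list in splitMnemonic(), as the diff below shows):

    #include "llvm/ADT/StringRef.h"
    using llvm::StringRef;

    // Hypothetical helper mirroring the splitMnemonic() exception list:
    // mnemonics named here keep their full spelling, so their trailing two
    // letters are never stripped as a condition code. "vclt" and "smlal"
    // are existing entries; "hlt" is the ARMv8 addition.
    static bool mnemonicHasFixedSuffix(StringRef Mnemonic) {
      return Mnemonic == "hlt" || Mnemonic == "vclt" || Mnemonic == "smlal";
    }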
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190053 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 10 ++++++++++ lib/Target/ARM/ARMInstrThumb.td | 7 +++++++ lib/Target/ARM/ARMInstrThumb2.td | 14 ++++++++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 23 ++++++++++++++++------- 4 files changed, 47 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 08b3ef2..5ebe97d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1758,6 +1758,16 @@ def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, let Inst{7-4} = 0b0111; } +def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> { + bits<16> val; + let Inst{3-0} = val{3-0}; + let Inst{19-8} = val{15-4}; + let Inst{27-20} = 0b00010000; + let Inst{31-28} = 0xe; // AL + let Inst{7-4} = 0b0111; +} + // Change Processor State // FIXME: We should use InstAlias to handle the optional operands. class CPS diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 291b98a..dd5f2cf 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -300,6 +300,13 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val", let Inst{7-0} = val; } +def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", + []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> { + let Inst{9-6} = 0b1010; + bits<6> val; + let Inst{5-0} = val; +} + def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", []>, T1Encoding<0b101101> { bits<1> end; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 5d0c484..5ba14cf 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3645,6 +3645,20 @@ def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", let Inst{19-16} = opt; } +class T2DCPS opt, string opc> + : T2I<(outs), (ins), NoItinerary, opc, "", []>, Requires<[IsThumb2, HasV8]> { + let Inst{31-27} = 0b11110; + let Inst{26-20} = 0b1111000; + let Inst{19-16} = 0b1111; + let Inst{15-12} = 0b1000; + let Inst{11-2} = 0b0000000000; + let Inst{1-0} = opt; +} + +def t2DCPS1 : T2DCPS<0b01, "dcps1">; +def t2DCPS2 : T2DCPS<0b10, "dcps2">; +def t2DCPS3 : T2DCPS<0b11, "dcps3">; + class T2SRS Op, bit W, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> : T2I { diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index b0037f0..9344810 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4687,7 +4687,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" || Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || - Mnemonic == "vaclt" || Mnemonic == "vacle" || + Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "hlt" || Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || @@ -4793,7 +4793,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || Mnemonic == 
"it" || Mnemonic == "cbz" || - Mnemonic == "trap" || Mnemonic == "setend" || + Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || @@ -5297,6 +5297,16 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) { return ARMInsts[Opcode]; } +// Return true if instruction has the interesting property of being +// allowed in IT blocks, but not being predicable. +static bool instIsBreakpoint(const MCInst &Inst) { + return Inst.getOpcode() == ARM::tBKPT || + Inst.getOpcode() == ARM::BKPT || + Inst.getOpcode() == ARM::tHLT || + Inst.getOpcode() == ARM::HLT; + +} + // FIXME: We would really like to be able to tablegen'erate this. bool ARMAsmParser:: validateInstruction(MCInst &Inst, @@ -5305,11 +5315,10 @@ validateInstruction(MCInst &Inst, SMLoc Loc = Operands[0]->getStartLoc(); // Check the IT block state first. - // NOTE: BKPT instruction has the interesting property of being - // allowed in IT blocks, but not being predicable. It just always - // executes. - if (inITBlock() && Inst.getOpcode() != ARM::tBKPT && - Inst.getOpcode() != ARM::BKPT) { + // NOTE: BKPT and HLT instructions have the interesting property of being + // allowed in IT blocks, but not being predicable. They just always + // execute. + if (inITBlock() && !instIsBreakpoint(Inst)) { unsigned bit = 1; if (ITState.FirstCond) ITState.FirstCond = false; -- cgit v1.1 From 4897151df698197f0eb5c4085545312dbb20c94d Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 5 Sep 2013 15:35:24 +0000 Subject: [ARMv8] Implement the new DMB/DSB operands. This removes the custom ISD Node: MEMBARRIER and replaces it with an intrinsic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190055 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 4 ++-- lib/Target/ARM/ARMISelLowering.h | 1 - lib/Target/ARM/ARMInstrInfo.td | 6 ++---- lib/Target/ARM/ARMInstrThumb2.td | 5 +++-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 11 ++++++++++- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 18 +++++++++--------- 7 files changed, 27 insertions(+), 20 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index df9c78a..94270ed 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1010,7 +1010,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; - case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; @@ -2610,7 +2609,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, Domain = ARM_MB::ISHST; } - return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(Intrinsic::arm_dmb, MVT::i32), DAG.getConstant(Domain, MVT::i32)); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index be7811f..fca9e0e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -94,7 +94,6 @@ namespace llvm { DYN_ALLOC, // Dynamic allocation on the stack. 
- MEMBARRIER, // Memory barrier (DMB) MEMBARRIER_MCR, // Memory barrier (MCR) PRELOAD, // Preload diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 5ebe97d..c9bad1c 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -165,8 +165,6 @@ def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain, SDNPSideEffect]>; -def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, - [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain, SDNPSideEffect]>; def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, @@ -4265,7 +4263,7 @@ def instsyncb_opt : Operand { // memory barriers protect the atomic sequences let hasSideEffects = 1 in { def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, - "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, + "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, Requires<[IsARM, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf57ff05; @@ -4274,7 +4272,7 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, } def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, - "dsb", "\t$opt", []>, + "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, Requires<[IsARM, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf57ff04; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 5ba14cf..83edf16 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3155,7 +3155,7 @@ def t2MOVCCi32imm // memory barriers protect the atomic sequences let hasSideEffects = 1 in { def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, - "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, + "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f5; @@ -3164,7 +3164,8 @@ def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, } def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, - "dsb", "\t$opt", []>, Requires<[HasDB]> { + "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, + Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f4; let Inst{3-0} = opt; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 9344810..cfa24f9 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -2521,7 +2521,7 @@ void ARMOperand::print(raw_ostream &OS) const { getImm()->print(OS); break; case k_MemBarrierOpt: - OS << ""; + OS << ""; break; case k_InstSyncBarrierOpt: OS << ""; @@ -3466,18 +3466,27 @@ parseMemBarrierOptOperand(SmallVectorImpl &Operands) { Opt = StringSwitch(OptStr.slice(0, OptStr.size()).lower()) .Case("sy", ARM_MB::SY) .Case("st", ARM_MB::ST) + .Case("ld", ARM_MB::LD) .Case("sh", ARM_MB::ISH) .Case("ish", ARM_MB::ISH) .Case("shst", ARM_MB::ISHST) .Case("ishst", ARM_MB::ISHST) + .Case("ishld", ARM_MB::ISHLD) .Case("nsh", ARM_MB::NSH) .Case("un", ARM_MB::NSH) .Case("nshst", ARM_MB::NSHST) + .Case("nshld", ARM_MB::NSHLD) .Case("unst", ARM_MB::NSHST) .Case("osh", ARM_MB::OSH) .Case("oshst", ARM_MB::OSHST) + .Case("oshld", ARM_MB::OSHLD) .Default(~0U); + // ishld, oshld, nshld and ld are only available from ARMv8. 
+ if (!hasV8Ops() && (Opt == ARM_MB::ISHLD || Opt == ARM_MB::OSHLD || + Opt == ARM_MB::NSHLD || Opt == ARM_MB::LD)) + Opt = ~0U; + if (Opt == ~0U) return MatchOperand_NoMatch; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 8b99c17..82c1294 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -677,7 +677,7 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned val = MI->getOperand(OpNum).getImm(); - O << ARM_MB::MemBOptToString(val); + O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops)); } void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ff9917d..af939fc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -121,41 +121,41 @@ namespace ARM_MB { // the option field for memory barrier operations. enum MemBOpt { RESERVED_0 = 0, - RESERVED_1 = 1, + OSHLD = 1, OSHST = 2, OSH = 3, RESERVED_4 = 4, - RESERVED_5 = 5, + NSHLD = 5, NSHST = 6, NSH = 7, RESERVED_8 = 8, - RESERVED_9 = 9, + ISHLD = 9, ISHST = 10, ISH = 11, RESERVED_12 = 12, - RESERVED_13 = 13, + LD = 13, ST = 14, SY = 15 }; - inline static const char *MemBOptToString(unsigned val) { + inline static const char *MemBOptToString(unsigned val, bool HasV8) { switch (val) { default: llvm_unreachable("Unknown memory operation"); case SY: return "sy"; case ST: return "st"; - case RESERVED_13: return "#0xd"; + case LD: return HasV8 ? "ld" : "#0xd"; case RESERVED_12: return "#0xc"; case ISH: return "ish"; case ISHST: return "ishst"; - case RESERVED_9: return "#0x9"; + case ISHLD: return HasV8 ? "ishld" : "#0x9"; case RESERVED_8: return "#0x8"; case NSH: return "nsh"; case NSHST: return "nshst"; - case RESERVED_5: return "#0x5"; + case NSHLD: return HasV8 ? "nshld" : "#0x5"; case RESERVED_4: return "#0x4"; case OSH: return "osh"; case OSHST: return "oshst"; - case RESERVED_1: return "#0x1"; + case OSHLD: return HasV8 ? "oshld" : "#0x1"; case RESERVED_0: return "#0x0"; } } -- cgit v1.1 From 791cdd5fe08162f982e013057b587c9a8ff0bd03 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Sep 2013 18:37:45 +0000 Subject: R600: Fix segfault in R600TextureIntrinsicReplacer This pass was segfaulting when it ran into a non-intrinsic function call. Function calls are not supported, so now instead of segfaulting, we will get an assertion failure with a nice error message. I'm not sure how to test this using lit. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190076 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp index 37d9059..d4b8ec0 100644 --- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp @@ -260,6 +260,9 @@ public: } void visitCallInst(CallInst &I) { + if (!I.getCalledFunction()) { + return; + } StringRef Name = I.getCalledFunction()->getName(); if (Name == "llvm.AMDGPU.tex") { ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc"); -- cgit v1.1 From 402b8e2175dc926eef664ec03bd61c7922a50447 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Sep 2013 18:37:52 +0000 Subject: R600/SI: Don't emit S_WQM_B64 instruction for compute shaders git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190077 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SILowerControlFlow.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index c2e8f02..09cf25b 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -409,6 +409,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + SIMachineFunctionInfo *MFI = MF.getInfo(); bool HaveKill = false; bool NeedM0 = false; @@ -508,7 +509,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { AMDGPU::M0).addImm(0xffffffff); } - if (NeedWQM) { + if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) { MachineBasicBlock &MBB = MF.front(); BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC).addReg(AMDGPU::EXEC); -- cgit v1.1 From 470c451574609adcaab9b279cc74c6ff0f91b00f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Sep 2013 18:37:57 +0000 Subject: R600: Fix incorrect LDS size calculation GlobalAdderss nodes that appeared in more than one basic block were being counted twice. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190078 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 14 ++++++++++---- lib/Target/R600/AMDGPUMachineFunction.h | 4 ++++ 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1237323..d6b7cbe 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -246,12 +246,18 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, assert(G->getOffset() == 0 && "Do not know what to do with an non-zero offset"); - unsigned Offset = MFI->LDSSize; const GlobalValue *GV = G->getGlobal(); - uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); - // XXX: Account for alignment? - MFI->LDSSize += Size; + unsigned Offset; + if (MFI->LocalMemoryObjects.count(GV) == 0) { + uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + Offset = MFI->LDSSize; + MFI->LocalMemoryObjects[GV] = Offset; + // XXX: Account for alignment? 
+ MFI->LDSSize += Size; + } else { + Offset = MFI->LocalMemoryObjects[GV]; + } return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace())); } diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h index 789b96a..fe80ce3 100644 --- a/lib/Target/R600/AMDGPUMachineFunction.h +++ b/lib/Target/R600/AMDGPUMachineFunction.h @@ -14,6 +14,7 @@ #define AMDGPUMACHINEFUNCTION_H #include "llvm/CodeGen/MachineFunction.h" +#include namespace llvm { @@ -21,6 +22,9 @@ class AMDGPUMachineFunction : public MachineFunctionInfo { public: AMDGPUMachineFunction(const MachineFunction &MF); unsigned ShaderType; + /// A map to keep track of local memory objects and their offsets within + /// the local memory space. + std::map LocalMemoryObjects; /// Number of bytes in the LDS that are being used. unsigned LDSSize; }; -- cgit v1.1 From 756f382ac116d1d935fe5c01f2c07c19c0aac77a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Sep 2013 18:38:03 +0000 Subject: R600: Expand SELECT nodes rather than custom lowering them git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190079 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 20 ++++++-------------- lib/Target/R600/R600ISelLowering.h | 1 - 2 files changed, 6 insertions(+), 15 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index f0242b8..450e2a8 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -60,8 +60,12 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); - setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::v2i32, Expand); + setOperationAction(ISD::SELECT, MVT::v2f32, Expand); + setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT, MVT::v4f32, Expand); // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); @@ -480,7 +484,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); @@ -930,17 +933,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const DAG.getCondCode(ISD::SETNE)); } -SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - return DAG.getNode(ISD::SELECT_CC, - SDLoc(Op), - Op.getValueType(), - Op.getOperand(0), - DAG.getConstant(0, MVT::i32), - Op.getOperand(1), - Op.getOperand(2), - DAG.getCondCode(ISD::SETNE)); -} - /// LLVM generates byte-addresed pointers. For indirect addressing, we need to /// convert these pointers to a register index. 
Each register holds /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index a033fcb..811850d 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -56,7 +56,6 @@ private: SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From 79916948e1fd176a3898b596b679cc9dba3d40a8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Sep 2013 18:38:09 +0000 Subject: R600: Add support for local memory atomic add git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190080 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstructions.td | 5 ++++ lib/Target/R600/R600ISelLowering.cpp | 22 ++++++++++++------ lib/Target/R600/R600InstrInfo.h | 6 +++++ lib/Target/R600/R600Instructions.td | 42 ++++++++++++++++++++++++++++------ lib/Target/R600/SIInstrInfo.td | 12 ++++++++++ lib/Target/R600/SIInstructions.td | 4 ++++ lib/Target/R600/SILowerControlFlow.cpp | 1 + 7 files changed, 78 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index dec6082..6745fed 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -191,6 +191,11 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return isLocalLoad(dyn_cast(N)); }]>; +def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value), + (atomic_load_add node:$ptr, node:$value), [{ + return dyn_cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return dyn_cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 450e2a8..ff9ba52 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -109,16 +109,24 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( switch (MI->getOpcode()) { default: - if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) { - MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), - TII->get(MI->getOpcode()), - AMDGPU::OQAP); + if (TII->isLDSInstr(MI->getOpcode()) && + TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) { + int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + assert(DstIdx != -1); + MachineInstrBuilder NewMI; + if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) { + NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()), + AMDGPU::OQAP); + TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, + MI->getOperand(0).getReg(), + AMDGPU::OQAP); + } else { + NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), + TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode()))); + } for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { NewMI.addOperand(MI->getOperand(i)); } - TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, - MI->getOperand(0).getReg(), - AMDGPU::OQAP); } else { return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } diff --git a/lib/Target/R600/R600InstrInfo.h 
b/lib/Target/R600/R600InstrInfo.h index d083fb8..24cc43d 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -275,6 +275,12 @@ namespace llvm { void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; }; +namespace AMDGPU { + +int getLDSNoRetOp(uint16_t Opcode); + +} //End namespace AMDGPU + } // End llvm namespace #endif // R600INSTRINFO_H_ diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 99ffa14..efa4751 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1626,23 +1626,39 @@ class R600_LDS_1A lds_op, string name, list pattern> : R600_LDS < let DisableEncoding = "$dst"; } -class R600_LDS_1A1D lds_op, string name, list pattern> : +class R600_LDS_1A1D lds_op, dag outs, string name, list pattern, + string dst =""> : R600_LDS < - lds_op, - (outs), + lds_op, outs, (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle), - " "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel", + " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel", pattern > { + field string BaseOp; + let src2 = 0; let src2_rel = 0; let LDS_1A1D = 1; } +class R600_LDS_1A1D_NORET lds_op, string name, list pattern> : + R600_LDS_1A1D { + let BaseOp = name; +} + +class R600_LDS_1A1D_RET lds_op, string name, list pattern> : + R600_LDS_1A1D { + + let BaseOp = name; + let usesCustomInserter = 1; + let DisableEncoding = "$dst"; + let Defs = [OQAP]; +} + class R600_LDS_1A2D lds_op, string name, list pattern> : R600_LDS < lds_op, @@ -1656,15 +1672,19 @@ class R600_LDS_1A2D lds_op, string name, list pattern> : let LDS_1A2D = 1; } -def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE", +def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >; +def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE", [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] >; -def LDS_BYTE_WRITE : R600_LDS_1A1D<0x12, "LDS_BYTE_WRITE", +def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE", [(truncstorei8_local i32:$src1, i32:$src0)] >; -def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE", +def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", [(truncstorei16_local i32:$src1, i32:$src0)] >; +def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", + [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] +>; def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] >; @@ -2428,3 +2448,11 @@ def : BitConvert ; def : DwordAddrPat ; } // End isR600toCayman Predicate + +def getLDSNoRetOp : InstrMapping { + let FilterClass = "R600_LDS_1A1D"; + let RowFields = ["BaseOp"]; + let ColFields = ["DisableEncoding"]; + let KeyCol = ["$dst"]; + let ValueCols = [[""""]]; +} diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ecc4718..09d5f01 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -362,6 +362,18 @@ class DS_Store_Helper op, string asm, RegisterClass regClass> : DS < let vdst = 0; } +class DS_1A1D_RET op, string asm, RegisterClass rc> : DS < + op, + (outs rc:$vdst), + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i8imm:$offset0, + i8imm:$offset1), + asm#" $gds, $vdst, $addr, $data0, $offset0, $offset1, [M0]", + []> { + let mayStore = 1; + let mayLoad = 1; + let data1 = 0; +} + class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < op, 
(outs), diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 136f69c..31a5ad2 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -391,6 +391,7 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1 +def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>; def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; @@ -1775,6 +1776,9 @@ def : DSWritePat ; def : DSWritePat ; def : DSWritePat ; +def : Pat <(atomic_load_add_local i32:$ptr, i32:$val), + (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>; + /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 09cf25b..a6c43bb 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -488,6 +488,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { NeedWQM = true; // Fall through case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_ADD_U32_RTN: NeedM0 = true; break; -- cgit v1.1 From 3987e532f7e7b326083d3b5145bd29bb69e15410 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 5 Sep 2013 19:41:10 +0000 Subject: R600: Fix i64 to i32 trunc on SI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index f196059..05b7b5d 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -90,6 +90,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::i64, MVT::i32, Expand); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); -- cgit v1.1 From d8e2f1757d9ececd7937406596fec8e4ebfb7d46 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 5 Sep 2013 20:25:06 +0000 Subject: =?UTF-8?q?Fixed=20a=20crash=20in=20the=20integrated=20assembler?= =?UTF-8?q?=20for=20Mach-O=20when=20a=20symbol=20difference=20expression?= =?UTF-8?q?=20uses=20an=20assembler=20temporary=20symbol=20from=20an=20ass?= =?UTF-8?q?ignment.=20=C2=A0In=20this=20case=20the=20symbol=20does=20not?= =?UTF-8?q?=20have=20a=20fragment=20so=20the=20use=20of=20getFragment()=20?= =?UTF-8?q?would=20be=20NULL=20and=20caused=20a=20crash.=20In=20the=20case?= =?UTF-8?q?=20of=20an=20assembler=20temporary=20symbol=20we=20want=20to=20?= =?UTF-8?q?use=20the=20AliasedSymbol=20(if=20any)=20which=20will=20create?= =?UTF-8?q?=20a=20local=20relocation=20entry,=20but=20if=20it=20is=20not?= =?UTF-8?q?=20an=20assembler=20temporary=20symbol=20then=20let=20it=20use?= =?UTF-8?q?=20that=20symbol=20with=20an=20external=20relocation=20entry.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rdar://9356266 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190096 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp 
b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index b6f10ba..eb7c0b1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -144,10 +144,14 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, } } else if (Target.getSymB()) { // A - B + constant const MCSymbol *A = &Target.getSymA()->getSymbol(); + if (A->isTemporary()) + A = &A->AliasedSymbol(); MCSymbolData &A_SD = Asm.getSymbolData(*A); const MCSymbolData *A_Base = Asm.getAtom(&A_SD); const MCSymbol *B = &Target.getSymB()->getSymbol(); + if (B->isTemporary()) + B = &B->AliasedSymbol(); MCSymbolData &B_SD = Asm.getSymbolData(*B); const MCSymbolData *B_Base = Asm.getAtom(&B_SD); -- cgit v1.1 From d7174719a9d5bec7f0c999cd12249d3a918d7153 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Thu, 5 Sep 2013 23:02:56 +0000 Subject: [X86] Perform VSELECT DAG combines also before DAG type legalization. If the DAG already has only legal types, then the second round of DAG combines is skipped. In this case VSELECT+SETCC patterns that match a more efficient instruction (e.g. min/max) are never recognized. This fix allows VSELECT+SETCC combines if the types are already legal before DAG type legalization. Reviewer: Nadav git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190105 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ba6c9c0..68c81ab 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16419,13 +16419,14 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); EVT VT = LHS.getValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If we have SSE[12] support, try to form min/max nodes. SSE min/max // instructions match the semantics of the common C idiom xhasSSE2() || (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -16578,8 +16579,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, DCI.AddToWorklist(Cond.getNode()); return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS); } - else - return SDValue(); } // If this is a select between two integer constants, try to do some // optimizations. @@ -16705,9 +16704,12 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } } + // Early exit check + if (!TLI.isTypeLegal(VT)) + return SDValue(); + // Match VSELECTs into subs with unsigned saturation. - if (!DCI.isBeforeLegalize() && - N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && + if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && // psubus is available in SSE2 and AVX2 for i8 and i16 vectors. ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) || (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) { @@ -16761,14 +16763,14 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } // Try to match a min/max vector operation. 
- if (!DCI.isBeforeLegalize() && - N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) + if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget)) return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS); // Simplify vector selection if the selector will be produced by CMPP*/PCMP*. - if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT && - Cond.getOpcode() == ISD::SETCC) { + if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && + // Check if SETCC has already been promoted + TLI.getSetCCResultType(*DAG.getContext(), VT) == Cond.getValueType()) { assert(Cond.getValueType().isVector() && "vector select expects a vector selector!"); @@ -16815,7 +16817,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // matched by one of the SSE/AVX BLEND instructions. These instructions only // depend on the highest bit in each word. Try to use SimplifyDemandedBits // to simplify previous instructions. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && !DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) { unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits(); -- cgit v1.1 From c3b5c042a4e63f8aa464734248881139cbd1694d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Sep 2013 23:55:13 +0000 Subject: R600: Coding style git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190110 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp index d4b8ec0..3258894 100644 --- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp @@ -94,9 +94,8 @@ class R600TextureIntrinsicsReplacer : } if (TextureType == TEXTURE_CUBE_ARRAY || - TextureType == TEXTURE_SHADOWCUBE_ARRAY) { + TextureType == TEXTURE_SHADOWCUBE_ARRAY) CT[2] = 0; - } if (TextureType == TEXTURE_1D_ARRAY || TextureType == TEXTURE_SHADOW1D_ARRAY) { @@ -115,9 +114,8 @@ class R600TextureIntrinsicsReplacer : TextureType == TEXTURE_SHADOW2D || TextureType == TEXTURE_SHADOWRECT || TextureType == TEXTURE_SHADOW1D_ARRAY) && - !(hasLOD && useShadowVariant)) { + !(hasLOD && useShadowVariant)) SrcSelect[3] = 2; - } } void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name, @@ -260,9 +258,9 @@ public: } void visitCallInst(CallInst &I) { - if (!I.getCalledFunction()) { + if (!I.getCalledFunction()) return; - } + StringRef Name = I.getCalledFunction()->getName(); if (Name == "llvm.AMDGPU.tex") { ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc"); -- cgit v1.1 From 867f9501f64c12a693bed624c53e8104421917a9 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 6 Sep 2013 10:25:07 +0000 Subject: [SystemZ] Use XC for a memset of 0 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 29 +++++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index dc2f225..3b43cbd 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ 
b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -25,10 +25,14 @@ SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { } -// Use MVC to copy Size bytes from Src to Dest, deciding whether to use -// a loop or straight-line code. -static SDValue emitMVC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, - SDValue Dst, SDValue Src, uint64_t Size) { +// Decide whether it is best to use a loop or straight-line code for +// a block operation of Size bytes with source address Src and destination +// address Dest. Sequence is the opcode to use for straight-line code +// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). +// Return the chain for the completed operation. +static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, + unsigned Loop, SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size) { EVT PtrVT = Src.getValueType(); // The heuristic we use is to prefer loops for anything that would // require 7 or more MVCs. With these kinds of sizes there isn't @@ -42,10 +46,10 @@ static SDValue emitMVC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // The next value up, 6 * 256, can be implemented in the same // number of straight-line MVCs as 6 * 256 - 1. if (Size > 6 * 256) - return DAG.getNode(SystemZISD::MVC_LOOP, DL, MVT::Other, Chain, Dst, Src, + return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, DAG.getConstant(Size, PtrVT), DAG.getConstant(Size / 256, PtrVT)); - return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst, Src, + return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, DAG.getConstant(Size, PtrVT)); } @@ -59,7 +63,8 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, return SDValue(); if (ConstantSDNode *CSize = dyn_cast(Size)) - return emitMVC(DAG, DL, Chain, Dst, Src, CSize->getZExtValue()); + return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, + Chain, Dst, Src, CSize->getZExtValue()); return SDValue(); } @@ -130,13 +135,21 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } } assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); + + // Handle the special case of a memset of 0, which can use XC. + ConstantSDNode *CByte = dyn_cast(Byte); + if (CByte && CByte->getZExtValue() == 0) + return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, + Chain, Dst, Dst, Bytes); + // Copy the byte to the first location and then use MVC to copy // it to the rest. Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, false, false, Align); SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, DAG.getConstant(1, PtrVT)); - return emitMVC(DAG, DL, Chain, DstPlus1, Dst, Bytes - 1); + return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, + Chain, DstPlus1, Dst, Bytes - 1); } return SDValue(); } -- cgit v1.1 From 3f1acd55a8ad249926bc80916bb20f0b77798cb5 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 10:55:15 +0000 Subject: [mips][msa] Made the InstrItinClass argument optional since it is always NoItinerary at the moment. 
No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190131 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 1207 ++++++++++++++++++----------------- 1 file changed, 607 insertions(+), 600 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 1814b1c..a12144c 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -760,8 +760,8 @@ class XORI_B_ENC : MSA_I8_FMT<0b11, 0b000000>; // Instruction desc. class MSA_BIT_D_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm6:$u6); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u6"); @@ -770,8 +770,8 @@ class MSA_BIT_D_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm5:$u5); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); @@ -780,8 +780,8 @@ class MSA_BIT_W_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm4:$u4); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u4"); @@ -790,8 +790,8 @@ class MSA_BIT_H_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm3:$u3); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u3"); @@ -800,8 +800,8 @@ class MSA_BIT_B_DESC_BASE { + RegisterClass RCD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCD:$rd); dag InOperandList = (ins RCWS:$ws, uimm6:$n); string AsmString = !strconcat(instr_asm, "\t$rd, $ws[$n]"); @@ -810,8 +810,8 @@ class MSA_COPY_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm5:$u5); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); @@ -820,8 +820,8 @@ class MSA_I5_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, simm5:$s5); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $s5"); @@ -830,8 +830,8 @@ class MSA_SI5_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm8:$u8); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8"); @@ -840,7 +840,8 @@ class MSA_I8_DESC_BASE { + RegisterClass RCWD, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins simm10:$i10); string AsmString = !strconcat(instr_asm, "\t$wd, $i10"); @@ -849,8 +850,8 @@ class MSA_I10_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws); string AsmString = !strconcat(instr_asm, "\t$wd, $ws"); @@ -859,14 +860,15 @@ class MSA_2R_DESC_BASE : - MSA_2R_DESC_BASE; + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> : + MSA_2R_DESC_BASE; class MSA_3R_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + RegisterClass RCWT = RCWS, + InstrItinClass itin = NoItinerary> { dag 
OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, RCWT:$wt); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); @@ -875,8 +877,9 @@ class MSA_3R_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + RegisterClass RCWT = RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWD:$wd_in, RCWS:$ws, RCWT:$wt); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); @@ -887,14 +890,16 @@ class MSA_3R_4R_DESC_BASE : - MSA_3R_DESC_BASE; + RegisterClass RCWD, RegisterClass RCWS, + RegisterClass RCWT = RCWS, + InstrItinClass itin = NoItinerary> : + MSA_3R_DESC_BASE; class MSA_3RF_4RF_DESC_BASE : - MSA_3R_4R_DESC_BASE; + RegisterClass RCWD, RegisterClass RCWS, + RegisterClass RCWT = RCWS, + InstrItinClass itin = NoItinerary> : + MSA_3R_4R_DESC_BASE; class MSA_CBRANCH_DESC_BASE { dag OutOperandList = (outs); @@ -909,8 +914,8 @@ class MSA_CBRANCH_DESC_BASE { } class MSA_INSERT_DESC_BASE { + RegisterClass RCD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCD:$wd); dag InOperandList = (ins RCD:$wd_in, uimm6:$n, RCWS:$rs); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $rs"); @@ -922,8 +927,8 @@ class MSA_INSERT_DESC_BASE { + RegisterClass RCD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCD:$wd); dag InOperandList = (ins RCD:$wd_in, uimm6:$n, RCWS:$ws); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[0]"); @@ -935,8 +940,9 @@ class MSA_INSVE_DESC_BASE { + RegisterClass RCWD, RegisterClass RCWS, + RegisterClass RCWT = RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, RCWT:$wt); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); @@ -944,223 +950,223 @@ class MSA_VEC_DESC_BASE, IsCommutable; -class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, NoItinerary, +class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, MSA128H, MSA128H>, IsCommutable; -class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, NoItinerary, +class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, MSA128W, MSA128W>, IsCommutable; -class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, NoItinerary, +class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, MSA128D, MSA128D>, IsCommutable; class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b, - NoItinerary, MSA128B, MSA128B>, - IsCommutable; + MSA128B, MSA128B>, + IsCommutable; class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h, - NoItinerary, MSA128H, MSA128H>, - IsCommutable; + MSA128H, MSA128H>, + IsCommutable; class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w, - NoItinerary, MSA128W, MSA128W>, - IsCommutable; + MSA128W, MSA128W>, + IsCommutable; class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d, - NoItinerary, MSA128D, MSA128D>, - IsCommutable; + MSA128D, MSA128D>, + IsCommutable; class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b, - NoItinerary, MSA128B, MSA128B>, - IsCommutable; + MSA128B, MSA128B>, + IsCommutable; class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h, - NoItinerary, MSA128H, MSA128H>, - IsCommutable; + MSA128H, MSA128H>, + IsCommutable; class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w, - NoItinerary, MSA128W, MSA128W>, - IsCommutable; + MSA128W, MSA128W>, + IsCommutable; class ADDS_S_D_DESC : 
MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d, - NoItinerary, MSA128D, MSA128D>, - IsCommutable; + MSA128D, MSA128D>, + IsCommutable; class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b", int_mips_adds_u_b, - NoItinerary, MSA128B, MSA128B>, - IsCommutable; + MSA128B, MSA128B>, + IsCommutable; class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h, - NoItinerary, MSA128H, MSA128H>, - IsCommutable; + MSA128H, MSA128H>, + IsCommutable; class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w, - NoItinerary, MSA128W, MSA128W>, - IsCommutable; + MSA128W, MSA128W>, + IsCommutable; class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d, - NoItinerary, MSA128D, MSA128D>, - IsCommutable; + MSA128D, MSA128D>, + IsCommutable; -class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", int_mips_addv_b, NoItinerary, +class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", int_mips_addv_b, MSA128B, MSA128B>, IsCommutable; -class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", int_mips_addv_h, NoItinerary, +class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", int_mips_addv_h, MSA128H, MSA128H>, IsCommutable; -class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, NoItinerary, +class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, MSA128W, MSA128W>, IsCommutable; -class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, NoItinerary, +class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, MSA128D, MSA128D>, IsCommutable; -class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, NoItinerary, +class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, MSA128B, MSA128B>; -class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, NoItinerary, +class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H, MSA128H>; -class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, NoItinerary, +class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, MSA128W, MSA128W>; -class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, NoItinerary, +class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, MSA128D, MSA128D>; -class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, NoItinerary, +class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, MSA128B, MSA128B>; -class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, NoItinerary, +class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, MSA128B, MSA128B>; class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, NoItinerary, +class 
AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, MSA128B, MSA128B>, IsCommutable; -class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, NoItinerary, +class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, MSA128H, MSA128H>, IsCommutable; -class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, NoItinerary, +class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, MSA128W, MSA128W>, IsCommutable; -class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, NoItinerary, +class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, MSA128D, MSA128D>, IsCommutable; -class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, NoItinerary, +class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, MSA128B, MSA128B>, IsCommutable; -class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, NoItinerary, +class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, MSA128H, MSA128H>, IsCommutable; -class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, NoItinerary, +class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, MSA128W, MSA128W>, IsCommutable; -class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, NoItinerary, +class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, MSA128D, MSA128D>, IsCommutable; class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b, - NoItinerary, MSA128B, MSA128B>, - IsCommutable; + MSA128B, MSA128B>, + IsCommutable; class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h, - NoItinerary, MSA128H, MSA128H>, - IsCommutable; + MSA128H, MSA128H>, + IsCommutable; class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w, - NoItinerary, MSA128W, MSA128W>, - IsCommutable; + MSA128W, MSA128W>, + IsCommutable; class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d, - NoItinerary, MSA128D, MSA128D>, - IsCommutable; + MSA128D, MSA128D>, + IsCommutable; class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b, - NoItinerary, MSA128B, MSA128B>, - IsCommutable; + MSA128B, MSA128B>, + IsCommutable; class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h, - NoItinerary, MSA128H, MSA128H>, - IsCommutable; + MSA128H, MSA128H>, + IsCommutable; class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w, - NoItinerary, MSA128W, MSA128W>, - IsCommutable; + MSA128W, MSA128W>, + IsCommutable; class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d, - NoItinerary, MSA128D, MSA128D>, - IsCommutable; + MSA128D, MSA128D>, + IsCommutable; -class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, NoItinerary, +class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, MSA128B, MSA128B>; -class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, NoItinerary, +class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, MSA128H, MSA128H>; -class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, NoItinerary, +class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, MSA128W, MSA128W>; -class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, NoItinerary, +class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, MSA128D, MSA128D>; class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", int_mips_bclri_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", int_mips_bclri_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class BCLRI_W_DESC : 
MSA_BIT_W_DESC_BASE<"bclri.w", int_mips_bclri_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", int_mips_bclri_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, NoItinerary, +class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, MSA128B, MSA128B>; -class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, NoItinerary, +class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, MSA128H, MSA128H>; -class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, NoItinerary, +class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, MSA128W, MSA128W>; -class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, NoItinerary, +class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, MSA128D, MSA128D>; class BINSLI_B_DESC : MSA_BIT_B_DESC_BASE<"binsli.b", int_mips_binsli_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class BINSLI_H_DESC : MSA_BIT_H_DESC_BASE<"binsli.h", int_mips_binsli_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class BINSLI_W_DESC : MSA_BIT_W_DESC_BASE<"binsli.w", int_mips_binsli_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class BINSLI_D_DESC : MSA_BIT_D_DESC_BASE<"binsli.d", int_mips_binsli_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, NoItinerary, +class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, MSA128B, MSA128B>; -class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, NoItinerary, +class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, MSA128H, MSA128H>; -class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, NoItinerary, +class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, MSA128W, MSA128W>; -class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, NoItinerary, +class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, MSA128D, MSA128D>; class BINSRI_B_DESC : MSA_BIT_B_DESC_BASE<"binsri.b", int_mips_binsri_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class BINSRI_H_DESC : MSA_BIT_H_DESC_BASE<"binsri.h", int_mips_binsri_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class BINSRI_W_DESC : MSA_BIT_W_DESC_BASE<"binsri.w", int_mips_binsri_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, NoItinerary, +class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, MSA128B, MSA128B>; -class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, NoItinerary, +class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, MSA128B, MSA128B>; -class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, NoItinerary, +class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, MSA128B, MSA128B>; -class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, NoItinerary, +class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, MSA128B, MSA128B>; -class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, NoItinerary, +class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, MSA128B, MSA128B>; -class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, NoItinerary, +class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, 
MSA128H, MSA128H>; -class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, NoItinerary, +class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, MSA128W, MSA128W>; -class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, NoItinerary, +class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, MSA128D, MSA128D>; class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", int_mips_bnegi_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", int_mips_bnegi_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", int_mips_bnegi_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128B>; class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128H>; @@ -1169,29 +1175,29 @@ class BNZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bnz.d", MSA128D>; class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128B>; -class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, NoItinerary, +class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, MSA128B, MSA128B>; -class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, NoItinerary, +class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, MSA128B, MSA128B>; -class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, NoItinerary, +class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, MSA128B, MSA128B>; -class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, NoItinerary, +class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128H, MSA128H>; -class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, NoItinerary, +class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, MSA128W, MSA128W>; -class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, NoItinerary, +class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, MSA128D, MSA128D>; class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", int_mips_bseti_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", int_mips_bseti_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", int_mips_bseti_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", int_mips_bseti_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class BZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bz.b", MSA128B>; class BZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bz.h", MSA128H>; @@ -1200,22 +1206,22 @@ class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128D>; class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128B>; -class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, NoItinerary, +class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, MSA128B, MSA128B>, IsCommutable; -class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, NoItinerary, +class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, MSA128H, MSA128H>, IsCommutable; -class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, NoItinerary, +class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, MSA128W, MSA128W>, IsCommutable; -class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, NoItinerary, +class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, MSA128D, MSA128D>, IsCommutable; -class CEQI_B_DESC : 
MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, NoItinerary, +class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, MSA128B, MSA128B>; -class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", int_mips_ceqi_h, NoItinerary, +class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", int_mips_ceqi_h, MSA128H, MSA128H>; -class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", int_mips_ceqi_w, NoItinerary, +class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", int_mips_ceqi_w, MSA128W, MSA128W>; -class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", int_mips_ceqi_d, NoItinerary, +class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", int_mips_ceqi_d, MSA128D, MSA128D>; class CFCMSA_DESC { @@ -1226,91 +1232,91 @@ class CFCMSA_DESC { bit hasSideEffects = 1; } -class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", int_mips_cle_s_b, NoItinerary, +class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", int_mips_cle_s_b, MSA128B, MSA128B>; -class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", int_mips_cle_s_h, NoItinerary, +class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", int_mips_cle_s_h, MSA128H, MSA128H>; -class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", int_mips_cle_s_w, NoItinerary, +class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", int_mips_cle_s_w, MSA128W, MSA128W>; -class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", int_mips_cle_s_d, NoItinerary, +class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", int_mips_cle_s_d, MSA128D, MSA128D>; -class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", int_mips_cle_u_b, NoItinerary, +class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", int_mips_cle_u_b, MSA128B, MSA128B>; -class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", int_mips_cle_u_h, NoItinerary, +class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", int_mips_cle_u_h, MSA128H, MSA128H>; -class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", int_mips_cle_u_w, NoItinerary, +class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", int_mips_cle_u_w, MSA128W, MSA128W>; -class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", int_mips_cle_u_d, NoItinerary, +class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", int_mips_cle_u_d, MSA128D, MSA128D>; class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", int_mips_clei_s_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class CLEI_S_H_DESC : MSA_SI5_DESC_BASE<"clei_s.h", int_mips_clei_s_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class CLEI_S_W_DESC : MSA_SI5_DESC_BASE<"clei_s.w", int_mips_clei_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class CLEI_S_D_DESC : MSA_SI5_DESC_BASE<"clei_s.d", int_mips_clei_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class CLEI_U_B_DESC : MSA_SI5_DESC_BASE<"clei_u.b", int_mips_clei_u_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class CLEI_U_H_DESC : MSA_SI5_DESC_BASE<"clei_u.h", int_mips_clei_u_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class CLEI_U_W_DESC : MSA_SI5_DESC_BASE<"clei_u.w", int_mips_clei_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class CLEI_U_D_DESC : MSA_SI5_DESC_BASE<"clei_u.d", int_mips_clei_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", int_mips_clt_s_b, NoItinerary, +class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", int_mips_clt_s_b, MSA128B, MSA128B>; -class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", int_mips_clt_s_h, NoItinerary, +class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", int_mips_clt_s_h, MSA128H, MSA128H>; -class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", int_mips_clt_s_w, NoItinerary, +class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", 
int_mips_clt_s_w, MSA128W, MSA128W>; -class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", int_mips_clt_s_d, NoItinerary, +class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", int_mips_clt_s_d, MSA128D, MSA128D>; -class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", int_mips_clt_u_b, NoItinerary, +class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", int_mips_clt_u_b, MSA128B, MSA128B>; -class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", int_mips_clt_u_h, NoItinerary, +class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", int_mips_clt_u_h, MSA128H, MSA128H>; -class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", int_mips_clt_u_w, NoItinerary, +class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", int_mips_clt_u_w, MSA128W, MSA128W>; -class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", int_mips_clt_u_d, NoItinerary, +class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", int_mips_clt_u_d, MSA128D, MSA128D>; class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", int_mips_clti_s_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class CLTI_S_H_DESC : MSA_SI5_DESC_BASE<"clti_s.h", int_mips_clti_s_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class CLTI_S_W_DESC : MSA_SI5_DESC_BASE<"clti_s.w", int_mips_clti_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class CLTI_S_D_DESC : MSA_SI5_DESC_BASE<"clti_s.d", int_mips_clti_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class CLTI_U_B_DESC : MSA_SI5_DESC_BASE<"clti_u.b", int_mips_clti_u_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class CLTI_U_H_DESC : MSA_SI5_DESC_BASE<"clti_u.h", int_mips_clti_u_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class CLTI_U_W_DESC : MSA_SI5_DESC_BASE<"clti_u.w", int_mips_clti_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class CLTI_U_D_DESC : MSA_SI5_DESC_BASE<"clti_u.d", int_mips_clti_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", int_mips_copy_s_b, - NoItinerary, GPR32, MSA128B>; + GPR32, MSA128B>; class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", int_mips_copy_s_h, - NoItinerary, GPR32, MSA128H>; + GPR32, MSA128H>; class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", int_mips_copy_s_w, - NoItinerary, GPR32, MSA128W>; + GPR32, MSA128W>; class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", int_mips_copy_u_b, - NoItinerary, GPR32, MSA128B>; + GPR32, MSA128B>; class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", int_mips_copy_u_h, - NoItinerary, GPR32, MSA128H>; + GPR32, MSA128H>; class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", int_mips_copy_u_w, - NoItinerary, GPR32, MSA128W>; + GPR32, MSA128W>; class CTCMSA_DESC { dag OutOperandList = (outs); @@ -1320,449 +1326,450 @@ class CTCMSA_DESC { bit hasSideEffects = 1; } -class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", int_mips_div_s_b, NoItinerary, +class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", int_mips_div_s_b, MSA128B, MSA128B>; -class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", int_mips_div_s_h, NoItinerary, +class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", int_mips_div_s_h, MSA128H, MSA128H>; -class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", int_mips_div_s_w, NoItinerary, +class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", int_mips_div_s_w, MSA128W, MSA128W>; -class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", int_mips_div_s_d, NoItinerary, +class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", int_mips_div_s_d, MSA128D, MSA128D>; -class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", int_mips_div_u_b, NoItinerary, +class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", 
int_mips_div_u_b, MSA128B, MSA128B>; -class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, NoItinerary, +class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, MSA128H, MSA128H>; -class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, NoItinerary, +class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, MSA128W, MSA128W>; -class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, NoItinerary, +class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, MSA128D, MSA128D>; class DOTP_S_B_DESC : MSA_3R_DESC_BASE<"dotp_s.b", int_mips_dotp_s_b, - NoItinerary, MSA128B, MSA128B>, + MSA128B, MSA128B>, IsCommutable; class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, - NoItinerary, MSA128H, MSA128H>, - IsCommutable; + MSA128H, MSA128H>, + IsCommutable; class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, - NoItinerary, MSA128W, MSA128W>, - IsCommutable; + MSA128W, MSA128W>, + IsCommutable; class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, - NoItinerary, MSA128D, MSA128D>, - IsCommutable; + MSA128D, MSA128D>, + IsCommutable; class DOTP_U_B_DESC : MSA_3R_DESC_BASE<"dotp_u.b", int_mips_dotp_u_b, - NoItinerary, MSA128B, MSA128B>, + MSA128B, MSA128B>, IsCommutable; class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, - NoItinerary, MSA128H, MSA128H>, - IsCommutable; + MSA128H, MSA128H>, + IsCommutable; class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, - NoItinerary, MSA128W, MSA128W>, - IsCommutable; + MSA128W, MSA128W>, + IsCommutable; class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, - NoItinerary, MSA128D, MSA128D>, - IsCommutable; + MSA128D, MSA128D>, + IsCommutable; class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h, - NoItinerary, MSA128H, MSA128B>, + MSA128H, MSA128B>, IsCommutable; class DPADD_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.w", int_mips_dpadd_s_w, - NoItinerary, MSA128W, MSA128H>, + MSA128W, MSA128H>, IsCommutable; class DPADD_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.d", int_mips_dpadd_s_d, - NoItinerary, MSA128D, MSA128W>, + MSA128D, MSA128W>, IsCommutable; class DPADD_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.h", int_mips_dpadd_u_h, - NoItinerary, MSA128H, MSA128B>, + MSA128H, MSA128B>, IsCommutable; class DPADD_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.w", int_mips_dpadd_u_w, - NoItinerary, MSA128W, MSA128H>, + MSA128W, MSA128H>, IsCommutable; class DPADD_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.d", int_mips_dpadd_u_d, - NoItinerary, MSA128D, MSA128W>, + MSA128D, MSA128W>, IsCommutable; class DPSUB_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.h", int_mips_dpsub_s_h, - NoItinerary, MSA128H, MSA128B>; + MSA128H, MSA128B>; class DPSUB_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.w", int_mips_dpsub_s_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class DPSUB_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.d", int_mips_dpsub_s_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class DPSUB_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.h", int_mips_dpsub_u_h, - NoItinerary, MSA128H, MSA128B>; + MSA128H, MSA128B>; class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", int_mips_fadd_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; 
class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", int_mips_fadd_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", int_mips_fceq_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", int_mips_fcle_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", int_mips_fcle_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", int_mips_fclt_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", int_mips_fclt_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", int_mips_fcne_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", int_mips_fcne_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", int_mips_fcor_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", int_mips_fcor_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", int_mips_fcueq_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", int_mips_fcueq_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", int_mips_fcule_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", int_mips_fcule_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", int_mips_fcult_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", int_mips_fcult_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", int_mips_fcun_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", int_mips_fcun_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", int_mips_fcune_w, - NoItinerary, MSA128W, MSA128W>, + MSA128W, MSA128W>, IsCommutable; class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", int_mips_fcune_d, - NoItinerary, MSA128D, MSA128D>, + MSA128D, MSA128D>, IsCommutable; class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", int_mips_fdiv_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", int_mips_fdiv_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class 
FEXDO_H_DESC : MSA_3RF_DESC_BASE<"fexdo.h", int_mips_fexdo_h, - NoItinerary, MSA128H, MSA128W>; + MSA128H, MSA128W>; class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w, - NoItinerary, MSA128W, MSA128D>; + MSA128W, MSA128D>; class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", int_mips_fexp2_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", int_mips_fexp2_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class FEXUPL_D_DESC : MSA_2RF_DESC_BASE<"fexupl.d", int_mips_fexupl_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class FEXUPR_W_DESC : MSA_2RF_DESC_BASE<"fexupr.w", int_mips_fexupr_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class FEXUPR_D_DESC : MSA_2RF_DESC_BASE<"fexupr.d", int_mips_fexupr_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class FFINT_S_W_DESC : MSA_2RF_DESC_BASE<"ffint_s.w", int_mips_ffint_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FFINT_S_D_DESC : MSA_2RF_DESC_BASE<"ffint_s.d", int_mips_ffint_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FFINT_U_W_DESC : MSA_2RF_DESC_BASE<"ffint_u.w", int_mips_ffint_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FFINT_U_D_DESC : MSA_2RF_DESC_BASE<"ffint_u.d", int_mips_ffint_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FFQL_W_DESC : MSA_2RF_DESC_BASE<"ffql.w", int_mips_ffql_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class FFQL_D_DESC : MSA_2RF_DESC_BASE<"ffql.d", int_mips_ffql_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", int_mips_fill_b, - NoItinerary, MSA128B, GPR32>; + MSA128B, GPR32>; class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", int_mips_fill_h, - NoItinerary, MSA128H, GPR32>; + MSA128H, GPR32>; class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", int_mips_fill_w, - NoItinerary, MSA128W, GPR32>; + MSA128W, GPR32>; class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", int_mips_flog2_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", int_mips_flog2_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FMADD_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.d", int_mips_fmadd_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FMAX_A_W_DESC : MSA_3RF_DESC_BASE<"fmax_a.w", int_mips_fmax_a_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FMAX_A_D_DESC : MSA_3RF_DESC_BASE<"fmax_a.d", int_mips_fmax_a_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, 
MSA128D>; class FMIN_A_W_DESC : MSA_3RF_DESC_BASE<"fmin_a.w", int_mips_fmin_a_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", int_mips_fmsub_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", int_mips_fmsub_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", int_mips_fmul_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", int_mips_fmul_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", int_mips_frint_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", int_mips_frint_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", int_mips_fsqrt_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", int_mips_fsqrt_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", int_mips_fsub_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", int_mips_fsub_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class 
FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSULE_W_DESC : MSA_3RF_DESC_BASE<"fsule.w", int_mips_fsule_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSULE_D_DESC : MSA_3RF_DESC_BASE<"fsule.d", int_mips_fsule_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSULT_W_DESC : MSA_3RF_DESC_BASE<"fsult.w", int_mips_fsult_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSULT_D_DESC : MSA_3RF_DESC_BASE<"fsult.d", int_mips_fsult_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSUN_W_DESC : MSA_3RF_DESC_BASE<"fsun.w", int_mips_fsun_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSUN_D_DESC : MSA_3RF_DESC_BASE<"fsun.d", int_mips_fsun_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FTRUNC_S_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.w", int_mips_ftrunc_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FTRUNC_S_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.d", int_mips_ftrunc_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FTRUNC_U_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.w", int_mips_ftrunc_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FTRUNC_U_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.d", int_mips_ftrunc_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FTINT_S_W_DESC : MSA_2RF_DESC_BASE<"ftint_s.w", int_mips_ftint_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FTINT_S_D_DESC : MSA_2RF_DESC_BASE<"ftint_s.d", int_mips_ftint_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FTINT_U_W_DESC : MSA_2RF_DESC_BASE<"ftint_u.w", int_mips_ftint_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, - NoItinerary, MSA128H, MSA128W>; + MSA128H, MSA128W>; class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w, - NoItinerary, MSA128W, MSA128D>; + MSA128W, MSA128D>; class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h, - NoItinerary, MSA128H, MSA128B>; + MSA128H, MSA128B>; class HADD_S_W_DESC : MSA_3R_DESC_BASE<"hadd_s.w", int_mips_hadd_s_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class HADD_S_D_DESC : MSA_3R_DESC_BASE<"hadd_s.d", int_mips_hadd_s_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class HADD_U_H_DESC : MSA_3R_DESC_BASE<"hadd_u.h", int_mips_hadd_u_h, - NoItinerary, MSA128H, MSA128B>; + MSA128H, MSA128B>; class HADD_U_W_DESC : MSA_3R_DESC_BASE<"hadd_u.w", int_mips_hadd_u_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class HADD_U_D_DESC : MSA_3R_DESC_BASE<"hadd_u.d", int_mips_hadd_u_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class HSUB_S_H_DESC : MSA_3R_DESC_BASE<"hsub_s.h", int_mips_hsub_s_h, - NoItinerary, MSA128H, MSA128B>; + MSA128H, MSA128B>; class HSUB_S_W_DESC : MSA_3R_DESC_BASE<"hsub_s.w", int_mips_hsub_s_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class HSUB_S_D_DESC : MSA_3R_DESC_BASE<"hsub_s.d", int_mips_hsub_s_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; class HSUB_U_H_DESC : MSA_3R_DESC_BASE<"hsub_u.h", 
int_mips_hsub_u_h, - NoItinerary, MSA128H, MSA128B>; + MSA128H, MSA128B>; class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w, - NoItinerary, MSA128W, MSA128H>; + MSA128W, MSA128H>; class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d, - NoItinerary, MSA128D, MSA128W>; + MSA128D, MSA128W>; -class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, NoItinerary, +class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, MSA128B, MSA128B>; -class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, NoItinerary, +class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, MSA128H, MSA128H>; -class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", int_mips_ilvev_w, NoItinerary, +class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", int_mips_ilvev_w, MSA128W, MSA128W>; -class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", int_mips_ilvev_d, NoItinerary, +class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", int_mips_ilvev_d, MSA128D, MSA128D>; -class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", int_mips_ilvl_b, NoItinerary, +class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", int_mips_ilvl_b, MSA128B, MSA128B>; -class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", int_mips_ilvl_h, NoItinerary, +class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", int_mips_ilvl_h, MSA128H, MSA128H>; -class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", int_mips_ilvl_w, NoItinerary, +class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", int_mips_ilvl_w, MSA128W, MSA128W>; -class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", int_mips_ilvl_d, NoItinerary, +class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", int_mips_ilvl_d, MSA128D, MSA128D>; -class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", int_mips_ilvod_b, NoItinerary, +class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", int_mips_ilvod_b, MSA128B, MSA128B>; -class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", int_mips_ilvod_h, NoItinerary, +class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", int_mips_ilvod_h, MSA128H, MSA128H>; -class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", int_mips_ilvod_w, NoItinerary, +class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", int_mips_ilvod_w, MSA128W, MSA128W>; -class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", int_mips_ilvod_d, NoItinerary, +class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", int_mips_ilvod_d, MSA128D, MSA128D>; -class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", int_mips_ilvr_b, NoItinerary, +class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", int_mips_ilvr_b, MSA128B, MSA128B>; -class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, NoItinerary, +class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, MSA128H, MSA128H>; -class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, NoItinerary, +class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, MSA128W, MSA128W>; -class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, NoItinerary, +class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, MSA128D, MSA128D>; class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", int_mips_insert_b, - NoItinerary, MSA128B, GPR32>; + MSA128B, GPR32>; class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", int_mips_insert_h, - NoItinerary, MSA128H, GPR32>; + MSA128H, GPR32>; class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", int_mips_insert_w, - NoItinerary, MSA128W, GPR32>; + MSA128W, GPR32>; class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h, - NoItinerary, 
MSA128H, MSA128H>;
+                                        MSA128H, MSA128H>;
 class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", int_mips_insve_w,
-                                         NoItinerary, MSA128W, MSA128W>;
+                                         MSA128W, MSA128W>;
 class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d,
-                                         NoItinerary, MSA128D, MSA128D>;
+                                         MSA128D, MSA128D>;
 
 class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
-                   ValueType TyNode, InstrItinClass itin,
-                   RegisterClass RCWD, Operand MemOpnd = mem,
-                   ComplexPattern Addr = addrRegImm> {
+                   ValueType TyNode, RegisterClass RCWD,
+                   Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm,
+                   InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs RCWD:$wd);
   dag InOperandList = (ins MemOpnd:$addr);
   string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
@@ -1770,23 +1777,20 @@ class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
 
-class LD_B_DESC : LD_DESC_BASE<"ld.b", load, v16i8, NoItinerary, MSA128B>;
-class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, NoItinerary, MSA128H>;
-class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, NoItinerary, MSA128W>;
-class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, NoItinerary, MSA128D>;
+class LD_B_DESC : LD_DESC_BASE<"ld.b", load, v16i8, MSA128B>;
+class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128H>;
+class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128W>;
+class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128D>;
 
-class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", int_mips_ldi_b,
-                                     NoItinerary, MSA128B>;
-class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", int_mips_ldi_h,
-                                     NoItinerary, MSA128H>;
-class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w,
-                                     NoItinerary, MSA128W>;
-class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d,
-                                     NoItinerary, MSA128D>;
+class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", int_mips_ldi_b, MSA128B>;
+class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", int_mips_ldi_h, MSA128H>;
+class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w, MSA128W>;
+class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d, MSA128D>;
 
 class LDX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
-                    ValueType TyNode, InstrItinClass itin,
-                    RegisterClass RCWD, Operand MemOpnd = mem,
-                    ComplexPattern Addr = addrRegReg> {
+                    ValueType TyNode, RegisterClass RCWD,
+                    Operand MemOpnd = mem, ComplexPattern Addr = addrRegReg,
+                    InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs RCWD:$wd);
   dag InOperandList = (ins MemOpnd:$addr);
   string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
@@ -1794,136 +1798,136 @@ class LDX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
 
-class LDX_B_DESC : LDX_DESC_BASE<"ldx.b", load, v16i8, NoItinerary, MSA128B>;
-class LDX_H_DESC : LDX_DESC_BASE<"ldx.h", load, v8i16, NoItinerary, MSA128H>;
-class LDX_W_DESC : LDX_DESC_BASE<"ldx.w", load, v4i32, NoItinerary, MSA128W>;
-class LDX_D_DESC : LDX_DESC_BASE<"ldx.d", load, v2i64, NoItinerary, MSA128D>;
+class LDX_B_DESC : LDX_DESC_BASE<"ldx.b", load, v16i8, MSA128B>;
+class LDX_H_DESC : LDX_DESC_BASE<"ldx.h", load, v8i16, MSA128H>;
+class LDX_W_DESC : LDX_DESC_BASE<"ldx.w", load, v4i32, MSA128W>;
+class LDX_D_DESC : LDX_DESC_BASE<"ldx.d", load, v2i64, MSA128D>;
 
 class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h,
-                                            NoItinerary, MSA128H, MSA128H>;
+                                            MSA128H, MSA128H>;
 class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w,
-                                            NoItinerary, MSA128W, MSA128W>;
+                                            MSA128W, MSA128W>;
 
 class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h,
-                                             NoItinerary, MSA128H, MSA128H>;
+                                             MSA128H, MSA128H>;
 class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w,
-                                             NoItinerary, MSA128W, MSA128W>;
+                                             MSA128W, MSA128W>;
 
 class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b,
-                                         NoItinerary, MSA128B, MSA128B>;
+                                         MSA128B, MSA128B>;
 class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", int_mips_maddv_h,
-                                         NoItinerary, MSA128H, MSA128H>;
+                                         MSA128H, MSA128H>;
 class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", int_mips_maddv_w,
-                                         NoItinerary, MSA128W, MSA128W>;
+                                         MSA128W, MSA128W>;
 class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", 
int_mips_maddv_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, NoItinerary, +class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, MSA128B, MSA128B>; -class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, NoItinerary, +class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, MSA128H, MSA128H>; -class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, NoItinerary, +class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, MSA128W, MSA128W>; -class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, NoItinerary, +class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, MSA128D, MSA128D>; -class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", int_mips_max_s_b, NoItinerary, +class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", int_mips_max_s_b, MSA128B, MSA128B>; -class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", int_mips_max_s_h, NoItinerary, +class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", int_mips_max_s_h, MSA128H, MSA128H>; -class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", int_mips_max_s_w, NoItinerary, +class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", int_mips_max_s_w, MSA128W, MSA128W>; -class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", int_mips_max_s_d, NoItinerary, +class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", int_mips_max_s_d, MSA128D, MSA128D>; -class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", int_mips_max_u_b, NoItinerary, +class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", int_mips_max_u_b, MSA128B, MSA128B>; -class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, NoItinerary, +class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, MSA128H, MSA128H>; -class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, NoItinerary, +class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, MSA128W, MSA128W>; -class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, NoItinerary, +class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, MSA128D, MSA128D>; class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, NoItinerary, +class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B, MSA128B>; -class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, NoItinerary, +class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128H, MSA128H>; -class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, NoItinerary, +class MIN_A_W_DESC : 
MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, MSA128W, MSA128W>; -class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, NoItinerary, +class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, MSA128D, MSA128D>; -class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", int_mips_min_s_b, NoItinerary, +class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", int_mips_min_s_b, MSA128B, MSA128B>; -class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", int_mips_min_s_h, NoItinerary, +class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", int_mips_min_s_h, MSA128H, MSA128H>; -class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", int_mips_min_s_w, NoItinerary, +class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", int_mips_min_s_w, MSA128W, MSA128W>; -class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", int_mips_min_s_d, NoItinerary, +class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", int_mips_min_s_d, MSA128D, MSA128D>; -class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", int_mips_min_u_b, NoItinerary, +class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", int_mips_min_u_b, MSA128B, MSA128B>; -class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, NoItinerary, +class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, MSA128H, MSA128H>; -class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, NoItinerary, +class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, MSA128W, MSA128W>; -class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, NoItinerary, +class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, MSA128D, MSA128D>; class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", int_mips_mini_s_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", int_mips_mini_s_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", int_mips_mini_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", int_mips_mini_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", int_mips_mini_u_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", int_mips_mini_u_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", int_mips_mini_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", int_mips_mini_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, NoItinerary, +class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B, MSA128B>; -class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, NoItinerary, +class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H, MSA128H>; -class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", int_mips_mod_s_w, NoItinerary, +class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", int_mips_mod_s_w, MSA128W, MSA128W>; -class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", int_mips_mod_s_d, NoItinerary, +class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", int_mips_mod_s_d, MSA128D, MSA128D>; -class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", int_mips_mod_u_b, NoItinerary, +class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", int_mips_mod_u_b, MSA128B, MSA128B>; -class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", int_mips_mod_u_h, NoItinerary, +class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", 
int_mips_mod_u_h, MSA128H, MSA128H>; -class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, NoItinerary, +class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, MSA128W, MSA128W>; -class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, NoItinerary, +class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, MSA128D, MSA128D>; class MOVE_V_DESC { @@ -1935,254 +1939,255 @@ class MOVE_V_DESC { } class MSUB_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.h", int_mips_msub_q_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MSUB_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.w", int_mips_msub_q_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", int_mips_mulv_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", int_mips_mulv_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", int_mips_mulv_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", int_mips_mulv_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class NLOC_W_DESC : MSA_2R_DESC_BASE<"nloc.w", int_mips_nloc_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class NLOC_D_DESC : MSA_2R_DESC_BASE<"nloc.d", int_mips_nloc_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", int_mips_nlzc_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", int_mips_nlzc_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", int_mips_nlzc_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", int_mips_nlzc_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, NoItinerary, +class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, MSA128B, MSA128B>; 
-class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, NoItinerary, +class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, MSA128B, MSA128B>; -class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, NoItinerary, +class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, MSA128B, MSA128B>; -class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, NoItinerary, +class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, MSA128B, MSA128B>; -class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, NoItinerary, +class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, MSA128B, MSA128B>; -class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, NoItinerary, +class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, MSA128H, MSA128H>; -class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", int_mips_pckev_w, NoItinerary, +class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", int_mips_pckev_w, MSA128W, MSA128W>; -class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", int_mips_pckev_d, NoItinerary, +class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", int_mips_pckev_d, MSA128D, MSA128D>; -class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", int_mips_pckod_b, NoItinerary, +class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", int_mips_pckod_b, MSA128B, MSA128B>; -class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, NoItinerary, +class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, MSA128H, MSA128H>; -class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, NoItinerary, +class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, MSA128W, MSA128W>; -class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, NoItinerary, +class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, MSA128D, MSA128D>; class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", int_mips_pcnt_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", int_mips_pcnt_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", int_mips_pcnt_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", int_mips_pcnt_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class SAT_S_B_DESC : MSA_BIT_B_DESC_BASE<"sat_s.b", int_mips_sat_s_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SAT_S_H_DESC : MSA_BIT_H_DESC_BASE<"sat_s.h", int_mips_sat_s_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SAT_S_W_DESC : MSA_BIT_W_DESC_BASE<"sat_s.w", int_mips_sat_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SAT_S_D_DESC : MSA_BIT_D_DESC_BASE<"sat_s.d", int_mips_sat_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class SAT_U_B_DESC : MSA_BIT_B_DESC_BASE<"sat_u.b", int_mips_sat_u_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class SHF_B_DESC : MSA_I8_DESC_BASE<"shf.b", int_mips_shf_b, NoItinerary, +class SHF_B_DESC : MSA_I8_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B, MSA128B>; -class SHF_H_DESC : MSA_I8_DESC_BASE<"shf.h", int_mips_shf_h, NoItinerary, +class SHF_H_DESC : 
MSA_I8_DESC_BASE<"shf.h", int_mips_shf_h, MSA128H, MSA128H>; -class SHF_W_DESC : MSA_I8_DESC_BASE<"shf.w", int_mips_shf_w, NoItinerary, +class SHF_W_DESC : MSA_I8_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W, MSA128W>; -class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, NoItinerary, +class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B, MSA128B>; -class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, NoItinerary, +class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H, MSA128H>; -class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, NoItinerary, +class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, MSA128W, MSA128W>; -class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, NoItinerary, +class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, MSA128D, MSA128D>; class SLDI_B_DESC : MSA_BIT_B_DESC_BASE<"sldi.b", int_mips_sldi_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SLDI_H_DESC : MSA_BIT_H_DESC_BASE<"sldi.h", int_mips_sldi_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SLDI_W_DESC : MSA_BIT_W_DESC_BASE<"sldi.w", int_mips_sldi_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SLDI_D_DESC : MSA_BIT_D_DESC_BASE<"sldi.d", int_mips_sldi_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", int_mips_sll_b, NoItinerary, +class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", int_mips_sll_b, MSA128B, MSA128B>; -class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", int_mips_sll_h, NoItinerary, +class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", int_mips_sll_h, MSA128H, MSA128H>; -class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", int_mips_sll_w, NoItinerary, +class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", int_mips_sll_w, MSA128W, MSA128W>; -class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", int_mips_sll_d, NoItinerary, +class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", int_mips_sll_d, MSA128D, MSA128D>; class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SLLI_H_DESC : MSA_BIT_H_DESC_BASE<"slli.h", int_mips_slli_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SLLI_W_DESC : MSA_BIT_W_DESC_BASE<"slli.w", int_mips_slli_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SLLI_D_DESC : MSA_BIT_D_DESC_BASE<"slli.d", int_mips_slli_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, NoItinerary, +class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128B, MSA128B, GPR32>; -class SPLAT_H_DESC : MSA_3R_DESC_BASE<"splat.h", int_mips_splat_h, NoItinerary, +class SPLAT_H_DESC : MSA_3R_DESC_BASE<"splat.h", int_mips_splat_h, MSA128H, MSA128H, GPR32>; -class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, NoItinerary, +class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, MSA128W, MSA128W, GPR32>; -class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, NoItinerary, +class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, MSA128D, MSA128D, GPR32>; class SPLATI_B_DESC : MSA_BIT_B_DESC_BASE<"splati.b", int_mips_splati_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SPLATI_H_DESC : MSA_BIT_H_DESC_BASE<"splati.h", int_mips_splati_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SPLATI_D_DESC : 
MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", int_mips_sra_b, NoItinerary, +class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", int_mips_sra_b, MSA128B, MSA128B>; -class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", int_mips_sra_h, NoItinerary, +class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", int_mips_sra_h, MSA128H, MSA128H>; -class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", int_mips_sra_w, NoItinerary, +class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", int_mips_sra_w, MSA128W, MSA128W>; -class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", int_mips_sra_d, NoItinerary, +class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", int_mips_sra_d, MSA128D, MSA128D>; class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SRAI_H_DESC : MSA_BIT_H_DESC_BASE<"srai.h", int_mips_srai_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, NoItinerary, +class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128B, MSA128B>; -class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, NoItinerary, +class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128H, MSA128H>; -class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, NoItinerary, +class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128W, MSA128W>; -class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, NoItinerary, +class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128D, MSA128D>; class SRARI_B_DESC : MSA_BIT_B_DESC_BASE<"srari.b", int_mips_srari_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SRARI_H_DESC : MSA_BIT_H_DESC_BASE<"srari.h", int_mips_srari_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SRARI_W_DESC : MSA_BIT_W_DESC_BASE<"srari.w", int_mips_srari_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SRARI_D_DESC : MSA_BIT_D_DESC_BASE<"srari.d", int_mips_srari_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, NoItinerary, +class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, MSA128B, MSA128B>; -class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, NoItinerary, +class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, MSA128H, MSA128H>; -class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", int_mips_srl_w, NoItinerary, +class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", int_mips_srl_w, MSA128W, MSA128W>; -class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", int_mips_srl_d, NoItinerary, +class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", int_mips_srl_d, MSA128D, MSA128D>; class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SRLI_H_DESC : MSA_BIT_H_DESC_BASE<"srli.h", int_mips_srli_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, 
NoItinerary,
+class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b,
                                      MSA128B, MSA128B>;
-class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, NoItinerary,
+class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h,
                                      MSA128H, MSA128H>;
-class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, NoItinerary,
+class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w,
                                      MSA128W, MSA128W>;
-class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, NoItinerary,
+class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d,
                                      MSA128D, MSA128D>;
 
 class SRLRI_B_DESC : MSA_BIT_B_DESC_BASE<"srlri.b", int_mips_srlri_b,
-                                         NoItinerary, MSA128B, MSA128B>;
+                                         MSA128B, MSA128B>;
 class SRLRI_H_DESC : MSA_BIT_H_DESC_BASE<"srlri.h", int_mips_srlri_h,
-                                         NoItinerary, MSA128H, MSA128H>;
+                                         MSA128H, MSA128H>;
 class SRLRI_W_DESC : MSA_BIT_W_DESC_BASE<"srlri.w", int_mips_srlri_w,
-                                         NoItinerary, MSA128W, MSA128W>;
+                                         MSA128W, MSA128W>;
 class SRLRI_D_DESC : MSA_BIT_D_DESC_BASE<"srlri.d", int_mips_srlri_d,
-                                         NoItinerary, MSA128D, MSA128D>;
+                                         MSA128D, MSA128D>;
 
 class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
-                   ValueType TyNode, InstrItinClass itin,
-                   RegisterClass RCWD, Operand MemOpnd = mem,
-                   ComplexPattern Addr = addrRegImm> {
+                   ValueType TyNode, RegisterClass RCWD,
+                   Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm,
+                   InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs);
   dag InOperandList = (ins RCWD:$wd, MemOpnd:$addr);
   string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
@@ -2190,14 +2195,15 @@ class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
 
-class ST_B_DESC : ST_DESC_BASE<"st.b", store, v16i8, NoItinerary, MSA128B>;
-class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, NoItinerary, MSA128H>;
-class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, NoItinerary, MSA128W>;
-class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, NoItinerary, MSA128D>;
+class ST_B_DESC : ST_DESC_BASE<"st.b", store, v16i8, MSA128B>;
+class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, MSA128H>;
+class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, MSA128W>;
+class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, MSA128D>;
 
 class STX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
-                    ValueType TyNode, InstrItinClass itin,
-                    RegisterClass RCWD, Operand MemOpnd = mem,
-                    ComplexPattern Addr = addrRegReg> {
+                    ValueType TyNode, RegisterClass RCWD,
+                    Operand MemOpnd = mem, ComplexPattern Addr = addrRegReg,
+                    InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs);
   dag InOperandList = (ins RCWD:$wd, MemOpnd:$addr);
   string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
@@ -2205,78 +2211,78 @@ class STX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
 
-class STX_B_DESC : STX_DESC_BASE<"stx.b", store, v16i8, NoItinerary, MSA128B>;
-class STX_H_DESC : STX_DESC_BASE<"stx.h", store, v8i16, NoItinerary, MSA128H>;
-class STX_W_DESC : STX_DESC_BASE<"stx.w", store, v4i32, NoItinerary, MSA128W>;
-class STX_D_DESC : STX_DESC_BASE<"stx.d", store, v2i64, NoItinerary, MSA128D>;
+class STX_B_DESC : STX_DESC_BASE<"stx.b", store, v16i8, MSA128B>;
+class STX_H_DESC : STX_DESC_BASE<"stx.h", store, v8i16, MSA128H>;
+class STX_W_DESC : STX_DESC_BASE<"stx.w", store, v4i32, MSA128W>;
+class STX_D_DESC : STX_DESC_BASE<"stx.d", store, v2i64, MSA128D>;
 
 class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b,
-                                       NoItinerary, MSA128B, MSA128B>;
+                                       MSA128B, MSA128B>;
 class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h,
-                                       NoItinerary, MSA128H, MSA128H>;
+                                       MSA128H, MSA128H>;
 class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w,
-                                       NoItinerary, MSA128W, MSA128W>;
+                                       MSA128W, MSA128W>;
 class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d,
-                                       NoItinerary, MSA128D, MSA128D>;
+                                       MSA128D, MSA128D>;
 
 class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b,
-                                       NoItinerary, MSA128B, MSA128B>;
+                                       MSA128B, MSA128B>;
 class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h,
-                                       NoItinerary, MSA128H, MSA128H>;
+                                       MSA128H, MSA128H>;
 class SUBS_U_W_DESC : 
MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class SUBSUS_U_B_DESC : MSA_3R_DESC_BASE<"subsus_u.b", int_mips_subsus_u_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SUBSUS_U_H_DESC : MSA_3R_DESC_BASE<"subsus_u.h", int_mips_subsus_u_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SUBSUS_U_W_DESC : MSA_3R_DESC_BASE<"subsus_u.w", int_mips_subsus_u_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SUBSUS_U_D_DESC : MSA_3R_DESC_BASE<"subsus_u.d", int_mips_subsus_u_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class SUBSUU_S_B_DESC : MSA_3R_DESC_BASE<"subsuu_s.b", int_mips_subsuu_s_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SUBSUU_S_H_DESC : MSA_3R_DESC_BASE<"subsuu_s.h", int_mips_subsuu_s_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SUBSUU_S_W_DESC : MSA_3R_DESC_BASE<"subsuu_s.w", int_mips_subsuu_s_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SUBSUU_S_D_DESC : MSA_3R_DESC_BASE<"subsuu_s.d", int_mips_subsuu_s_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", int_mips_subv_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", int_mips_subv_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", int_mips_subv_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", int_mips_subv_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, NoItinerary, +class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, MSA128B, MSA128B>; -class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", int_mips_subvi_h, NoItinerary, +class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", int_mips_subvi_h, MSA128H, MSA128H>; -class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", int_mips_subvi_w, NoItinerary, +class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", int_mips_subvi_w, MSA128W, MSA128W>; -class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", int_mips_subvi_d, NoItinerary, +class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", int_mips_subvi_d, MSA128D, MSA128D>; class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, - NoItinerary, MSA128B, MSA128B>; + MSA128B, MSA128B>; class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, - NoItinerary, MSA128H, MSA128H>; + MSA128H, MSA128H>; class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, - NoItinerary, MSA128W, MSA128W>; + MSA128W, MSA128W>; class VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, - NoItinerary, MSA128D, MSA128D>; + MSA128D, MSA128D>; -class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, NoItinerary, +class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, MSA128B, MSA128B>; -class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, NoItinerary, +class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, MSA128B, MSA128B>; // Instruction defs. 
def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC, Requires<[HasMSA]>; @@ -3182,7 +3188,8 @@ def : MSABitconvertReverseWInDPat; // Pseudos used to implement BNZ.df, and BZ.df class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode, - RegisterClass RCWS> : + RegisterClass RCWS, + InstrItinClass itin = NoItinerary> : MipsPseudo<(outs GPR32:$dst), (ins RCWS:$ws), [(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> { -- cgit v1.1 From cc538affd270c81d12285e6addcd261086beec2b Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 10:59:24 +0000 Subject: [mips][msa] Made the operand register sets optional for the 3R format Their default is to be the same as the result register set. No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190133 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 740 ++++++++++++++---------------- 1 file changed, 280 insertions(+), 460 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index a12144c..9117cef 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -866,8 +866,8 @@ class MSA_2RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode, class MSA_3R_DESC_BASE<string instr_asm, SDPatternOperator OpNode, - RegisterClass RCWD, RegisterClass RCWS, - RegisterClass RCWT, InstrItinClass itin = NoItinerary> { + RegisterClass RCWD, RegisterClass RCWS = RCWD, + RegisterClass RCWT = RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, RCWT:$wt); @@ -950,71 +950,59 @@ class MSA_VEC_DESC_BASE<string instr_asm, SDPatternOperator OpNode, -class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, - MSA128B, MSA128B>, IsCommutable; -class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, - MSA128H, MSA128H>, IsCommutable; -class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, - MSA128W, MSA128W>, IsCommutable; -class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, - MSA128D, MSA128D>, IsCommutable; - -class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b, - MSA128B, MSA128B>, +class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, MSA128B>, + IsCommutable; +class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, MSA128H>, + IsCommutable; +class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, MSA128W>, + IsCommutable; +class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, MSA128D>, + IsCommutable; + +class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b, MSA128B>, IsCommutable; -class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h, - MSA128H, MSA128H>, +class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h, MSA128H>, IsCommutable; -class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w, - MSA128W, MSA128W>, +class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w, MSA128W>, IsCommutable; -class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d, - MSA128D, MSA128D>, +class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d, MSA128D>, IsCommutable; -class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b, - MSA128B, MSA128B>, +class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b, MSA128B>, IsCommutable; -class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h, - MSA128H, MSA128H>, +class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h, MSA128H>, IsCommutable; -class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w, - MSA128W, MSA128W>, +class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w, MSA128W>, IsCommutable; -class ADDS_S_D_DESC : MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d, - MSA128D, MSA128D>, +class ADDS_S_D_DESC : MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d, MSA128D>, IsCommutable; -class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b",
int_mips_adds_u_b, - MSA128B, MSA128B>, +class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b", int_mips_adds_u_b, MSA128B>, IsCommutable; -class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h, - MSA128H, MSA128H>, +class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h, MSA128H>, IsCommutable; -class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w, - MSA128W, MSA128W>, +class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w, MSA128W>, IsCommutable; -class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d, - MSA128D, MSA128D>, +class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d, MSA128D>, IsCommutable; -class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", int_mips_addv_b, - MSA128B, MSA128B>, IsCommutable; -class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", int_mips_addv_h, - MSA128H, MSA128H>, IsCommutable; -class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, - MSA128W, MSA128W>, IsCommutable; -class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, - MSA128D, MSA128D>, IsCommutable; - -class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, - MSA128B, MSA128B>; -class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, - MSA128H, MSA128H>; -class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, - MSA128W, MSA128W>; -class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, - MSA128D, MSA128D>; +class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", int_mips_addv_b, MSA128B>, + IsCommutable; +class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", int_mips_addv_h, MSA128H>, + IsCommutable; +class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, MSA128W>, + IsCommutable; +class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, MSA128D>, + IsCommutable; + +class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, MSA128B, + MSA128B>; +class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H, + MSA128H>; +class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, MSA128W, + MSA128W>; +class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, MSA128D, + MSA128D>; class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, MSA128B, MSA128B>; @@ -1022,76 +1010,56 @@ class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, MSA128B, MSA128B>; -class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, - MSA128B, MSA128B>; -class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, - MSA128H, MSA128H>; -class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w, - MSA128W, MSA128W>; -class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d, - MSA128D, MSA128D>; - -class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b, - MSA128B, MSA128B>; -class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h, - MSA128H, MSA128H>; -class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w, - MSA128W, MSA128W>; -class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d, - MSA128D, MSA128D>; - -class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, - MSA128B, MSA128B>, IsCommutable; -class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, - MSA128H, MSA128H>, IsCommutable; -class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, - MSA128W, MSA128W>, IsCommutable; -class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, - MSA128D, 
MSA128D>, IsCommutable; - -class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, - MSA128B, MSA128B>, IsCommutable; -class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, - MSA128H, MSA128H>, IsCommutable; -class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, - MSA128W, MSA128W>, IsCommutable; -class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, - MSA128D, MSA128D>, IsCommutable; - -class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b, - MSA128B, MSA128B>, +class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, MSA128B>; +class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, MSA128H>; +class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w, MSA128W>; +class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d, MSA128D>; + +class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b, MSA128B>; +class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h, MSA128H>; +class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w, MSA128W>; +class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d, MSA128D>; + +class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, MSA128B>, + IsCommutable; +class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, MSA128H>, + IsCommutable; +class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, MSA128W>, + IsCommutable; +class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, MSA128D>, + IsCommutable; + +class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, MSA128B>, + IsCommutable; +class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, MSA128H>, + IsCommutable; +class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, MSA128W>, + IsCommutable; +class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, MSA128D>, + IsCommutable; + +class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b, MSA128B>, IsCommutable; -class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h, - MSA128H, MSA128H>, +class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h, MSA128H>, IsCommutable; -class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w, - MSA128W, MSA128W>, +class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w, MSA128W>, IsCommutable; -class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d, - MSA128D, MSA128D>, +class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d, MSA128D>, IsCommutable; -class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b, - MSA128B, MSA128B>, +class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b, MSA128B>, IsCommutable; -class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h, - MSA128H, MSA128H>, +class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h, MSA128H>, IsCommutable; -class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w, - MSA128W, MSA128W>, +class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w, MSA128W>, IsCommutable; -class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d, - MSA128D, MSA128D>, +class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d, MSA128D>, IsCommutable; -class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, - MSA128B, MSA128B>; -class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, - MSA128H, MSA128H>; -class BCLR_W_DESC : 
MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, - MSA128W, MSA128W>; -class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, - MSA128D, MSA128D>; +class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, MSA128B>; +class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, MSA128H>; +class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, MSA128W>; +class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, MSA128D>; class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", int_mips_bclri_b, MSA128B, MSA128B>; @@ -1102,14 +1070,10 @@ class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", int_mips_bclri_w, class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", int_mips_bclri_d, MSA128D, MSA128D>; -class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, - MSA128B, MSA128B>; -class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, - MSA128H, MSA128H>; -class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, - MSA128W, MSA128W>; -class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, - MSA128D, MSA128D>; +class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, MSA128B>; +class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, MSA128H>; +class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, MSA128W>; +class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, MSA128D>; class BINSLI_B_DESC : MSA_BIT_B_DESC_BASE<"binsli.b", int_mips_binsli_b, MSA128B, MSA128B>; @@ -1120,14 +1084,10 @@ class BINSLI_W_DESC : MSA_BIT_W_DESC_BASE<"binsli.w", int_mips_binsli_w, class BINSLI_D_DESC : MSA_BIT_D_DESC_BASE<"binsli.d", int_mips_binsli_d, MSA128D, MSA128D>; -class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, - MSA128B, MSA128B>; -class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, - MSA128H, MSA128H>; -class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, - MSA128W, MSA128W>; -class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, - MSA128D, MSA128D>; +class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, MSA128B>; +class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, MSA128H>; +class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, MSA128W>; +class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, MSA128D>; class BINSRI_B_DESC : MSA_BIT_B_DESC_BASE<"binsri.b", int_mips_binsri_b, MSA128B, MSA128B>; @@ -1150,14 +1110,10 @@ class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, MSA128B, MSA128B>; -class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, - MSA128B, MSA128B>; -class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, - MSA128H, MSA128H>; -class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, - MSA128W, MSA128W>; -class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, - MSA128D, MSA128D>; +class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, MSA128B>; +class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, MSA128H>; +class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, MSA128W>; +class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, MSA128D>; class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", int_mips_bnegi_b, MSA128B, MSA128B>; @@ -1181,14 +1137,10 @@ class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, MSA128B, MSA128B>; -class BSET_B_DESC : 
MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, - MSA128B, MSA128B>; -class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, - MSA128H, MSA128H>; -class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, - MSA128W, MSA128W>; -class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, - MSA128D, MSA128D>; +class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, MSA128B>; +class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128H>; +class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, MSA128W>; +class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, MSA128D>; class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", int_mips_bseti_b, MSA128B, MSA128B>; @@ -1206,14 +1158,14 @@ class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128D>; class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128B>; -class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, - MSA128B, MSA128B>, IsCommutable; -class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, - MSA128H, MSA128H>, IsCommutable; -class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, - MSA128W, MSA128W>, IsCommutable; -class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, - MSA128D, MSA128D>, IsCommutable; +class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, MSA128B>, + IsCommutable; +class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, MSA128H>, + IsCommutable; +class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, MSA128W>, + IsCommutable; +class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, MSA128D>, + IsCommutable; class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, MSA128B, MSA128B>; @@ -1232,23 +1184,15 @@ class CFCMSA_DESC { bit hasSideEffects = 1; } -class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", int_mips_cle_s_b, - MSA128B, MSA128B>; -class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", int_mips_cle_s_h, - MSA128H, MSA128H>; -class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", int_mips_cle_s_w, - MSA128W, MSA128W>; -class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", int_mips_cle_s_d, - MSA128D, MSA128D>; +class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", int_mips_cle_s_b, MSA128B>; +class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", int_mips_cle_s_h, MSA128H>; +class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", int_mips_cle_s_w, MSA128W>; +class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", int_mips_cle_s_d, MSA128D>; -class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", int_mips_cle_u_b, - MSA128B, MSA128B>; -class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", int_mips_cle_u_h, - MSA128H, MSA128H>; -class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", int_mips_cle_u_w, - MSA128W, MSA128W>; -class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", int_mips_cle_u_d, - MSA128D, MSA128D>; +class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", int_mips_cle_u_b, MSA128B>; +class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", int_mips_cle_u_h, MSA128H>; +class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", int_mips_cle_u_w, MSA128W>; +class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", int_mips_cle_u_d, MSA128D>; class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", int_mips_clei_s_b, MSA128B, MSA128B>; @@ -1268,23 +1212,15 @@ class CLEI_U_W_DESC : MSA_SI5_DESC_BASE<"clei_u.w", int_mips_clei_u_w, class CLEI_U_D_DESC : MSA_SI5_DESC_BASE<"clei_u.d", int_mips_clei_u_d, MSA128D, MSA128D>; -class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", int_mips_clt_s_b, - MSA128B, MSA128B>; -class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", int_mips_clt_s_h, - MSA128H, MSA128H>; -class CLT_S_W_DESC 
: MSA_3R_DESC_BASE<"clt_s.w", int_mips_clt_s_w, - MSA128W, MSA128W>; -class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", int_mips_clt_s_d, - MSA128D, MSA128D>; +class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", int_mips_clt_s_b, MSA128B>; +class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", int_mips_clt_s_h, MSA128H>; +class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", int_mips_clt_s_w, MSA128W>; +class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", int_mips_clt_s_d, MSA128D>; -class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", int_mips_clt_u_b, - MSA128B, MSA128B>; -class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", int_mips_clt_u_h, - MSA128H, MSA128H>; -class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", int_mips_clt_u_w, - MSA128W, MSA128W>; -class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", int_mips_clt_u_d, - MSA128D, MSA128D>; +class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", int_mips_clt_u_b, MSA128B>; +class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", int_mips_clt_u_h, MSA128H>; +class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", int_mips_clt_u_w, MSA128W>; +class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", int_mips_clt_u_d, MSA128D>; class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", int_mips_clti_s_b, MSA128B, MSA128B>; @@ -1326,48 +1262,32 @@ class CTCMSA_DESC { bit hasSideEffects = 1; } -class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", int_mips_div_s_b, - MSA128B, MSA128B>; -class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", int_mips_div_s_h, - MSA128H, MSA128H>; -class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", int_mips_div_s_w, - MSA128W, MSA128W>; -class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", int_mips_div_s_d, - MSA128D, MSA128D>; +class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", int_mips_div_s_b, MSA128B>; +class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", int_mips_div_s_h, MSA128H>; +class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", int_mips_div_s_w, MSA128W>; +class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", int_mips_div_s_d, MSA128D>; -class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", int_mips_div_u_b, - MSA128B, MSA128B>; -class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, - MSA128H, MSA128H>; -class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, - MSA128W, MSA128W>; -class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, - MSA128D, MSA128D>; +class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", int_mips_div_u_b, MSA128B>; +class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, MSA128H>; +class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, MSA128W>; +class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, MSA128D>; -class DOTP_S_B_DESC : MSA_3R_DESC_BASE<"dotp_s.b", int_mips_dotp_s_b, - MSA128B, MSA128B>, +class DOTP_S_B_DESC : MSA_3R_DESC_BASE<"dotp_s.b", int_mips_dotp_s_b, MSA128B>, IsCommutable; -class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, - MSA128H, MSA128H>, +class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, MSA128H>, IsCommutable; -class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, - MSA128W, MSA128W>, +class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, MSA128W>, IsCommutable; -class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, - MSA128D, MSA128D>, +class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, MSA128D>, IsCommutable; -class DOTP_U_B_DESC : MSA_3R_DESC_BASE<"dotp_u.b", int_mips_dotp_u_b, - MSA128B, MSA128B>, +class DOTP_U_B_DESC : MSA_3R_DESC_BASE<"dotp_u.b", int_mips_dotp_u_b, MSA128B>, 
IsCommutable; -class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, - MSA128H, MSA128H>, +class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, MSA128H>, IsCommutable; -class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, - MSA128W, MSA128W>, +class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, MSA128W>, IsCommutable; -class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, - MSA128D, MSA128D>, +class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, MSA128D>, IsCommutable; class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h, @@ -1686,69 +1606,53 @@ class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w, MSA128W, MSA128D>; -class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h, - MSA128H, MSA128B>; -class HADD_S_W_DESC : MSA_3R_DESC_BASE<"hadd_s.w", int_mips_hadd_s_w, - MSA128W, MSA128H>; -class HADD_S_D_DESC : MSA_3R_DESC_BASE<"hadd_s.d", int_mips_hadd_s_d, - MSA128D, MSA128W>; - -class HADD_U_H_DESC : MSA_3R_DESC_BASE<"hadd_u.h", int_mips_hadd_u_h, - MSA128H, MSA128B>; -class HADD_U_W_DESC : MSA_3R_DESC_BASE<"hadd_u.w", int_mips_hadd_u_w, - MSA128W, MSA128H>; -class HADD_U_D_DESC : MSA_3R_DESC_BASE<"hadd_u.d", int_mips_hadd_u_d, - MSA128D, MSA128W>; - -class HSUB_S_H_DESC : MSA_3R_DESC_BASE<"hsub_s.h", int_mips_hsub_s_h, - MSA128H, MSA128B>; -class HSUB_S_W_DESC : MSA_3R_DESC_BASE<"hsub_s.w", int_mips_hsub_s_w, - MSA128W, MSA128H>; -class HSUB_S_D_DESC : MSA_3R_DESC_BASE<"hsub_s.d", int_mips_hsub_s_d, - MSA128D, MSA128W>; - -class HSUB_U_H_DESC : MSA_3R_DESC_BASE<"hsub_u.h", int_mips_hsub_u_h, - MSA128H, MSA128B>; -class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w, - MSA128W, MSA128H>; -class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d, - MSA128D, MSA128W>; - -class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, - MSA128B, MSA128B>; -class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, - MSA128H, MSA128H>; -class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", int_mips_ilvev_w, - MSA128W, MSA128W>; -class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", int_mips_ilvev_d, - MSA128D, MSA128D>; +class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h, MSA128H, + MSA128B, MSA128B>; +class HADD_S_W_DESC : MSA_3R_DESC_BASE<"hadd_s.w", int_mips_hadd_s_w, MSA128W, + MSA128H, MSA128H>; +class HADD_S_D_DESC : MSA_3R_DESC_BASE<"hadd_s.d", int_mips_hadd_s_d, MSA128D, + MSA128W, MSA128W>; -class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", int_mips_ilvl_b, - MSA128B, MSA128B>; -class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", int_mips_ilvl_h, - MSA128H, MSA128H>; -class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", int_mips_ilvl_w, - MSA128W, MSA128W>; -class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", int_mips_ilvl_d, - MSA128D, MSA128D>; +class HADD_U_H_DESC : MSA_3R_DESC_BASE<"hadd_u.h", int_mips_hadd_u_h, MSA128H, + MSA128B, MSA128B>; +class HADD_U_W_DESC : MSA_3R_DESC_BASE<"hadd_u.w", int_mips_hadd_u_w, MSA128W, + MSA128H, MSA128H>; +class HADD_U_D_DESC : MSA_3R_DESC_BASE<"hadd_u.d", int_mips_hadd_u_d, MSA128D, + MSA128W, MSA128W>; -class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", int_mips_ilvod_b, - MSA128B, MSA128B>; -class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", int_mips_ilvod_h, - MSA128H, MSA128H>; -class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", int_mips_ilvod_w, - MSA128W, MSA128W>; -class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", 
int_mips_ilvod_d, - MSA128D, MSA128D>; +class HSUB_S_H_DESC : MSA_3R_DESC_BASE<"hsub_s.h", int_mips_hsub_s_h, MSA128H, + MSA128B, MSA128B>; +class HSUB_S_W_DESC : MSA_3R_DESC_BASE<"hsub_s.w", int_mips_hsub_s_w, MSA128W, + MSA128H, MSA128H>; +class HSUB_S_D_DESC : MSA_3R_DESC_BASE<"hsub_s.d", int_mips_hsub_s_d, MSA128D, + MSA128W, MSA128W>; -class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", int_mips_ilvr_b, - MSA128B, MSA128B>; -class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, - MSA128H, MSA128H>; -class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, - MSA128W, MSA128W>; -class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, - MSA128D, MSA128D>; +class HSUB_U_H_DESC : MSA_3R_DESC_BASE<"hsub_u.h", int_mips_hsub_u_h, MSA128H, + MSA128B, MSA128B>; +class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w, MSA128W, + MSA128H, MSA128H>; +class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d, MSA128D, + MSA128W, MSA128W>; + +class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, MSA128B>; +class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, MSA128H>; +class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", int_mips_ilvev_w, MSA128W>; +class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", int_mips_ilvev_d, MSA128D>; + +class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", int_mips_ilvl_b, MSA128B>; +class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", int_mips_ilvl_h, MSA128H>; +class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", int_mips_ilvl_w, MSA128W>; +class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", int_mips_ilvl_d, MSA128D>; + +class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", int_mips_ilvod_b, MSA128B>; +class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", int_mips_ilvod_h, MSA128H>; +class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", int_mips_ilvod_w, MSA128W>; +class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", int_mips_ilvod_d, MSA128D>; + +class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", int_mips_ilvr_b, MSA128B>; +class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, MSA128H>; +class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, MSA128W>; +class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, MSA128D>; class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", int_mips_insert_b, MSA128B, GPR32>; @@ -1822,32 +1726,20 @@ class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", int_mips_maddv_w, class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", int_mips_maddv_d, MSA128D, MSA128D>; -class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, - MSA128B, MSA128B>; -class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, - MSA128H, MSA128H>; -class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, - MSA128W, MSA128W>; -class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, - MSA128D, MSA128D>; +class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, MSA128B>; +class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, MSA128H>; +class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, MSA128W>; +class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, MSA128D>; -class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", int_mips_max_s_b, - MSA128B, MSA128B>; -class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", int_mips_max_s_h, - MSA128H, MSA128H>; -class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", int_mips_max_s_w, - MSA128W, MSA128W>; -class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", int_mips_max_s_d, - MSA128D, MSA128D>; +class MAX_S_B_DESC : 
MSA_3R_DESC_BASE<"max_s.b", int_mips_max_s_b, MSA128B>; +class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", int_mips_max_s_h, MSA128H>; +class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", int_mips_max_s_w, MSA128W>; +class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", int_mips_max_s_d, MSA128D>; -class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", int_mips_max_u_b, - MSA128B, MSA128B>; -class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, - MSA128H, MSA128H>; -class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, - MSA128W, MSA128W>; -class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, - MSA128D, MSA128D>; +class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", int_mips_max_u_b, MSA128B>; +class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, MSA128H>; +class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, MSA128W>; +class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, MSA128D>; class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, MSA128B, MSA128B>; @@ -1867,32 +1759,20 @@ class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, MSA128D, MSA128D>; -class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, - MSA128B, MSA128B>; -class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, - MSA128H, MSA128H>; -class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, - MSA128W, MSA128W>; -class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, - MSA128D, MSA128D>; +class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B>; +class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128H>; +class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, MSA128W>; +class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, MSA128D>; -class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", int_mips_min_s_b, - MSA128B, MSA128B>; -class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", int_mips_min_s_h, - MSA128H, MSA128H>; -class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", int_mips_min_s_w, - MSA128W, MSA128W>; -class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", int_mips_min_s_d, - MSA128D, MSA128D>; +class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", int_mips_min_s_b, MSA128B>; +class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", int_mips_min_s_h, MSA128H>; +class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", int_mips_min_s_w, MSA128W>; +class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", int_mips_min_s_d, MSA128D>; -class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", int_mips_min_u_b, - MSA128B, MSA128B>; -class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, - MSA128H, MSA128H>; -class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, - MSA128W, MSA128W>; -class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, - MSA128D, MSA128D>; +class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", int_mips_min_u_b, MSA128B>; +class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, MSA128H>; +class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, MSA128W>; +class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, MSA128D>; class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", int_mips_mini_s_b, MSA128B, MSA128B>; @@ -1912,23 +1792,15 @@ class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", int_mips_mini_u_w, class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", int_mips_mini_u_d, MSA128D, MSA128D>; -class MOD_S_B_DESC : 
MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, - MSA128B, MSA128B>; -class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, - MSA128H, MSA128H>; -class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", int_mips_mod_s_w, - MSA128W, MSA128W>; -class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", int_mips_mod_s_d, - MSA128D, MSA128D>; +class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; +class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H>; +class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", int_mips_mod_s_w, MSA128W>; +class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", int_mips_mod_s_d, MSA128D>; -class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", int_mips_mod_u_b, - MSA128B, MSA128B>; -class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", int_mips_mod_u_h, - MSA128H, MSA128H>; -class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, - MSA128W, MSA128W>; -class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, - MSA128D, MSA128D>; +class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", int_mips_mod_u_b, MSA128B>; +class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", int_mips_mod_u_h, MSA128H>; +class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, MSA128W>; +class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, MSA128D>; class MOVE_V_DESC { dag OutOperandList = (outs MSA128B:$wd); @@ -1967,14 +1839,10 @@ class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h, class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w, MSA128W, MSA128W>; -class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", int_mips_mulv_b, - MSA128B, MSA128B>; -class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", int_mips_mulv_h, - MSA128H, MSA128H>; -class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", int_mips_mulv_w, - MSA128W, MSA128W>; -class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", int_mips_mulv_d, - MSA128D, MSA128D>; +class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", int_mips_mulv_b, MSA128B>; +class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", int_mips_mulv_h, MSA128H>; +class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", int_mips_mulv_w, MSA128W>; +class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", int_mips_mulv_d, MSA128D>; class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, MSA128B, MSA128B>; @@ -2006,23 +1874,15 @@ class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, MSA128B, MSA128B>; -class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, - MSA128B, MSA128B>; -class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, - MSA128H, MSA128H>; -class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", int_mips_pckev_w, - MSA128W, MSA128W>; -class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", int_mips_pckev_d, - MSA128D, MSA128D>; +class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, MSA128B>; +class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, MSA128H>; +class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", int_mips_pckev_w, MSA128W>; +class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", int_mips_pckev_d, MSA128D>; -class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", int_mips_pckod_b, - MSA128B, MSA128B>; -class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, - MSA128H, MSA128H>; -class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, - MSA128W, MSA128W>; -class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, - MSA128D, MSA128D>; +class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", int_mips_pckod_b, MSA128B>; 
+class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, MSA128H>; +class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, MSA128W>; +class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, MSA128D>; class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", int_mips_pcnt_b, MSA128B, MSA128B>; @@ -2058,14 +1918,10 @@ class SHF_H_DESC : MSA_I8_DESC_BASE<"shf.h", int_mips_shf_h, class SHF_W_DESC : MSA_I8_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W, MSA128W>; -class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, - MSA128B, MSA128B>; -class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, - MSA128H, MSA128H>; -class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, - MSA128W, MSA128W>; -class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, - MSA128D, MSA128D>; +class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B>; +class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>; +class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, MSA128W>; +class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, MSA128D>; class SLDI_B_DESC : MSA_BIT_B_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128B, MSA128B>; @@ -2076,14 +1932,10 @@ class SLDI_W_DESC : MSA_BIT_W_DESC_BASE<"sldi.w", int_mips_sldi_w, class SLDI_D_DESC : MSA_BIT_D_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128D, MSA128D>; -class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", int_mips_sll_b, - MSA128B, MSA128B>; -class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", int_mips_sll_h, - MSA128H, MSA128H>; -class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", int_mips_sll_w, - MSA128W, MSA128W>; -class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", int_mips_sll_d, - MSA128D, MSA128D>; +class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", int_mips_sll_b, MSA128B>; +class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", int_mips_sll_h, MSA128H>; +class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", int_mips_sll_w, MSA128W>; +class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", int_mips_sll_d, MSA128D>; class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, MSA128B, MSA128B>; @@ -2094,14 +1946,14 @@ class SLLI_W_DESC : MSA_BIT_W_DESC_BASE<"slli.w", int_mips_slli_w, class SLLI_D_DESC : MSA_BIT_D_DESC_BASE<"slli.d", int_mips_slli_d, MSA128D, MSA128D>; -class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, - MSA128B, MSA128B, GPR32>; -class SPLAT_H_DESC : MSA_3R_DESC_BASE<"splat.h", int_mips_splat_h, - MSA128H, MSA128H, GPR32>; -class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, - MSA128W, MSA128W, GPR32>; -class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, - MSA128D, MSA128D, GPR32>; +class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128B, + MSA128B, GPR32>; +class SPLAT_H_DESC : MSA_3R_DESC_BASE<"splat.h", int_mips_splat_h, MSA128H, + MSA128H, GPR32>; +class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, MSA128W, + MSA128W, GPR32>; +class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, MSA128D, + MSA128D, GPR32>; class SPLATI_B_DESC : MSA_BIT_B_DESC_BASE<"splati.b", int_mips_splati_b, MSA128B, MSA128B>; @@ -2112,14 +1964,10 @@ class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w, class SPLATI_D_DESC : MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d, MSA128D, MSA128D>; -class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", int_mips_sra_b, - MSA128B, MSA128B>; -class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", int_mips_sra_h, - MSA128H, MSA128H>; -class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", int_mips_sra_w, - MSA128W, MSA128W>; -class 
SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", int_mips_sra_d, - MSA128D, MSA128D>; +class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", int_mips_sra_b, MSA128B>; +class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", int_mips_sra_h, MSA128H>; +class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", int_mips_sra_w, MSA128W>; +class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", int_mips_sra_d, MSA128D>; class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, MSA128B, MSA128B>; @@ -2130,14 +1978,10 @@ class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, MSA128D, MSA128D>; -class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, - MSA128B, MSA128B>; -class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, - MSA128H, MSA128H>; -class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, - MSA128W, MSA128W>; -class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, - MSA128D, MSA128D>; +class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128B>; +class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128H>; +class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128W>; +class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128D>; class SRARI_B_DESC : MSA_BIT_B_DESC_BASE<"srari.b", int_mips_srari_b, MSA128B, MSA128B>; @@ -2148,14 +1992,10 @@ class SRARI_W_DESC : MSA_BIT_W_DESC_BASE<"srari.w", int_mips_srari_w, class SRARI_D_DESC : MSA_BIT_D_DESC_BASE<"srari.d", int_mips_srari_d, MSA128D, MSA128D>; -class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, - MSA128B, MSA128B>; -class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, - MSA128H, MSA128H>; -class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", int_mips_srl_w, - MSA128W, MSA128W>; -class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", int_mips_srl_d, - MSA128D, MSA128D>; +class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, MSA128B>; +class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, MSA128H>; +class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", int_mips_srl_w, MSA128W>; +class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", int_mips_srl_d, MSA128D>; class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, MSA128B, MSA128B>; @@ -2166,14 +2006,10 @@ class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, MSA128D, MSA128D>; -class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, - MSA128B, MSA128B>; -class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, - MSA128H, MSA128H>; -class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, - MSA128W, MSA128W>; -class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, - MSA128D, MSA128D>; +class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128B>; +class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128H>; +class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128W>; +class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128D>; class SRLRI_B_DESC : MSA_BIT_B_DESC_BASE<"srlri.b", int_mips_srlri_b, MSA128B, MSA128B>; @@ -2216,50 +2052,38 @@ class STX_H_DESC : STX_DESC_BASE<"stx.h", store, v8i16, MSA128H>; class STX_W_DESC : STX_DESC_BASE<"stx.w", store, v4i32, MSA128W>; class STX_D_DESC : STX_DESC_BASE<"stx.d", store, v2i64, MSA128D>; -class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b, - MSA128B, MSA128B>; -class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h, - MSA128H, 
MSA128H>; -class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w, - MSA128W, MSA128W>; -class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d, - MSA128D, MSA128D>; +class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b, MSA128B>; +class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h, MSA128H>; +class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w, MSA128W>; +class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d, MSA128D>; -class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b, - MSA128B, MSA128B>; -class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h, - MSA128H, MSA128H>; -class SUBS_U_W_DESC : MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w, - MSA128W, MSA128W>; -class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d, - MSA128D, MSA128D>; +class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b, MSA128B>; +class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h, MSA128H>; +class SUBS_U_W_DESC : MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w, MSA128W>; +class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d, MSA128D>; class SUBSUS_U_B_DESC : MSA_3R_DESC_BASE<"subsus_u.b", int_mips_subsus_u_b, - MSA128B, MSA128B>; + MSA128B>; class SUBSUS_U_H_DESC : MSA_3R_DESC_BASE<"subsus_u.h", int_mips_subsus_u_h, - MSA128H, MSA128H>; + MSA128H>; class SUBSUS_U_W_DESC : MSA_3R_DESC_BASE<"subsus_u.w", int_mips_subsus_u_w, - MSA128W, MSA128W>; + MSA128W>; class SUBSUS_U_D_DESC : MSA_3R_DESC_BASE<"subsus_u.d", int_mips_subsus_u_d, - MSA128D, MSA128D>; + MSA128D>; class SUBSUU_S_B_DESC : MSA_3R_DESC_BASE<"subsuu_s.b", int_mips_subsuu_s_b, - MSA128B, MSA128B>; + MSA128B>; class SUBSUU_S_H_DESC : MSA_3R_DESC_BASE<"subsuu_s.h", int_mips_subsuu_s_h, - MSA128H, MSA128H>; + MSA128H>; class SUBSUU_S_W_DESC : MSA_3R_DESC_BASE<"subsuu_s.w", int_mips_subsuu_s_w, - MSA128W, MSA128W>; + MSA128W>; class SUBSUU_S_D_DESC : MSA_3R_DESC_BASE<"subsuu_s.d", int_mips_subsuu_s_d, - MSA128D, MSA128D>; + MSA128D>; -class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", int_mips_subv_b, - MSA128B, MSA128B>; -class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", int_mips_subv_h, - MSA128H, MSA128H>; -class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", int_mips_subv_w, - MSA128W, MSA128W>; -class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", int_mips_subv_d, - MSA128D, MSA128D>; +class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", int_mips_subv_b, MSA128B>; +class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", int_mips_subv_h, MSA128H>; +class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", int_mips_subv_w, MSA128W>; +class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", int_mips_subv_d, MSA128D>; class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, MSA128B, MSA128B>; @@ -2270,14 +2094,10 @@ class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", int_mips_subvi_w, class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", int_mips_subvi_d, MSA128D, MSA128D>; -class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, - MSA128B, MSA128B>; -class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, - MSA128H, MSA128H>; -class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, - MSA128W, MSA128W>; -class VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, - MSA128D, MSA128D>; +class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, MSA128B>; +class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, MSA128H>; +class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, 
MSA128W>; +class VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, MSA128D>; class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, MSA128B, MSA128B>; -- cgit v1.1 From e3273b327555df6489640d2195b52b6317c88844 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 11:01:38 +0000 Subject: [mips][msa] Sorted MSA_BIT_[BHWD]_DESC_BASE into ascending order of element size No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190134 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 9117cef..1da5071 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -759,43 +759,43 @@ class XOR_V_ENC : MSA_VEC_FMT<0b00011, 0b011110>; class XORI_B_ENC : MSA_I8_FMT<0b11, 0b000000>; // Instruction desc. -class MSA_BIT_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode, - RegisterClass RCWD, RegisterClass RCWS, - InstrItinClass itin = NoItinerary> { +class MSA_BIT_B_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm6:$u6); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u6"); - list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt6:$u6))]; + dag InOperandList = (ins RCWS:$ws, uimm3:$u3); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u3"); + list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt3:$u3))]; InstrItinClass Itinerary = itin; } -class MSA_BIT_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, - RegisterClass RCWD, RegisterClass RCWS, - InstrItinClass itin = NoItinerary> { +class MSA_BIT_H_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm5:$u5); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); - list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt5:$u5))]; + dag InOperandList = (ins RCWS:$ws, uimm4:$u4); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u4"); + list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt4:$u4))]; InstrItinClass Itinerary = itin; } -class MSA_BIT_H_DESC_BASE<string instr_asm, SDPatternOperator OpNode, - RegisterClass RCWD, RegisterClass RCWS, - InstrItinClass itin = NoItinerary> { +class MSA_BIT_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm4:$u4); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u4"); - list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt4:$u4))]; + dag InOperandList = (ins RCWS:$ws, uimm5:$u5); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); + list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt5:$u5))]; InstrItinClass Itinerary = itin; } -class MSA_BIT_B_DESC_BASE<string instr_asm, SDPatternOperator OpNode, - RegisterClass RCWD, RegisterClass RCWS, - InstrItinClass itin = NoItinerary> { +class MSA_BIT_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + RegisterClass RCWD, RegisterClass RCWS, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm3:$u3); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u3"); - list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt3:$u3))]; + dag InOperandList = (ins RCWS:$ws, uimm6:$u6); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u6"); + list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt6:$u6))]; InstrItinClass Itinerary = itin; } -- cgit v1.1 From aff1c6427ce22125adfa29de4145030aa3214a2e Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 6 Sep 2013 11:51:39 +0000 Subject: [SystemZ] Tweak integer comparison code The architecture has many comparison instructions, including some that extend one of the operands. The signed comparison instructions use sign extensions and the unsigned comparison instructions use zero extensions. In cases where we had a free choice between signed or unsigned comparisons, we were trying to decide at lowering time which would best fit the available instructions, taking things like extension type into account.
The code to do that was getting increasingly hairy and was also making some bad decisions. E.g. when comparing the result of two LLCs, it is better to use CR rather than CLR, since CR can be fused with a branch while CLR can't. This patch removes the lowering code and instead adds an operand to integer comparisons to say whether signed comparison is required, whether unsigned comparison is required, or whether either is OK. We can then leave the choice of instruction up to the normal isel code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190138 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 150 ++++++++++++----------------- lib/Target/SystemZ/SystemZISelLowering.h | 20 +++- lib/Target/SystemZ/SystemZInstrFP.td | 10 +- lib/Target/SystemZ/SystemZInstrInfo.td | 44 +++++---- lib/Target/SystemZ/SystemZOperators.td | 17 +++- lib/Target/SystemZ/SystemZPatterns.td | 2 +- 6 files changed, 122 insertions(+), 121 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 180c631..9fc56fe 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1069,73 +1069,33 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, CmpOp1 = DAG.getConstant(Value, MVT::i32); } -// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1 -// is an equality comparison that is better implemented using unsigned -// rather than signed comparison instructions. -static bool preferUnsignedComparison(SDValue CmpOp0, SDValue CmpOp1, - unsigned CCMask) { - // The test must be for equality or inequality. - if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE) - return false; - - if (CmpOp1.getOpcode() == ISD::Constant) { - uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue(); - - // If we're comparing with memory, prefer unsigned comparisons for - // values that are in the unsigned 16-bit range but not the signed - // 16-bit range. We want to use CLFHSI and CLGHSI. - if (CmpOp0.hasOneUse() && - ISD::isNormalLoad(CmpOp0.getNode()) && - (Value >= 32768 && Value < 65536)) - return true; - - // Use unsigned comparisons for values that are in the CLGFI range - // but not in the CGFI range. - if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1) - return true; - - return false; - } - - // Prefer CL for zero-extended loads. - if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND || - ISD::isZEXTLoad(CmpOp1.getNode())) - return true; - - // ...and for "in-register" zero extensions. - if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) { - SDValue Mask = CmpOp1.getOperand(1); - if (Mask.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff) - return true; - } - - return false; -} - -// Return true if Op is either an unextended load, or a load with the -// extension type given by IsUnsigned. -static bool isNaturalMemoryOperand(SDValue Op, bool IsUnsigned) { +// Return true if Op is either an unextended load, or a load suitable +// for integer register-memory comparisons of type ICmpType. +static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { LoadSDNode *Load = dyn_cast<LoadSDNode>(Op.getNode()); - if (Load) + if (Load) { + // There are no instructions to compare a register with a memory byte. + if (Load->getMemoryVT() == MVT::i8) + return false; + // Otherwise decide on extension type.
switch (Load->getExtensionType()) { case ISD::NON_EXTLOAD: - case ISD::EXTLOAD: return true; case ISD::SEXTLOAD: - return !IsUnsigned; + return ICmpType != SystemZICMP::UnsignedOnly; case ISD::ZEXTLOAD: - return IsUnsigned; + return ICmpType != SystemZICMP::SignedOnly; default: break; } + } return false; } // Return true if it is better to swap comparison operands Op0 and Op1. -// IsUnsigned says whether an integer comparison is signed or unsigned. +// ICmpType is the type of an integer comparison. static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1, - bool IsUnsigned) { + unsigned ICmpType) { // Leave f128 comparisons alone, since they have no memory forms. if (Op0.getValueType() == MVT::f128) return false; @@ -1154,23 +1114,22 @@ static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1, // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. // In that case we generally prefer the memory to be second. - if ((isNaturalMemoryOperand(Op0, IsUnsigned) && Op0.hasOneUse()) && - !(isNaturalMemoryOperand(Op1, IsUnsigned) && Op1.hasOneUse())) { + if ((isNaturalMemoryOperand(Op0, ICmpType) && Op0.hasOneUse()) && + !(isNaturalMemoryOperand(Op1, ICmpType) && Op1.hasOneUse())) { // The only exceptions are when the second operand is a constant and // we can use things like CHHSI. if (!COp1) return true; - if (IsUnsigned) { - // The memory-immediate instructions require 16-bit unsigned integers. - if (isUInt<16>(COp1->getZExtValue())) - return false; - } else { - // There are no comparisons between integers and signed memory bytes. - // The others require 16-bit signed integers. - if (cast<LoadSDNode>(Op0.getNode())->getMemoryVT() == MVT::i8 || - isInt<16>(COp1->getSExtValue())) - return false; - } + // The unsigned memory-immediate instructions can handle 16-bit + // unsigned integers. + if (ICmpType != SystemZICMP::SignedOnly && + isUInt<16>(COp1->getZExtValue())) + return false; + // The signed memory-immediate instructions can handle 16-bit + // signed integers. + if (ICmpType != SystemZICMP::UnsignedOnly && + isInt<16>(COp1->getSExtValue())) + return false; return true; } return false; @@ -1182,9 +1141,9 @@ static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1, // BitSize is the number of bits in the operands. Return the CC mask that // should be used for the TEST UNDER MASK result, or 0 if the condition is // too complex. -static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned Opcode, - unsigned CCMask, uint64_t Mask, - uint64_t CmpVal) { +static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, + uint64_t Mask, uint64_t CmpVal, + unsigned ICmpType) { assert(Mask != 0 && "ANDs with zero should have been removed by now"); // Work out the masks for the lowest and highest bits. @@ -1194,8 +1153,7 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned Opcode, // Signed ordered comparisons are effectively unsigned if the sign // bit is dropped. - bool EffectivelyUnsigned = (Opcode == SystemZISD::UCMP || HighShift < BitSize - 1); + bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); // Check for equality comparisons with 0, or the equivalent. if (CmpVal == 0) { @@ -1274,7 +1232,7 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned Opcode, // TM version if so. static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0, SDValue &CmpOp1, unsigned &CCValid, - unsigned &CCMask) { + unsigned &CCMask, unsigned ICmpType) { // Check that we have a comparison with a constant.
ConstantSDNode *ConstCmpOp1 = dyn_cast(CmpOp1); if (!ConstCmpOp1) @@ -1298,8 +1256,9 @@ static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0, // Check whether the combination of mask, comparison value and comparison // type are suitable. unsigned BitSize = CmpOp0.getValueType().getSizeInBits(); - unsigned NewCCMask = getTestUnderMaskCond(BitSize, Opcode, CCMask, MaskVal, - ConstCmpOp1->getZExtValue()); + unsigned NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal, + ConstCmpOp1->getZExtValue(), + ICmpType); if (!NewCCMask) return; @@ -1315,24 +1274,39 @@ static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0, // 2-bit result in CC. Set CCValid to the CCMASK_* of all possible // 2-bit results and CCMask to the subset of those results that are // associated with Cond. -static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, SDValue CmpOp0, - SDValue CmpOp1, ISD::CondCode Cond, unsigned &CCValid, +static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG, + SDLoc DL, SDValue CmpOp0, SDValue CmpOp1, + ISD::CondCode Cond, unsigned &CCValid, unsigned &CCMask) { bool IsUnsigned = false; CCMask = CCMaskForCondCode(Cond); - if (CmpOp0.getValueType().isFloatingPoint()) + unsigned Opcode, ICmpType = 0; + if (CmpOp0.getValueType().isFloatingPoint()) { CCValid = SystemZ::CCMASK_FCMP; - else { + Opcode = SystemZISD::FCMP; + } else { IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO; CCValid = SystemZ::CCMASK_ICMP; CCMask &= CCValid; adjustZeroCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); - if (preferUnsignedComparison(CmpOp0, CmpOp1, CCMask)) - IsUnsigned = true; + Opcode = SystemZISD::ICMP; + // Choose the type of comparison. Equality and inequality tests can + // use either signed or unsigned comparisons. The choice also doesn't + // matter if both sign bits are known to be clear. In those cases we + // want to give the main isel code the freedom to choose whichever + // form fits best. + if (CCMask == SystemZ::CCMASK_CMP_EQ || + CCMask == SystemZ::CCMASK_CMP_NE || + (DAG.SignBitIsZero(CmpOp0) && DAG.SignBitIsZero(CmpOp1))) + ICmpType = SystemZICMP::Any; + else if (IsUnsigned) + ICmpType = SystemZICMP::UnsignedOnly; + else + ICmpType = SystemZICMP::SignedOnly; } - if (shouldSwapCmpOperands(CmpOp0, CmpOp1, IsUnsigned)) { + if (shouldSwapCmpOperands(CmpOp0, CmpOp1, ICmpType)) { std::swap(CmpOp0, CmpOp1); CCMask = ((CCMask & SystemZ::CCMASK_CMP_EQ) | (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) | @@ -1340,8 +1314,10 @@ static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, SDValue CmpOp0, (CCMask & SystemZ::CCMASK_CMP_UO)); } - unsigned Opcode = (IsUnsigned ? 
SystemZISD::UCMP : SystemZISD::CMP); - adjustForTestUnderMask(Opcode, CmpOp0, CmpOp1, CCValid, CCMask); + adjustForTestUnderMask(Opcode, CmpOp0, CmpOp1, CCValid, CCMask, ICmpType); + if (Opcode == SystemZISD::ICMP) + return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1, + DAG.getConstant(ICmpType, MVT::i32)); return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1); } @@ -1391,7 +1367,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); unsigned CCValid, CCMask; - SDValue Flags = emitCmp(DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); + SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), Chain, DAG.getConstant(CCValid, MVT::i32), DAG.getConstant(CCMask, MVT::i32), Dest, Flags); @@ -1407,7 +1383,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, SDLoc DL(Op); unsigned CCValid, CCMask; - SDValue Flags = emitCmp(DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); + SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); SmallVector Ops; Ops.push_back(TrueOp); @@ -2041,8 +2017,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(CALL); OPCODE(SIBCALL); OPCODE(PCREL_WRAPPER); - OPCODE(CMP); - OPCODE(UCMP); + OPCODE(ICMP); + OPCODE(FCMP); OPCODE(TM); OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 8b2d19a..500abce 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -38,12 +38,12 @@ namespace SystemZISD { // accesses (LARL). Operand 0 is the address. PCREL_WRAPPER, - // Signed integer and floating-point comparisons. The operands are the - // two values to compare. - CMP, + // Integer comparisons. There are three operands: the two values + // to compare, and an integer of type SystemZICMP. + ICMP, - // Likewise unsigned integer comparison. - UCMP, + // Floating-point comparisons. The two operands are the values to compare. + FCMP, // Test under mask. The first operand is ANDed with the second operand // and the condition codes are set on the result. @@ -162,6 +162,16 @@ namespace SystemZISD { }; } +namespace SystemZICMP { + // Describes whether an integer comparison needs to be signed or unsigned, + // or whether either type is OK. 
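+  // For example, equality tests (and comparisons where both sign bits are
+  // known to be zero) can be given type Any, leaving isel free to choose
+  // a signed or an unsigned instruction; ordered signed and unsigned
+  // comparisons must use SignedOnly and UnsignedOnly respectively.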
+ enum { + Any, + UnsignedOnly, + SignedOnly + }; +} + class SystemZSubtarget; class SystemZTargetMachine; diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 24adaee..576f666 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -349,12 +349,12 @@ def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; //===----------------------------------------------------------------------===// let Defs = [CC], CCValues = 0xF in { - def CEBR : CompareRRE<"ceb", 0xB309, z_cmp, FP32, FP32>; - def CDBR : CompareRRE<"cdb", 0xB319, z_cmp, FP64, FP64>; - def CXBR : CompareRRE<"cxb", 0xB349, z_cmp, FP128, FP128>; + def CEBR : CompareRRE<"ceb", 0xB309, z_fcmp, FP32, FP32>; + def CDBR : CompareRRE<"cdb", 0xB319, z_fcmp, FP64, FP64>; + def CXBR : CompareRRE<"cxb", 0xB349, z_fcmp, FP128, FP128>; - def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load, 4>; - def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load, 8>; + def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index abe28a5..5bfa06d 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -954,39 +954,41 @@ let Defs = [CC] in { // Comparison //===----------------------------------------------------------------------===// -// Signed comparisons. +// Signed comparisons. We put these before the unsigned comparisons because +// some of the signed forms have COMPARE AND BRANCH equivalents whereas none +// of the unsigned forms do. let Defs = [CC], CCValues = 0xE in { // Comparison with a register. - def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>; + def CR : CompareRR <"c", 0x19, z_scmp, GR32, GR32>; def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>; - def CGR : CompareRRE<"cg", 0xB920, z_cmp, GR64, GR64>; + def CGR : CompareRRE<"cg", 0xB920, z_scmp, GR64, GR64>; // Comparison with a signed 16-bit immediate. - def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>; - def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>; + def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>; + def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>; // Comparison with a signed 32-bit immediate. - def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>; - def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>; + def CFI : CompareRIL<"cfi", 0xC2D, z_scmp, GR32, simm32>; + def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>; // Comparison with memory. 
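  // These forms are selected through z_scmp, so an equality comparison
  // (type SystemZICMP::Any) may match either here or in the unsigned
  // CL* equivalents below.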
- defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16, 2>; - defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load, 4>; - def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16, 2>; - def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32, 4>; - def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load, 8>; - def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>; - def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>; - def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>; - def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>; - def CGRL : CompareRILPC<"cgrl", 0xC68, z_cmp, GR64, aligned_load>; + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, sextloadi16, 2>; + defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>; + def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, sextloadi16, 2>; + def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, sextloadi32, 4>; + def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>; + def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_sextloadi16>; + def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>; + def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_sextloadi16>; + def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_sextloadi32>; + def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>; // Comparison between memory and a signed 16-bit immediate. - def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>; - def CHSI : CompareSIL<"chsi", 0xE55C, z_cmp, load, imm32sx16>; - def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load, imm64sx16>; + def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, sextloadi16, imm32sx16>; + def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>; + def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>; } -defm : SXB; +defm : SXB; // Unsigned comparisons. let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index c89e315..c135b3b 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -15,6 +15,9 @@ def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, SDTCisVT<1, i64>]>; def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_ZICmp : SDTypeProfile<0, 3, + [SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; def SDT_ZBRCCMask : SDTypeProfile<0, 3, [SDTCisVT<0, i8>, SDTCisVT<1, i8>, @@ -94,8 +97,8 @@ def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; -def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>; -def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>; +def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>; def z_tm : SDNode<"SystemZISD::TM", SDT_ZCmp, [SDNPOutGlue]>; def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain, SDNPInGlue]>; @@ -163,6 +166,16 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, // Pattern fragments //===----------------------------------------------------------------------===// +// Signed and unsigned comparisons. 
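+// Each fragment accepts any z_icmp whose type operand permits that
+// interpretation, so a comparison of type SystemZICMP::Any satisfies both
+// fragments and isel is free to choose either form.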
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ + unsigned Type = cast(N->getOperand(2))->getZExtValue(); + return Type != SystemZICMP::UnsignedOnly; +}]>; +def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ + unsigned Type = cast(N->getOperand(2))->getZExtValue(); + return Type != SystemZICMP::SignedOnly; +}]>; + // Register sign-extend operations. Sub-32-bit values are represented as i32s. def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>; def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index bc3775f..481534b 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -103,4 +103,4 @@ multiclass BlockLoadStore - : Pat<(z_cmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; + : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; -- cgit v1.1 From bfb9bab2434e3a8c4452708995a7f5d4f5a3b20e Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 12:10:24 +0000 Subject: [mips][msa] Made the operand register sets optional for the BIT_[BHWD] formats Their default is to be the same as the result register set. No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190140 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 164 +++++++++++++----------------------- 1 file changed, 60 insertions(+), 104 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 1da5071..3daf0d2 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -760,7 +760,7 @@ class XORI_B_ENC : MSA_I8_FMT<0b11, 0b000000>; // Instruction desc. 
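// The MSA_BIT_*_DESC_BASE classes below now give the operand register set
// a default equal to the result set, presumably along the lines of:
//   class MSA_BIT_B_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
//                             RegisterClass RCWD, RegisterClass RCWS = RCWD>
// so descriptions whose operand and result sets match can drop the
// repeated argument.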
class MSA_BIT_B_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm3:$u3); @@ -770,7 +770,7 @@ class MSA_BIT_B_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm4:$u4); @@ -780,7 +780,7 @@ class MSA_BIT_H_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm5:$u5); @@ -790,7 +790,7 @@ class MSA_BIT_W_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm6:$u6); @@ -1061,14 +1061,10 @@ class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, MSA128H>; class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, MSA128W>; class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, MSA128D>; -class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", int_mips_bclri_b, - MSA128B, MSA128B>; -class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", int_mips_bclri_h, - MSA128H, MSA128H>; -class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", int_mips_bclri_w, - MSA128W, MSA128W>; -class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", int_mips_bclri_d, - MSA128D, MSA128D>; +class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", int_mips_bclri_b, MSA128B>; +class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", int_mips_bclri_h, MSA128H>; +class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", int_mips_bclri_w, MSA128W>; +class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", int_mips_bclri_d, MSA128D>; class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, MSA128B>; class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, MSA128H>; @@ -1076,13 +1072,13 @@ class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, MSA128W>; class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, MSA128D>; class BINSLI_B_DESC : MSA_BIT_B_DESC_BASE<"binsli.b", int_mips_binsli_b, - MSA128B, MSA128B>; + MSA128B>; class BINSLI_H_DESC : MSA_BIT_H_DESC_BASE<"binsli.h", int_mips_binsli_h, - MSA128H, MSA128H>; + MSA128H>; class BINSLI_W_DESC : MSA_BIT_W_DESC_BASE<"binsli.w", int_mips_binsli_w, - MSA128W, MSA128W>; + MSA128W>; class BINSLI_D_DESC : MSA_BIT_D_DESC_BASE<"binsli.d", int_mips_binsli_d, - MSA128D, MSA128D>; + MSA128D>; class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, MSA128B>; class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, MSA128H>; @@ -1090,13 +1086,13 @@ class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, MSA128W>; class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, MSA128D>; class BINSRI_B_DESC : MSA_BIT_B_DESC_BASE<"binsri.b", int_mips_binsri_b, - MSA128B, MSA128B>; + MSA128B>; class BINSRI_H_DESC : MSA_BIT_H_DESC_BASE<"binsri.h", int_mips_binsri_h, - MSA128H, MSA128H>; + MSA128H>; class BINSRI_W_DESC : MSA_BIT_W_DESC_BASE<"binsri.w", int_mips_binsri_w, - MSA128W, MSA128W>; + MSA128W>; class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, - MSA128D, MSA128D>; + MSA128D>; class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, MSA128B, MSA128B>; @@ -1115,14 +1111,10 @@ class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, MSA128H>; class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, MSA128W>; class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, MSA128D>; -class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", int_mips_bnegi_b, - MSA128B, MSA128B>; -class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", int_mips_bnegi_h, - MSA128H, MSA128H>; -class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", int_mips_bnegi_w, - 
MSA128W, MSA128W>; -class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d, - MSA128D, MSA128D>; +class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", int_mips_bnegi_b, MSA128B>; +class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", int_mips_bnegi_h, MSA128H>; +class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", int_mips_bnegi_w, MSA128W>; +class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", int_mips_bnegi_d, MSA128D>; class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128B>; class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128H>; @@ -1142,14 +1134,10 @@ class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128H>; class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, MSA128W>; class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, MSA128D>; -class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", int_mips_bseti_b, - MSA128B, MSA128B>; -class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", int_mips_bseti_h, - MSA128H, MSA128H>; -class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", int_mips_bseti_w, - MSA128W, MSA128W>; -class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", int_mips_bseti_d, - MSA128D, MSA128D>; +class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", int_mips_bseti_b, MSA128B>; +class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", int_mips_bseti_h, MSA128H>; +class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", int_mips_bseti_w, MSA128W>; +class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", int_mips_bseti_d, MSA128D>; class BZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bz.b", MSA128B>; class BZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bz.h", MSA128H>; @@ -1893,23 +1881,15 @@ class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", int_mips_pcnt_w, class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", int_mips_pcnt_d, MSA128D, MSA128D>; -class SAT_S_B_DESC : MSA_BIT_B_DESC_BASE<"sat_s.b", int_mips_sat_s_b, - MSA128B, MSA128B>; -class SAT_S_H_DESC : MSA_BIT_H_DESC_BASE<"sat_s.h", int_mips_sat_s_h, - MSA128H, MSA128H>; -class SAT_S_W_DESC : MSA_BIT_W_DESC_BASE<"sat_s.w", int_mips_sat_s_w, - MSA128W, MSA128W>; -class SAT_S_D_DESC : MSA_BIT_D_DESC_BASE<"sat_s.d", int_mips_sat_s_d, - MSA128D, MSA128D>; +class SAT_S_B_DESC : MSA_BIT_B_DESC_BASE<"sat_s.b", int_mips_sat_s_b, MSA128B>; +class SAT_S_H_DESC : MSA_BIT_H_DESC_BASE<"sat_s.h", int_mips_sat_s_h, MSA128H>; +class SAT_S_W_DESC : MSA_BIT_W_DESC_BASE<"sat_s.w", int_mips_sat_s_w, MSA128W>; +class SAT_S_D_DESC : MSA_BIT_D_DESC_BASE<"sat_s.d", int_mips_sat_s_d, MSA128D>; -class SAT_U_B_DESC : MSA_BIT_B_DESC_BASE<"sat_u.b", int_mips_sat_u_b, - MSA128B, MSA128B>; -class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, - MSA128H, MSA128H>; -class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, - MSA128W, MSA128W>; -class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, - MSA128D, MSA128D>; +class SAT_U_B_DESC : MSA_BIT_B_DESC_BASE<"sat_u.b", int_mips_sat_u_b, MSA128B>; +class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, MSA128H>; +class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, MSA128W>; +class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, MSA128D>; class SHF_B_DESC : MSA_I8_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B, MSA128B>; @@ -1923,28 +1903,20 @@ class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>; class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, MSA128W>; class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, MSA128D>; -class SLDI_B_DESC : MSA_BIT_B_DESC_BASE<"sldi.b", int_mips_sldi_b, - 
MSA128B, MSA128B>; -class SLDI_H_DESC : MSA_BIT_H_DESC_BASE<"sldi.h", int_mips_sldi_h, - MSA128H, MSA128H>; -class SLDI_W_DESC : MSA_BIT_W_DESC_BASE<"sldi.w", int_mips_sldi_w, - MSA128W, MSA128W>; -class SLDI_D_DESC : MSA_BIT_D_DESC_BASE<"sldi.d", int_mips_sldi_d, - MSA128D, MSA128D>; +class SLDI_B_DESC : MSA_BIT_B_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128B>; +class SLDI_H_DESC : MSA_BIT_H_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128H>; +class SLDI_W_DESC : MSA_BIT_W_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128W>; +class SLDI_D_DESC : MSA_BIT_D_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128D>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", int_mips_sll_b, MSA128B>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", int_mips_sll_h, MSA128H>; class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", int_mips_sll_w, MSA128W>; class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", int_mips_sll_d, MSA128D>; -class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, - MSA128B, MSA128B>; -class SLLI_H_DESC : MSA_BIT_H_DESC_BASE<"slli.h", int_mips_slli_h, - MSA128H, MSA128H>; -class SLLI_W_DESC : MSA_BIT_W_DESC_BASE<"slli.w", int_mips_slli_w, - MSA128W, MSA128W>; -class SLLI_D_DESC : MSA_BIT_D_DESC_BASE<"slli.d", int_mips_slli_d, - MSA128D, MSA128D>; +class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, MSA128B>; +class SLLI_H_DESC : MSA_BIT_H_DESC_BASE<"slli.h", int_mips_slli_h, MSA128H>; +class SLLI_W_DESC : MSA_BIT_W_DESC_BASE<"slli.w", int_mips_slli_w, MSA128W>; +class SLLI_D_DESC : MSA_BIT_D_DESC_BASE<"slli.d", int_mips_slli_d, MSA128D>; class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128B, MSA128B, GPR32>; @@ -1956,69 +1928,53 @@ class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, MSA128D, MSA128D, GPR32>; class SPLATI_B_DESC : MSA_BIT_B_DESC_BASE<"splati.b", int_mips_splati_b, - MSA128B, MSA128B>; + MSA128B>; class SPLATI_H_DESC : MSA_BIT_H_DESC_BASE<"splati.h", int_mips_splati_h, - MSA128H, MSA128H>; + MSA128H>; class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w, - MSA128W, MSA128W>; + MSA128W>; class SPLATI_D_DESC : MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d, - MSA128D, MSA128D>; + MSA128D>; class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", int_mips_sra_b, MSA128B>; class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", int_mips_sra_h, MSA128H>; class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", int_mips_sra_w, MSA128W>; class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", int_mips_sra_d, MSA128D>; -class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, - MSA128B, MSA128B>; -class SRAI_H_DESC : MSA_BIT_H_DESC_BASE<"srai.h", int_mips_srai_h, - MSA128H, MSA128H>; -class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, - MSA128W, MSA128W>; -class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, - MSA128D, MSA128D>; +class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, MSA128B>; +class SRAI_H_DESC : MSA_BIT_H_DESC_BASE<"srai.h", int_mips_srai_h, MSA128H>; +class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, MSA128W>; +class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, MSA128D>; class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128B>; class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128H>; class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128W>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128D>; -class SRARI_B_DESC : MSA_BIT_B_DESC_BASE<"srari.b", int_mips_srari_b, - MSA128B, MSA128B>; -class SRARI_H_DESC : 
MSA_BIT_H_DESC_BASE<"srari.h", int_mips_srari_h, - MSA128H, MSA128H>; -class SRARI_W_DESC : MSA_BIT_W_DESC_BASE<"srari.w", int_mips_srari_w, - MSA128W, MSA128W>; -class SRARI_D_DESC : MSA_BIT_D_DESC_BASE<"srari.d", int_mips_srari_d, - MSA128D, MSA128D>; +class SRARI_B_DESC : MSA_BIT_B_DESC_BASE<"srari.b", int_mips_srari_b, MSA128B>; +class SRARI_H_DESC : MSA_BIT_H_DESC_BASE<"srari.h", int_mips_srari_h, MSA128H>; +class SRARI_W_DESC : MSA_BIT_W_DESC_BASE<"srari.w", int_mips_srari_w, MSA128W>; +class SRARI_D_DESC : MSA_BIT_D_DESC_BASE<"srari.d", int_mips_srari_d, MSA128D>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, MSA128B>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, MSA128H>; class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", int_mips_srl_w, MSA128W>; class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", int_mips_srl_d, MSA128D>; -class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, - MSA128B, MSA128B>; -class SRLI_H_DESC : MSA_BIT_H_DESC_BASE<"srli.h", int_mips_srli_h, - MSA128H, MSA128H>; -class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, - MSA128W, MSA128W>; -class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, - MSA128D, MSA128D>; +class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, MSA128B>; +class SRLI_H_DESC : MSA_BIT_H_DESC_BASE<"srli.h", int_mips_srli_h, MSA128H>; +class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, MSA128W>; +class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, MSA128D>; class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128B>; class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128H>; class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128W>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128D>; -class SRLRI_B_DESC : MSA_BIT_B_DESC_BASE<"srlri.b", int_mips_srlri_b, - MSA128B, MSA128B>; -class SRLRI_H_DESC : MSA_BIT_H_DESC_BASE<"srlri.h", int_mips_srlri_h, - MSA128H, MSA128H>; -class SRLRI_W_DESC : MSA_BIT_W_DESC_BASE<"srlri.w", int_mips_srlri_w, - MSA128W, MSA128W>; -class SRLRI_D_DESC : MSA_BIT_D_DESC_BASE<"srlri.d", int_mips_srlri_d, - MSA128D, MSA128D>; +class SRLRI_B_DESC : MSA_BIT_B_DESC_BASE<"srlri.b", int_mips_srlri_b, MSA128B>; +class SRLRI_H_DESC : MSA_BIT_H_DESC_BASE<"srlri.h", int_mips_srlri_h, MSA128H>; +class SRLRI_W_DESC : MSA_BIT_W_DESC_BASE<"srlri.w", int_mips_srlri_w, MSA128W>; +class SRLRI_D_DESC : MSA_BIT_D_DESC_BASE<"srlri.d", int_mips_srlri_d, MSA128D>; class ST_DESC_BASE Date: Fri, 6 Sep 2013 12:23:19 +0000 Subject: [mips][msa] Made the operand register sets optional for the I5 and SI5 formats Their default is to be the same as the result register set. 
No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190141 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 120 ++++++++++++++---------------------- 1 file changed, 46 insertions(+), 74 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 3daf0d2..ac5e01f 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -810,7 +810,7 @@ class MSA_COPY_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm5:$u5); @@ -820,7 +820,7 @@ class MSA_I5_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, simm5:$s5); @@ -995,14 +995,10 @@ class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, MSA128W>, class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, MSA128D>, IsCommutable; -class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, MSA128B, - MSA128B>; -class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H, - MSA128H>; -class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, MSA128W, - MSA128W>; -class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, MSA128D, - MSA128D>; +class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, MSA128B>; +class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H>; +class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, MSA128W>; +class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, MSA128D>; class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, MSA128B, MSA128B>; @@ -1155,14 +1151,10 @@ class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, MSA128W>, class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, MSA128D>, IsCommutable; -class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, - MSA128B, MSA128B>; -class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", int_mips_ceqi_h, - MSA128H, MSA128H>; -class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", int_mips_ceqi_w, - MSA128W, MSA128W>; -class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", int_mips_ceqi_d, - MSA128D, MSA128D>; +class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, MSA128B>; +class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", int_mips_ceqi_h, MSA128H>; +class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", int_mips_ceqi_w, MSA128W>; +class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", int_mips_ceqi_d, MSA128D>; class CFCMSA_DESC { dag OutOperandList = (outs GPR32:$rd); @@ -1183,22 +1175,22 @@ class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", int_mips_cle_u_w, MSA128W>; class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", int_mips_cle_u_d, MSA128D>; class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", int_mips_clei_s_b, - MSA128B, MSA128B>; + MSA128B>; class CLEI_S_H_DESC : MSA_SI5_DESC_BASE<"clei_s.h", int_mips_clei_s_h, - MSA128H, MSA128H>; + MSA128H>; class CLEI_S_W_DESC : MSA_SI5_DESC_BASE<"clei_s.w", int_mips_clei_s_w, - MSA128W, MSA128W>; + MSA128W>; class CLEI_S_D_DESC : MSA_SI5_DESC_BASE<"clei_s.d", int_mips_clei_s_d, - MSA128D, MSA128D>; + MSA128D>; class CLEI_U_B_DESC : MSA_SI5_DESC_BASE<"clei_u.b", int_mips_clei_u_b, - MSA128B, MSA128B>; + MSA128B>; class CLEI_U_H_DESC : MSA_SI5_DESC_BASE<"clei_u.h", int_mips_clei_u_h, - MSA128H, MSA128H>; + MSA128H>; class CLEI_U_W_DESC : MSA_SI5_DESC_BASE<"clei_u.w", int_mips_clei_u_w, - MSA128W, MSA128W>; + MSA128W>; class CLEI_U_D_DESC : MSA_SI5_DESC_BASE<"clei_u.d", int_mips_clei_u_d, - 
MSA128D, MSA128D>; + MSA128D>; class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", int_mips_clt_s_b, MSA128B>; class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", int_mips_clt_s_h, MSA128H>; @@ -1211,22 +1203,22 @@ class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", int_mips_clt_u_w, MSA128W>; class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", int_mips_clt_u_d, MSA128D>; class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", int_mips_clti_s_b, - MSA128B, MSA128B>; + MSA128B>; class CLTI_S_H_DESC : MSA_SI5_DESC_BASE<"clti_s.h", int_mips_clti_s_h, - MSA128H, MSA128H>; + MSA128H>; class CLTI_S_W_DESC : MSA_SI5_DESC_BASE<"clti_s.w", int_mips_clti_s_w, - MSA128W, MSA128W>; + MSA128W>; class CLTI_S_D_DESC : MSA_SI5_DESC_BASE<"clti_s.d", int_mips_clti_s_d, - MSA128D, MSA128D>; + MSA128D>; class CLTI_U_B_DESC : MSA_SI5_DESC_BASE<"clti_u.b", int_mips_clti_u_b, - MSA128B, MSA128B>; + MSA128B>; class CLTI_U_H_DESC : MSA_SI5_DESC_BASE<"clti_u.h", int_mips_clti_u_h, - MSA128H, MSA128H>; + MSA128H>; class CLTI_U_W_DESC : MSA_SI5_DESC_BASE<"clti_u.w", int_mips_clti_u_w, - MSA128W, MSA128W>; + MSA128W>; class CLTI_U_D_DESC : MSA_SI5_DESC_BASE<"clti_u.d", int_mips_clti_u_d, - MSA128D, MSA128D>; + MSA128D>; class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", int_mips_copy_s_b, GPR32, MSA128B>; @@ -1729,23 +1721,15 @@ class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, MSA128H>; class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, MSA128W>; class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, MSA128D>; -class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, - MSA128B, MSA128B>; -class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, - MSA128H, MSA128H>; -class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, - MSA128W, MSA128W>; -class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, - MSA128D, MSA128D>; +class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, MSA128B>; +class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, MSA128H>; +class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, MSA128W>; +class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, MSA128D>; -class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, - MSA128B, MSA128B>; -class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, - MSA128H, MSA128H>; -class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, - MSA128W, MSA128W>; -class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, - MSA128D, MSA128D>; +class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, MSA128B>; +class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, MSA128H>; +class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, MSA128W>; +class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, MSA128D>; class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B>; class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128H>; @@ -1762,23 +1746,15 @@ class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, MSA128H>; class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, MSA128W>; class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, MSA128D>; -class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", int_mips_mini_s_b, - MSA128B, MSA128B>; -class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", int_mips_mini_s_h, - MSA128H, MSA128H>; -class MINI_S_W_DESC : 
MSA_I5_DESC_BASE<"mini_s.w", int_mips_mini_s_w, - MSA128W, MSA128W>; -class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", int_mips_mini_s_d, - MSA128D, MSA128D>; +class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", int_mips_mini_s_b, MSA128B>; +class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", int_mips_mini_s_h, MSA128H>; +class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", int_mips_mini_s_w, MSA128W>; +class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", int_mips_mini_s_d, MSA128D>; -class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", int_mips_mini_u_b, - MSA128B, MSA128B>; -class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", int_mips_mini_u_h, - MSA128H, MSA128H>; -class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", int_mips_mini_u_w, - MSA128W, MSA128W>; -class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", int_mips_mini_u_d, - MSA128D, MSA128D>; +class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", int_mips_mini_u_b, MSA128B>; +class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", int_mips_mini_u_h, MSA128H>; +class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", int_mips_mini_u_w, MSA128W>; +class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", int_mips_mini_u_d, MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H>; @@ -2041,14 +2017,10 @@ class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", int_mips_subv_h, MSA128H>; class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", int_mips_subv_w, MSA128W>; class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", int_mips_subv_d, MSA128D>; -class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, - MSA128B, MSA128B>; -class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", int_mips_subvi_h, - MSA128H, MSA128H>; -class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", int_mips_subvi_w, - MSA128W, MSA128W>; -class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", int_mips_subvi_d, - MSA128D, MSA128D>; +class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, MSA128B>; +class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", int_mips_subvi_h, MSA128H>; +class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", int_mips_subvi_w, MSA128W>; +class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", int_mips_subvi_d, MSA128D>; class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, MSA128B>; class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, MSA128H>; -- cgit v1.1 From d31c238372b2ddba147a012f457cae896e6e828f Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 12:25:47 +0000 Subject: [mips][msa] Made the operand register sets optional for the I8 format Their default is to be the same as the result register set. 
No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190142 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index ac5e01f..8d9b664 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -830,7 +830,7 @@ class MSA_SI5_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm8:$u8); @@ -1003,8 +1003,7 @@ class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, MSA128D>; class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, MSA128B, MSA128B>; -class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, - MSA128B, MSA128B>; +class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, MSA128B>; class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, MSA128B>; class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, MSA128H>; @@ -1093,14 +1092,12 @@ class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, MSA128B, MSA128B>; -class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, - MSA128B, MSA128B>; +class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, MSA128B>; class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, MSA128B, MSA128B>; -class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, - MSA128B, MSA128B>; +class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, MSA128B>; class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, MSA128B>; class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, MSA128H>; @@ -1122,8 +1119,7 @@ class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128B>; class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, MSA128B, MSA128B>; -class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, - MSA128B, MSA128B>; +class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, MSA128B>; class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, MSA128B>; class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128H>; @@ -1829,14 +1825,12 @@ class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", int_mips_nlzc_d, class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, MSA128B, MSA128B>; -class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, - MSA128B, MSA128B>; +class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, MSA128B>; class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, MSA128B, MSA128B>; -class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, - MSA128B, MSA128B>; +class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, MSA128B>; class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, MSA128B>; class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, MSA128H>; @@ -1867,12 +1861,9 @@ class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, MSA128H>; class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, MSA128W>; class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, MSA128D>; -class SHF_B_DESC : MSA_I8_DESC_BASE<"shf.b", int_mips_shf_b, - MSA128B, MSA128B>; -class SHF_H_DESC : MSA_I8_DESC_BASE<"shf.h", int_mips_shf_h, - MSA128H, MSA128H>; -class SHF_W_DESC : MSA_I8_DESC_BASE<"shf.w", int_mips_shf_w, - MSA128W, 
MSA128W>; +class SHF_B_DESC : MSA_I8_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B>; +class SHF_H_DESC : MSA_I8_DESC_BASE<"shf.h", int_mips_shf_h, MSA128H>; +class SHF_W_DESC : MSA_I8_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W>; class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B>; class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>; @@ -2030,8 +2021,8 @@ class VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, MSA128D>; class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, MSA128B, MSA128B>; -class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, - MSA128B, MSA128B>; +class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, MSA128B>; + // Instruction defs. def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC, Requires<[HasMSA]>; def ADD_A_H : ADD_A_H_ENC, ADD_A_H_DESC, Requires<[HasMSA]>; -- cgit v1.1 From 99d02d13259620d175986bf9c7e1c07b2640163d Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 12:28:13 +0000 Subject: [mips][msa] Made the operand register sets optional for the 2RF format Their default is to be the same as the result register set. No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190143 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 98 +++++++++++++++---------------------- 1 file changed, 39 insertions(+), 59 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 8d9b664..3ca83d7 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -850,7 +850,7 @@ class MSA_I10_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws); @@ -860,7 +860,7 @@ class MSA_2R_DESC_BASE : MSA_2R_DESC_BASE; @@ -1322,9 +1322,9 @@ class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, IsCommutable; class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, - MSA128W, MSA128W>; + MSA128W>; class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, - MSA128D, MSA128D>; + MSA128D>; class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", int_mips_fcle_w, MSA128W, MSA128W>; @@ -1411,14 +1411,14 @@ class FEXUPR_D_DESC : MSA_2RF_DESC_BASE<"fexupr.d", int_mips_fexupr_d, MSA128D, MSA128W>; class FFINT_S_W_DESC : MSA_2RF_DESC_BASE<"ffint_s.w", int_mips_ffint_s_w, - MSA128W, MSA128W>; + MSA128W>; class FFINT_S_D_DESC : MSA_2RF_DESC_BASE<"ffint_s.d", int_mips_ffint_s_d, - MSA128D, MSA128D>; + MSA128D>; class FFINT_U_W_DESC : MSA_2RF_DESC_BASE<"ffint_u.w", int_mips_ffint_u_w, - MSA128W, MSA128W>; + MSA128W>; class FFINT_U_D_DESC : MSA_2RF_DESC_BASE<"ffint_u.d", int_mips_ffint_u_d, - MSA128D, MSA128D>; + MSA128D>; class FFQL_W_DESC : MSA_2RF_DESC_BASE<"ffql.w", int_mips_ffql_w, MSA128W, MSA128H>; @@ -1437,10 +1437,8 @@ class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", int_mips_fill_h, class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", int_mips_fill_w, MSA128W, GPR32>; -class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", int_mips_flog2_w, - MSA128W, MSA128W>; -class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", int_mips_flog2_d, - MSA128D, MSA128D>; +class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", int_mips_flog2_w, MSA128W>; +class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", int_mips_flog2_d, MSA128D>; class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, MSA128W, MSA128W>; @@ -1477,20 +1475,16 @@ class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", int_mips_fmul_w, class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", 
int_mips_fmul_d, MSA128D, MSA128D>; -class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", int_mips_frint_w, - MSA128W, MSA128W>; -class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", int_mips_frint_d, - MSA128D, MSA128D>; +class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", int_mips_frint_w, MSA128W>; +class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", int_mips_frint_d, MSA128D>; -class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, - MSA128W, MSA128W>; -class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, - MSA128D, MSA128D>; +class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, MSA128W>; +class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, MSA128D>; class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w, - MSA128W, MSA128W>; + MSA128W>; class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d, - MSA128D, MSA128D>; + MSA128D>; class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, MSA128W, MSA128W>; @@ -1522,10 +1516,8 @@ class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, MSA128D, MSA128D>; -class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", int_mips_fsqrt_w, - MSA128W, MSA128W>; -class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", int_mips_fsqrt_d, - MSA128D, MSA128D>; +class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", int_mips_fsqrt_w, MSA128W>; +class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", int_mips_fsqrt_d, MSA128D>; class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", int_mips_fsub_w, MSA128W, MSA128W>; @@ -1558,24 +1550,24 @@ class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d, MSA128D, MSA128D>; class FTRUNC_S_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.w", int_mips_ftrunc_s_w, - MSA128W, MSA128W>; + MSA128W>; class FTRUNC_S_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.d", int_mips_ftrunc_s_d, - MSA128D, MSA128D>; + MSA128D>; class FTRUNC_U_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.w", int_mips_ftrunc_u_w, - MSA128W, MSA128W>; + MSA128W>; class FTRUNC_U_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.d", int_mips_ftrunc_u_d, - MSA128D, MSA128D>; + MSA128D>; class FTINT_S_W_DESC : MSA_2RF_DESC_BASE<"ftint_s.w", int_mips_ftint_s_w, - MSA128W, MSA128W>; + MSA128W>; class FTINT_S_D_DESC : MSA_2RF_DESC_BASE<"ftint_s.d", int_mips_ftint_s_d, - MSA128D, MSA128D>; + MSA128D>; class FTINT_U_W_DESC : MSA_2RF_DESC_BASE<"ftint_u.w", int_mips_ftint_u_w, - MSA128W, MSA128W>; + MSA128W>; class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d, - MSA128D, MSA128D>; + MSA128D>; class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, MSA128H, MSA128W>; @@ -1804,23 +1796,15 @@ class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", int_mips_mulv_h, MSA128H>; class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", int_mips_mulv_w, MSA128W>; class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", int_mips_mulv_d, MSA128D>; -class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, - MSA128B, MSA128B>; -class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, - MSA128H, MSA128H>; -class NLOC_W_DESC : MSA_2R_DESC_BASE<"nloc.w", int_mips_nloc_w, - MSA128W, MSA128W>; -class NLOC_D_DESC : MSA_2R_DESC_BASE<"nloc.d", int_mips_nloc_d, - MSA128D, MSA128D>; - -class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", int_mips_nlzc_b, - MSA128B, MSA128B>; -class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", int_mips_nlzc_h, - MSA128H, MSA128H>; -class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", int_mips_nlzc_w, - MSA128W, MSA128W>; -class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", 
int_mips_nlzc_d, - MSA128D, MSA128D>; +class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, MSA128B>; +class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, MSA128H>; +class NLOC_W_DESC : MSA_2R_DESC_BASE<"nloc.w", int_mips_nloc_w, MSA128W>; +class NLOC_D_DESC : MSA_2R_DESC_BASE<"nloc.d", int_mips_nloc_d, MSA128D>; + +class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", int_mips_nlzc_b, MSA128B>; +class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", int_mips_nlzc_h, MSA128H>; +class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", int_mips_nlzc_w, MSA128W>; +class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", int_mips_nlzc_d, MSA128D>; class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, MSA128B, MSA128B>; @@ -1842,14 +1826,10 @@ class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, MSA128H>; class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, MSA128W>; class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, MSA128D>; -class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", int_mips_pcnt_b, - MSA128B, MSA128B>; -class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", int_mips_pcnt_h, - MSA128H, MSA128H>; -class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", int_mips_pcnt_w, - MSA128W, MSA128W>; -class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", int_mips_pcnt_d, - MSA128D, MSA128D>; +class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", int_mips_pcnt_b, MSA128B>; +class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", int_mips_pcnt_h, MSA128H>; +class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", int_mips_pcnt_w, MSA128W>; +class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", int_mips_pcnt_d, MSA128D>; class SAT_S_B_DESC : MSA_BIT_B_DESC_BASE<"sat_s.b", int_mips_sat_s_b, MSA128B>; class SAT_S_H_DESC : MSA_BIT_H_DESC_BASE<"sat_s.h", int_mips_sat_s_h, MSA128H>; -- cgit v1.1 From a674463aac1d0b5d039da11045ccfab5e849b886 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Fri, 6 Sep 2013 12:30:36 +0000 Subject: This patch adds support for microMIPS disassembler and disassembler make check tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190144 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 87 ++++++++++++++++++++--- lib/Target/Mips/MicroMipsInstrInfo.td | 23 +++--- 2 files changed, 93 insertions(+), 17 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 6e12a5d..e2b3fa4 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -53,12 +53,15 @@ protected: /// MipsDisassembler - a disasembler class for Mips32. class MipsDisassembler : public MipsDisassemblerBase { + bool IsMicroMips; public: /// Constructor - Initializes the disassembler. /// MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info, bool bigEndian) : - MipsDisassemblerBase(STI, Info, bigEndian) {} + MipsDisassemblerBase(STI, Info, bigEndian) { + IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips; + } /// getInstruction - See MCDisassembler. 
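  /// For little-endian microMIPS the two half-words of an instruction are
  /// stored in 2|1|4|3 byte order rather than 4|3|2|1; readInstruction32
  /// below reassembles the word accordingly when IsMicroMips is set.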
virtual DecodeStatus getInstruction(MCInst &instr, @@ -182,6 +185,16 @@ static DecodeStatus DecodeMem(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemMMImm12(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMemMMImm16(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -251,7 +264,8 @@ static DecodeStatus readInstruction32(const MemoryObject ®ion, uint64_t address, uint64_t &size, uint32_t &insn, - bool isBigEndian) { + bool isBigEndian, + bool IsMicroMips) { uint8_t Bytes[4]; // We want to read exactly 4 Bytes of data. @@ -269,10 +283,20 @@ static DecodeStatus readInstruction32(const MemoryObject ®ion, } else { // Encoded as a small-endian 32-bit word in the stream. - insn = (Bytes[0] << 0) | - (Bytes[1] << 8) | - (Bytes[2] << 16) | - (Bytes[3] << 24); + // Little-endian byte ordering: + // mips32r2: 4 | 3 | 2 | 1 + // microMIPS: 2 | 1 | 4 | 3 + if (IsMicroMips) { + insn = (Bytes[2] << 0) | + (Bytes[3] << 8) | + (Bytes[0] << 16) | + (Bytes[1] << 24); + } else { + insn = (Bytes[0] << 0) | + (Bytes[1] << 8) | + (Bytes[2] << 16) | + (Bytes[3] << 24); + } } return MCDisassembler::Success; @@ -288,10 +312,21 @@ MipsDisassembler::getInstruction(MCInst &instr, uint32_t Insn; DecodeStatus Result = readInstruction32(Region, Address, Size, - Insn, isBigEndian); + Insn, isBigEndian, IsMicroMips); if (Result == MCDisassembler::Fail) return MCDisassembler::Fail; + if (IsMicroMips) { + // Calling the auto-generated decoder function. + Result = decodeInstruction(DecoderTableMicroMips32, instr, Insn, Address, + this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + return MCDisassembler::Fail; + } + // Calling the auto-generated decoder function. 
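  // (Only standard MIPS32 encodings reach this point; the microMIPS path
  // above returns whether or not its table matched.)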
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, this, STI); @@ -313,7 +348,7 @@ Mips64Disassembler::getInstruction(MCInst &instr, uint32_t Insn; DecodeStatus Result = readInstruction32(Region, Address, Size, - Insn, isBigEndian); + Insn, isBigEndian, false); if (Result == MCDisassembler::Fail) return MCDisassembler::Fail; @@ -470,6 +505,42 @@ static DecodeStatus DecodeMem(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeMemMMImm12(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<12>(Insn & 0x0fff); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::CreateReg(Reg)); + Inst.addOperand(MCOperand::CreateReg(Base)); + Inst.addOperand(MCOperand::CreateImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMemMMImm16(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<16>(Insn & 0xffff); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::CreateReg(Reg)); + Inst.addOperand(MCOperand::CreateReg(Base)); + Inst.addOperand(MCOperand::CreateImm(Offset)); + + return MCDisassembler::Success; +} + static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, uint64_t Address, diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 2cc5555..273d3cc 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -19,6 +19,7 @@ class LoadLeftRightMM { + let DecoderMethod = "DecodeMemMMImm12"; string Constraints = "$src = $rt"; } @@ -26,7 +27,9 @@ class StoreLeftRightMM: InstSE<(outs), (ins RO:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RO:$rt, addrimm12:$addr)], NoItinerary, FrmI>; + [(OpNode RO:$rt, addrimm12:$addr)], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMemMMImm12"; +} let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { /// Arithmetic Instructions (ALU Immediate) @@ -86,14 +89,16 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { SRLV_FM_MM<0xd0, 0>; /// Load and Store Instructions - aligned - def LB_MM : Load<"lb", GPR32Opnd, sextloadi8>, MMRel, LW_FM_MM<0x7>; - def LBu_MM : Load<"lbu", GPR32Opnd, zextloadi8>, MMRel, LW_FM_MM<0x5>; - def LH_MM : Load<"lh", GPR32Opnd, sextloadi16>, MMRel, LW_FM_MM<0xf>; - def LHu_MM : Load<"lhu", GPR32Opnd, zextloadi16>, MMRel, LW_FM_MM<0xd>; - def LW_MM : Load<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>; - def SB_MM : Store<"sb", GPR32Opnd, truncstorei8>, MMRel, LW_FM_MM<0x6>; - def SH_MM : Store<"sh", GPR32Opnd, truncstorei16>, MMRel, LW_FM_MM<0xe>; - def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>; + let DecoderMethod = "DecodeMemMMImm16" in { + def LB_MM : Load<"lb", GPR32Opnd>, MMRel, LW_FM_MM<0x7>; + def LBu_MM : Load<"lbu", GPR32Opnd>, MMRel, LW_FM_MM<0x5>; + def LH_MM : Load<"lh", GPR32Opnd>, MMRel, LW_FM_MM<0xf>; + def LHu_MM : Load<"lhu", GPR32Opnd>, MMRel, LW_FM_MM<0xd>; + def LW_MM : Load<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>; + def SB_MM : Store<"sb", GPR32Opnd>, MMRel, LW_FM_MM<0x6>; + def SH_MM : Store<"sh", GPR32Opnd>, MMRel, LW_FM_MM<0xe>; + def 
SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>; + } /// Load and Store Instructions - unaligned def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12>, -- cgit v1.1 From 6976d75415fdbdcd4cef52e0c9a20000dbb17db7 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 12:30:43 +0000 Subject: [mips][msa] Made the operand register sets optional for the 3R_4R format Their default is to be the same as the result register set. No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190145 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 52 ++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 30 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 3ca83d7..5d71fbf 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -877,8 +877,8 @@ class MSA_3R_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWD:$wd_in, RCWS:$ws, RCWT:$wt); @@ -1267,38 +1267,38 @@ class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, MSA128D>, IsCommutable; class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h, - MSA128H, MSA128B>, + MSA128H, MSA128B, MSA128B>, IsCommutable; class DPADD_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.w", int_mips_dpadd_s_w, - MSA128W, MSA128H>, + MSA128W, MSA128H, MSA128H>, IsCommutable; class DPADD_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.d", int_mips_dpadd_s_d, - MSA128D, MSA128W>, + MSA128D, MSA128W, MSA128W>, IsCommutable; class DPADD_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.h", int_mips_dpadd_u_h, - MSA128H, MSA128B>, + MSA128H, MSA128B, MSA128B>, IsCommutable; class DPADD_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.w", int_mips_dpadd_u_w, - MSA128W, MSA128H>, + MSA128W, MSA128H, MSA128H>, IsCommutable; class DPADD_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.d", int_mips_dpadd_u_d, - MSA128D, MSA128W>, + MSA128D, MSA128W, MSA128W>, IsCommutable; class DPSUB_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.h", int_mips_dpsub_s_h, - MSA128H, MSA128B>; + MSA128H, MSA128B, MSA128B>; class DPSUB_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.w", int_mips_dpsub_s_w, - MSA128W, MSA128H>; + MSA128W, MSA128H, MSA128H>; class DPSUB_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.d", int_mips_dpsub_s_d, - MSA128D, MSA128W>; + MSA128D, MSA128W, MSA128W>; class DPSUB_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.h", int_mips_dpsub_u_h, - MSA128H, MSA128B>; + MSA128H, MSA128B, MSA128B>; class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w, - MSA128W, MSA128H>; + MSA128W, MSA128H, MSA128H>; class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, - MSA128D, MSA128W>; + MSA128D, MSA128W, MSA128W>; class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", int_mips_fadd_w, MSA128W, MSA128W>, @@ -1685,14 +1685,10 @@ class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h, class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w, MSA128W, MSA128W>; -class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b, - MSA128B, MSA128B>; -class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", int_mips_maddv_h, - MSA128H, MSA128H>; -class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", int_mips_maddv_w, - MSA128W, MSA128W>; -class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", int_mips_maddv_d, - MSA128D, MSA128D>; +class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b, MSA128B>; +class 
MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", int_mips_maddv_h, MSA128H>; +class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", int_mips_maddv_w, MSA128W>; +class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", int_mips_maddv_d, MSA128D>; class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, MSA128B>; class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, MSA128H>; @@ -1772,14 +1768,10 @@ class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h, class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w, MSA128W, MSA128W>; -class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, - MSA128B, MSA128B>; -class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, - MSA128H, MSA128H>; -class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, - MSA128W, MSA128W>; -class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, - MSA128D, MSA128D>; +class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, MSA128B>; +class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, MSA128H>; +class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, MSA128W>; +class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, MSA128D>; class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, MSA128H, MSA128H>; -- cgit v1.1 From 888497d8a2927ddab38667d54d574c3cadeef1e5 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 12:32:57 +0000 Subject: [mips][msa] Made the operand register sets optional for the 3RF formats Their default is to be the same as the result register set. No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190146 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 204 +++++++++++++----------------------- 1 file changed, 72 insertions(+), 132 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 5d71fbf..8e99669 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -890,8 +890,8 @@ class MSA_3R_4R_DESC_BASE : MSA_3R_DESC_BASE; @@ -1300,25 +1300,19 @@ class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w, class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, MSA128D, MSA128W, MSA128W>; -class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", int_mips_fadd_w, - MSA128W, MSA128W>, +class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", int_mips_fadd_w, MSA128W>, IsCommutable; -class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", int_mips_fadd_d, - MSA128D, MSA128D>, +class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", int_mips_fadd_d, MSA128D>, IsCommutable; -class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, - MSA128W, MSA128W>, +class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, MSA128W>, IsCommutable; -class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, - MSA128D, MSA128D>, +class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, MSA128D>, IsCommutable; -class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", int_mips_fceq_w, - MSA128W, MSA128W>, +class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", int_mips_fceq_w, MSA128W>, IsCommutable; -class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, - MSA128D, MSA128D>, +class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, MSA128D>, IsCommutable; class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, @@ -1326,79 
+1320,57 @@ class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, MSA128D>; -class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", int_mips_fcle_w, - MSA128W, MSA128W>; -class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", int_mips_fcle_d, - MSA128D, MSA128D>; +class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", int_mips_fcle_w, MSA128W>; +class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", int_mips_fcle_d, MSA128D>; -class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", int_mips_fclt_w, - MSA128W, MSA128W>; -class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", int_mips_fclt_d, - MSA128D, MSA128D>; +class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", int_mips_fclt_w, MSA128W>; +class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", int_mips_fclt_d, MSA128D>; -class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", int_mips_fcne_w, - MSA128W, MSA128W>, +class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", int_mips_fcne_w, MSA128W>, IsCommutable; -class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", int_mips_fcne_d, - MSA128D, MSA128D>, +class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", int_mips_fcne_d, MSA128D>, IsCommutable; -class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", int_mips_fcor_w, - MSA128W, MSA128W>, +class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", int_mips_fcor_w, MSA128W>, IsCommutable; -class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", int_mips_fcor_d, - MSA128D, MSA128D>, +class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", int_mips_fcor_d, MSA128D>, IsCommutable; -class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", int_mips_fcueq_w, - MSA128W, MSA128W>, +class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", int_mips_fcueq_w, MSA128W>, IsCommutable; -class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", int_mips_fcueq_d, - MSA128D, MSA128D>, +class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", int_mips_fcueq_d, MSA128D>, IsCommutable; -class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", int_mips_fcule_w, - MSA128W, MSA128W>, +class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", int_mips_fcule_w, MSA128W>, IsCommutable; -class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", int_mips_fcule_d, - MSA128D, MSA128D>, +class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", int_mips_fcule_d, MSA128D>, IsCommutable; -class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", int_mips_fcult_w, - MSA128W, MSA128W>, +class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", int_mips_fcult_w, MSA128W>, IsCommutable; -class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", int_mips_fcult_d, - MSA128D, MSA128D>, +class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", int_mips_fcult_d, MSA128D>, IsCommutable; -class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", int_mips_fcun_w, - MSA128W, MSA128W>, +class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", int_mips_fcun_w, MSA128W>, IsCommutable; -class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", int_mips_fcun_d, - MSA128D, MSA128D>, +class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", int_mips_fcun_d, MSA128D>, IsCommutable; -class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", int_mips_fcune_w, - MSA128W, MSA128W>, +class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", int_mips_fcune_w, MSA128W>, IsCommutable; -class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", int_mips_fcune_d, - MSA128D, MSA128D>, +class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", int_mips_fcune_d, MSA128D>, IsCommutable; -class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", int_mips_fdiv_w, - MSA128W, MSA128W>; -class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", int_mips_fdiv_d, - MSA128D, MSA128D>; +class FDIV_W_DESC : 
MSA_3RF_DESC_BASE<"fdiv.w", int_mips_fdiv_w, MSA128W>; +class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", int_mips_fdiv_d, MSA128D>; class FEXDO_H_DESC : MSA_3RF_DESC_BASE<"fexdo.h", int_mips_fexdo_h, - MSA128H, MSA128W>; + MSA128H, MSA128W, MSA128W>; class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w, - MSA128W, MSA128D>; + MSA128W, MSA128D, MSA128D>; -class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", int_mips_fexp2_w, - MSA128W, MSA128W>; -class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", int_mips_fexp2_d, - MSA128D, MSA128D>; +class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", int_mips_fexp2_w, MSA128W>; +class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", int_mips_fexp2_d, MSA128D>; class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w, MSA128W, MSA128H>; @@ -1445,35 +1417,29 @@ class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, class FMADD_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.d", int_mips_fmadd_d, MSA128D, MSA128D>; -class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, - MSA128W, MSA128W>; -class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, - MSA128D, MSA128D>; +class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, MSA128W>; +class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, MSA128D>; class FMAX_A_W_DESC : MSA_3RF_DESC_BASE<"fmax_a.w", int_mips_fmax_a_w, - MSA128W, MSA128W>; + MSA128W>; class FMAX_A_D_DESC : MSA_3RF_DESC_BASE<"fmax_a.d", int_mips_fmax_a_d, - MSA128D, MSA128D>; + MSA128D>; -class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, - MSA128W, MSA128W>; -class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, - MSA128D, MSA128D>; +class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, MSA128W>; +class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, MSA128D>; class FMIN_A_W_DESC : MSA_3RF_DESC_BASE<"fmin_a.w", int_mips_fmin_a_w, - MSA128W, MSA128W>; + MSA128W>; class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d, - MSA128D, MSA128D>; + MSA128D>; class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", int_mips_fmsub_w, MSA128W, MSA128W>; class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", int_mips_fmsub_d, MSA128D, MSA128D>; -class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", int_mips_fmul_w, - MSA128W, MSA128W>; -class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", int_mips_fmul_d, - MSA128D, MSA128D>; +class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", int_mips_fmul_w, MSA128W>; +class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", int_mips_fmul_d, MSA128D>; class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", int_mips_frint_w, MSA128W>; class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", int_mips_frint_d, MSA128D>; @@ -1486,68 +1452,44 @@ class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w, class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d, MSA128D>; -class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, - MSA128W, MSA128W>; -class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, - MSA128D, MSA128D>; +class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, MSA128W>; +class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, MSA128D>; -class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, - MSA128W, MSA128W>; -class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, - MSA128D, MSA128D>; +class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, MSA128W>; +class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, MSA128D>; -class 
FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, - MSA128W, MSA128W>; -class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, - MSA128D, MSA128D>; +class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, MSA128W>; +class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, MSA128D>; -class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, - MSA128W, MSA128W>; -class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, - MSA128D, MSA128D>; +class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, MSA128W>; +class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, MSA128D>; -class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, - MSA128W, MSA128W>; -class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, - MSA128D, MSA128D>; +class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, MSA128W>; +class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, MSA128D>; -class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, - MSA128W, MSA128W>; -class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, - MSA128D, MSA128D>; +class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, MSA128W>; +class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, MSA128D>; class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", int_mips_fsqrt_w, MSA128W>; class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", int_mips_fsqrt_d, MSA128D>; -class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", int_mips_fsub_w, - MSA128W, MSA128W>; -class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", int_mips_fsub_d, - MSA128D, MSA128D>; +class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", int_mips_fsub_w, MSA128W>; +class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", int_mips_fsub_d, MSA128D>; -class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w, - MSA128W, MSA128W>; -class FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d, - MSA128D, MSA128D>; +class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w, MSA128W>; +class FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d, MSA128D>; -class FSULE_W_DESC : MSA_3RF_DESC_BASE<"fsule.w", int_mips_fsule_w, - MSA128W, MSA128W>; -class FSULE_D_DESC : MSA_3RF_DESC_BASE<"fsule.d", int_mips_fsule_d, - MSA128D, MSA128D>; +class FSULE_W_DESC : MSA_3RF_DESC_BASE<"fsule.w", int_mips_fsule_w, MSA128W>; +class FSULE_D_DESC : MSA_3RF_DESC_BASE<"fsule.d", int_mips_fsule_d, MSA128D>; -class FSULT_W_DESC : MSA_3RF_DESC_BASE<"fsult.w", int_mips_fsult_w, - MSA128W, MSA128W>; -class FSULT_D_DESC : MSA_3RF_DESC_BASE<"fsult.d", int_mips_fsult_d, - MSA128D, MSA128D>; +class FSULT_W_DESC : MSA_3RF_DESC_BASE<"fsult.w", int_mips_fsult_w, MSA128W>; +class FSULT_D_DESC : MSA_3RF_DESC_BASE<"fsult.d", int_mips_fsult_d, MSA128D>; -class FSUN_W_DESC : MSA_3RF_DESC_BASE<"fsun.w", int_mips_fsun_w, - MSA128W, MSA128W>; -class FSUN_D_DESC : MSA_3RF_DESC_BASE<"fsun.d", int_mips_fsun_d, - MSA128D, MSA128D>; +class FSUN_W_DESC : MSA_3RF_DESC_BASE<"fsun.w", int_mips_fsun_w, MSA128W>; +class FSUN_D_DESC : MSA_3RF_DESC_BASE<"fsun.d", int_mips_fsun_d, MSA128D>; -class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w, - MSA128W, MSA128W>; -class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d, - MSA128D, MSA128D>; +class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w, MSA128W>; +class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d, MSA128D>; class FTRUNC_S_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.w", int_mips_ftrunc_s_w, MSA128W>; @@ -1570,9 
+1512,9 @@ class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d, MSA128D>; class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, - MSA128H, MSA128W>; + MSA128H, MSA128W, MSA128W>; class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w, - MSA128W, MSA128D>; + MSA128W, MSA128D, MSA128D>; class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h, MSA128H, MSA128B, MSA128B>; @@ -1773,15 +1715,13 @@ class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, MSA128H>; class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, MSA128W>; class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, MSA128D>; -class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, - MSA128H, MSA128H>; -class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w, - MSA128W, MSA128W>; +class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, MSA128H>; +class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w, MSA128W>; class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h, - MSA128H, MSA128H>; + MSA128H>; class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w, - MSA128W, MSA128W>; + MSA128W>; class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", int_mips_mulv_b, MSA128B>; class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", int_mips_mulv_h, MSA128H>; -- cgit v1.1 From bf7f7b5e0eae40bb47a410c90f9f0885c0f38b2c Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Fri, 6 Sep 2013 12:41:17 +0000 Subject: This patch adds support for the microMIPS Move Conditional instructions. Test cases are included in the patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190148 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MicroMipsInstrFormats.td | 15 +++++++++++++++ lib/Target/Mips/MicroMipsInstrInfo.td | 10 ++++++++++ lib/Target/Mips/MipsCondMov.td | 14 +++++++------- lib/Target/Mips/MipsInstrFormats.td | 2 +- 4 files changed, 33 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td index 7d8c513..2577fe6 100644 --- a/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/lib/Target/Mips/MicroMipsInstrFormats.td @@ -123,3 +123,18 @@ class LWL_FM_MM<bits<4> funct> { let Inst{15-12} = funct; let Inst{11-0} = addr{11-0}; } + +class CMov_F_I_FM_MM<bits<7> func> : MMArch { + bits<5> rd; + bits<5> rs; + bits<3> fcc; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = rd; + let Inst{20-16} = rs; + let Inst{15-13} = fcc; + let Inst{12-6} = func; + let Inst{5-0} = 0x3b; +} diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 273d3cc..0c95d06 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -109,4 +109,14 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { LWL_FM_MM<0x8>; def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>, LWL_FM_MM<0x9>; + + /// Move Conditional + def MOVZ_I_MM : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, + NoItinerary>, ADD_FM_MM<0, 0x58>; + def MOVN_I_MM : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, + NoItinerary>, ADD_FM_MM<0, 0x18>; + def MOVT_I_MM : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, IIAlu>, + CMov_F_I_FM_MM<0x25>; + def MOVF_I_MM : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, IIAlu>, + CMov_F_I_FM_MM<0x5>; } diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 1f19adc..924739e 100644 --- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td @@ -19,7 +19,7 @@ class CMov_I_I_FT : InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F), - !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> { + !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR, opstr> { let Constraints = "$F = $rd"; } @@ -37,7 +37,7 @@ class CMov_F_I_FT { + Itin, FrmFR, opstr> { let Constraints = "$F = $rd"; } @@ -103,7 +103,7 @@ multiclass MovnPats, +def MOVZ_I_I : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, NoItinerary>, ADD_FM<0, 0xa>; let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { @@ -115,8 +115,8 @@ let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { NoItinerary>, ADD_FM<0, 0xa>; } -def MOVN_I_I : CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, - NoItinerary>, ADD_FM<0, 0xb>; +def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, + NoItinerary>, ADD_FM<0, 0xb>; let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, @@ -161,14 +161,14 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { } } -def MOVT_I : CMov_F_I_FT<"movt", GPR32Opnd, IIArith, MipsCMovFP_T>, +def MOVT_I : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, IIArith, MipsCMovFP_T>, CMov_F_I_FM<1>; let isCodeGenOnly = 1 in def MOVT_I64 : CMov_F_I_FT<"movt", GPR64Opnd, IIArith, MipsCMovFP_T>, CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]>; -def MOVF_I : CMov_F_I_FT<"movf", GPR32Opnd, IIArith, MipsCMovFP_F>, +def MOVF_I : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, IIArith, MipsCMovFP_F>, CMov_F_I_FM<0>; let isCodeGenOnly = 1 in diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index b05363d..dd16e1f 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -730,7 +730,7 @@ class CMov_I_F_FM funct, bits<5> fmt> { let Inst{5-0} = funct; } -class CMov_F_I_FM { +class CMov_F_I_FM : StdArch { bits<5> rd; bits<5> rs; bits<3> fcc; -- cgit v1.1 From b9987d1aa14d8ee91bb0acd113ac21fb322f3efd Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 12:44:13 +0000 Subject: [mips][msa] Made the operand register sets optional for the 3RF_4RF format Their default is to be the same as the result register set. 
No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190150 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 8e99669..63e6f4a 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -896,8 +896,8 @@ class MSA_3RF_DESC_BASE; class MSA_3RF_4RF_DESC_BASE : MSA_3R_4R_DESC_BASE; @@ -1413,9 +1413,9 @@ class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", int_mips_flog2_w, MSA128W>; class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", int_mips_flog2_d, MSA128D>; class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, - MSA128W, MSA128W>; + MSA128W>; class FMADD_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.d", int_mips_fmadd_d, - MSA128D, MSA128D>; + MSA128D>; class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, MSA128W>; class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, MSA128D>; @@ -1434,9 +1434,9 @@ class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d, MSA128D>; class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", int_mips_fmsub_w, - MSA128W, MSA128W>; + MSA128W>; class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", int_mips_fmsub_d, - MSA128D, MSA128D>; + MSA128D>; class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", int_mips_fmul_w, MSA128W>; class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", int_mips_fmul_d, MSA128D>; @@ -1618,14 +1618,14 @@ class LDX_W_DESC : LDX_DESC_BASE<"ldx.w", load, v4i32, MSA128W>; class LDX_D_DESC : LDX_DESC_BASE<"ldx.d", load, v2i64, MSA128D>; class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h, - MSA128H, MSA128H>; + MSA128H>; class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w, - MSA128W, MSA128W>; + MSA128W>; class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h, - MSA128H, MSA128H>; + MSA128H>; class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w, - MSA128W, MSA128W>; + MSA128W>; class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b, MSA128B>; class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", int_mips_maddv_h, MSA128H>; @@ -1701,14 +1701,14 @@ class MOVE_V_DESC { } class MSUB_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.h", int_mips_msub_q_h, - MSA128H, MSA128H>; + MSA128H>; class MSUB_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.w", int_mips_msub_q_w, - MSA128W, MSA128W>; + MSA128W>; class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h, - MSA128H, MSA128H>; + MSA128H>; class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w, - MSA128W, MSA128W>; + MSA128W>; class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, MSA128B>; class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, MSA128H>; -- cgit v1.1 From 9e935a77a59fc14acf1936165f235342b741e34a Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 12:50:52 +0000 Subject: [mips][msa] Made the operand register sets optional for the ELM_INSVE formats Their default is to be the same as the result register set. 
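The defaulting in this series of patches is ordinary TableGen default template arguments; a minimal sketch of the idiom (illustrative class, intrinsic, and mnemonic names, not the verbatim MSA classes):

class EXAMPLE_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
                        RegisterClass RCWD,        // result register set
                        RegisterClass RCWS = RCWD, // operand register sets
                        RegisterClass RCWT = RCWD> // default to the result set
{
  dag OutOperandList = (outs RCWD:$wd);
  dag InOperandList = (ins RCWS:$ws, RCWT:$wt);
}

// Same-width operations now pass only the result register set ...
class SAME_W_DESC : EXAMPLE_DESC_BASE<"same.w", int_mips_same_w, MSA128W>;
// ... while mixed-width operations still spell out all three sets.
class MIXED_H_DESC : EXAMPLE_DESC_BASE<"mixed.h", int_mips_mixed_h,
                                       MSA128H, MSA128B, MSA128B>;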
No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190151 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 63e6f4a..8d7ae1c 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -927,14 +927,14 @@ class MSA_INSERT_DESC_BASE { - dag OutOperandList = (outs RCD:$wd); - dag InOperandList = (ins RCD:$wd_in, uimm6:$n, RCWS:$ws); + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWD:$wd_in, uimm6:$n, RCWS:$ws); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[0]"); - list<dag> Pattern = [(set RCD:$wd, (OpNode RCD:$wd_in, - immZExt6:$n, - RCWS:$ws))]; + list<dag> Pattern = [(set RCWD:$wd, (OpNode RCWD:$wd_in, + immZExt6:$n, + RCWS:$ws))]; InstrItinClass Itinerary = itin; string Constraints = "$wd = $wd_in"; } @@ -1571,14 +1571,10 @@ class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", int_mips_insert_h, class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", int_mips_insert_w, MSA128W, GPR32>; -class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b, - MSA128B, MSA128B>; -class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h, - MSA128H, MSA128H>; -class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", int_mips_insve_w, - MSA128W, MSA128W>; -class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d, - MSA128D, MSA128D>; +class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b, MSA128B>; +class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h, MSA128H>; +class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", int_mips_insve_w, MSA128W>; +class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d, MSA128D>; class LD_DESC_BASE Date: Fri, 6 Sep 2013 12:53:21 +0000 Subject: This patch adds support for the microMIPS Move to/from HI/LO instructions. Test cases are included in the patch.
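A note on the mechanism shared by these microMIPS patches: adding MMRel to the standard definition and threading opstr through to InstSE is what lets TableGen pair a standard encoding with its microMIPS counterpart. The pairing is an InstrMapping keyed on the shared mnemonic; roughly (a simplified sketch, and the mapping def name here is illustrative rather than the exact one in MipsInstrInfo.td):

class MMRel; // marker: this instruction has a microMIPS relative

def getCorrespondingMicroMips : InstrMapping {
  let FilterClass = "MMRel";       // consider only MMRel-derived defs
  let RowFields = ["BaseOpcode"];  // rows keyed by the shared opstr
  let ColFields = ["Arch"];        // columns distinguish encoding families
  let KeyCol = ["se"];             // the standard encoding is the lookup key
  let ValueCols = [["micromips"]]; // the value is the microMIPS twin
}

Passing opstr as the trailing InstSE operand is what populates BaseOpcode, which is why MoveToLOHI and MoveFromLOHI below now forward it.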
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190152 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MicroMipsInstrFormats.td | 24 ++++++++++++++++++++++ lib/Target/Mips/MicroMipsInstrInfo.td | 10 ++++++++++ lib/Target/Mips/MipsInstrFormats.td | 4 ++-- lib/Target/Mips/MipsInstrInfo.td | 14 ++++++++------ 4 files changed, 44 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td index 2577fe6..6d8d29e 100644 --- a/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/lib/Target/Mips/MicroMipsInstrFormats.td @@ -138,3 +138,27 @@ class CMov_F_I_FM_MM<bits<7> func> : MMArch { let Inst{12-6} = func; let Inst{5-0} = 0x3b; } + +class MTLO_FM_MM<bits<10> funct> : MMArch { + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0x00; + let Inst{25-21} = 0x00; + let Inst{20-16} = rs; + let Inst{15-6} = funct; + let Inst{5-0} = 0x3c; +} + +class MFLO_FM_MM<bits<10> funct> : MMArch { + bits<5> rd; + + bits<32> Inst; + + let Inst{31-26} = 0x00; + let Inst{25-21} = 0x00; + let Inst{20-16} = rd; + let Inst{15-6} = funct; + let Inst{5-0} = 0x3c; +} diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 0c95d06..3c3e764 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -119,4 +119,14 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { CMov_F_I_FM_MM<0x25>; def MOVF_I_MM : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, IIAlu>, CMov_F_I_FM_MM<0x5>; + + /// Move to/from HI/LO + def MTHI_MM : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, + MTLO_FM_MM<0x0b5>; + def MTLO_MM : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, + MTLO_FM_MM<0x0f5>; + def MFHI_MM : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, [HI0]>, + MFLO_FM_MM<0x035>; + def MFLO_MM : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, [LO0]>, + MFLO_FM_MM<0x075>; } diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index dd16e1f..f87d70e 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -321,7 +321,7 @@ class SLTI_FM<bits<6> op> : StdArch { let Inst{15-0} = imm16; } -class MFLO_FM<bits<6> funct> { +class MFLO_FM<bits<6> funct> : StdArch { bits<5> rd; bits<32> Inst; @@ -333,7 +333,7 @@ class MFLO_FM<bits<6> funct> { let Inst{5-0} = funct; } -class MTLO_FM<bits<6> funct> { +class MTLO_FM<bits<6> funct> : StdArch { bits<5> rs; bits<32> Inst; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 9b6c857..9929334 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -690,13 +690,15 @@ class Div ... class MoveFromLOHI<string opstr, RegisterOperand RO, list<Register> UseRegs>: - InstSE<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR> { + InstSE<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, + FrmR, opstr> { let Uses = UseRegs; let neverHasSideEffects = 1; } class MoveToLOHI<string opstr, RegisterOperand RO, list<Register> DefRegs>: - InstSE<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, FrmR> { + InstSE<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, + FrmR, opstr> { let Defs = DefRegs; let neverHasSideEffects = 1; } @@ -1016,10 +1018,10 @@ def PseudoSDIV : MultDivPseudo; -def MTHI : MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>; -def MTLO : MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>; -def MFHI : MoveFromLOHI<"mfhi", GPR32Opnd, [HI0]>, MFLO_FM<0x10>; -def MFLO : MoveFromLOHI<"mflo", GPR32Opnd, [LO0]>, MFLO_FM<0x12>; +def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>; +def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd,
[LO0]>, MTLO_FM<0x13>; +def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, [HI0]>, MFLO_FM<0x10>; +def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, [LO0]>, MFLO_FM<0x12>; /// Sign Ext In Register Instructions. def SEB : SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM<0x10, 0x20>; -- cgit v1.1 From 3aaa3e31aa35164fa54474bcf3a2c2df5ab8b375 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 13:01:47 +0000 Subject: [mips][msa] Made the operand register sets optional for the VEC formats Their default is to be the same as the result register set. No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190153 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 8d7ae1c..d371902 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -940,8 +940,8 @@ class MSA_INSVE_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, RCWT:$wt); @@ -1000,8 +1000,7 @@ class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H>; class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, MSA128W>; class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, MSA128D>; -class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, - MSA128B, MSA128B>; +class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, MSA128B>; class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, MSA128B>; @@ -1089,13 +1088,11 @@ class BINSRI_W_DESC : MSA_BIT_W_DESC_BASE<"binsri.w", int_mips_binsri_w, class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, MSA128D>; -class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, - MSA128B, MSA128B>; +class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, MSA128B>; class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, MSA128B>; -class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, - MSA128B, MSA128B>; +class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, MSA128B>; class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, MSA128B>; @@ -1116,8 +1113,7 @@ class BNZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bnz.d", MSA128D>; class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128B>; -class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, - MSA128B, MSA128B>; +class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, MSA128B>; class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, MSA128B>; @@ -1734,13 +1730,11 @@ class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", int_mips_nlzc_h, MSA128H>; class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", int_mips_nlzc_w, MSA128W>; class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", int_mips_nlzc_d, MSA128D>; -class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, - MSA128B, MSA128B>; +class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, MSA128B>; class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, MSA128B>; -class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, - MSA128B, MSA128B>; +class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, MSA128B>; class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, MSA128B>; @@ -1926,8 +1920,7 @@ class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, MSA128H>; class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, MSA128W>; class 
VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, MSA128D>; -class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, - MSA128B, MSA128B>; +class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, MSA128B>; class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, MSA128B>; -- cgit v1.1 From 638382e6f169649eb86fa47a6ea25dd932f07689 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Fri, 6 Sep 2013 13:08:00 +0000 Subject: This patch adds support for the microMIPS Multiply Add/Sub instructions. Test cases are included in the patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190154 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MicroMipsInstrInfo.td | 6 ++++++ lib/Target/Mips/MipsInstrInfo.td | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 3c3e764..b0dc2e8 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -129,4 +129,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { MFLO_FM_MM<0x035>; def MFLO_MM : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, [LO0]>, MFLO_FM_MM<0x075>; + + /// Multiply Add/Sub Instructions + def MADD_MM : MMRel, MArithR<"madd", 1>, MULT_FM_MM<0x32c>; + def MADDU_MM : MMRel, MArithR<"maddu", 1>, MULT_FM_MM<0x36c>; + def MSUB_MM : MMRel, MArithR<"msub">, MULT_FM_MM<0x3ac>; + def MSUBU_MM : MMRel, MArithR<"msubu">, MULT_FM_MM<0x3ec>; } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 9929334..d7396e3 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -413,7 +413,7 @@ class ArithLogicI : InstSE<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), - !strconcat(opstr, "\t$rs, $rt"), [], IIImult, FrmR> { + !strconcat(opstr, "\t$rs, $rt"), [], IIImult, FrmR, opstr> { let Defs = [HI0, LO0]; let Uses = [HI0, LO0]; let isCommutable = isComm; @@ -1044,10 +1044,10 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; def LEA_ADDiu : EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>; // MADD*/MSUB* -def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>; -def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>; -def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>; -def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>; +def MADD : MMRel, MArithR<"madd", 1>, MULT_FM<0x1c, 0>; +def MADDU : MMRel, MArithR<"maddu", 1>, MULT_FM<0x1c, 1>; +def MSUB : MMRel, MArithR<"msub">, MULT_FM<0x1c, 4>; +def MSUBU : MMRel, MArithR<"msubu">, MULT_FM<0x1c, 5>; def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>; def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>; def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>; -- cgit v1.1 From 5cb39b22fb248b59d7aa37603083ba7d1b0f7f2c Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 13:15:05 +0000 Subject: [mips][msa] Requires<[HasMSA]> is redundant; it is also supplied via inheritance Tested with 'llvm-tblgen -print-records' which outputs identical records before and after this patch.
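The inheritance in question: every MSA instruction derives, through its *_ENC format class, from a common MSA base whose Predicates list already names HasMSA, so tacking Requires<[HasMSA]> onto each def repeated the same information. Roughly (a simplified sketch, not the verbatim format classes):

class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
  let Predicates = [HasMSA]; // inherited by every MSA instruction
}

// Target.td's Requires<preds> just overwrites Predicates with the same list:
//   class Requires<list<Predicate> preds> { list<Predicate> Predicates = preds; }

def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC; // HasMSA arrives via the ENC base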
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190155 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 1256 +++++++++++++++++------------------ 1 file changed, 628 insertions(+), 628 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index d371902..97fecc4 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1925,720 +1925,720 @@ class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, MSA128B>; class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, MSA128B>; // Instruction defs. -def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC, Requires<[HasMSA]>; -def ADD_A_H : ADD_A_H_ENC, ADD_A_H_DESC, Requires<[HasMSA]>; -def ADD_A_W : ADD_A_W_ENC, ADD_A_W_DESC, Requires<[HasMSA]>; -def ADD_A_D : ADD_A_D_ENC, ADD_A_D_DESC, Requires<[HasMSA]>; - -def ADDS_A_B : ADDS_A_B_ENC, ADDS_A_B_DESC, Requires<[HasMSA]>; -def ADDS_A_H : ADDS_A_H_ENC, ADDS_A_H_DESC, Requires<[HasMSA]>; -def ADDS_A_W : ADDS_A_W_ENC, ADDS_A_W_DESC, Requires<[HasMSA]>; -def ADDS_A_D : ADDS_A_D_ENC, ADDS_A_D_DESC, Requires<[HasMSA]>; - -def ADDS_S_B : ADDS_S_B_ENC, ADDS_S_B_DESC, Requires<[HasMSA]>; -def ADDS_S_H : ADDS_S_H_ENC, ADDS_S_H_DESC, Requires<[HasMSA]>; -def ADDS_S_W : ADDS_S_W_ENC, ADDS_S_W_DESC, Requires<[HasMSA]>; -def ADDS_S_D : ADDS_S_D_ENC, ADDS_S_D_DESC, Requires<[HasMSA]>; - -def ADDS_U_B : ADDS_U_B_ENC, ADDS_U_B_DESC, Requires<[HasMSA]>; -def ADDS_U_H : ADDS_U_H_ENC, ADDS_U_H_DESC, Requires<[HasMSA]>; -def ADDS_U_W : ADDS_U_W_ENC, ADDS_U_W_DESC, Requires<[HasMSA]>; -def ADDS_U_D : ADDS_U_D_ENC, ADDS_U_D_DESC, Requires<[HasMSA]>; - -def ADDV_B : ADDV_B_ENC, ADDV_B_DESC, Requires<[HasMSA]>; -def ADDV_H : ADDV_H_ENC, ADDV_H_DESC, Requires<[HasMSA]>; -def ADDV_W : ADDV_W_ENC, ADDV_W_DESC, Requires<[HasMSA]>; -def ADDV_D : ADDV_D_ENC, ADDV_D_DESC, Requires<[HasMSA]>; - -def ADDVI_B : ADDVI_B_ENC, ADDVI_B_DESC, Requires<[HasMSA]>; -def ADDVI_H : ADDVI_H_ENC, ADDVI_H_DESC, Requires<[HasMSA]>; -def ADDVI_W : ADDVI_W_ENC, ADDVI_W_DESC, Requires<[HasMSA]>; -def ADDVI_D : ADDVI_D_ENC, ADDVI_D_DESC, Requires<[HasMSA]>; - -def AND_V : AND_V_ENC, AND_V_DESC, Requires<[HasMSA]>; - -def ANDI_B : ANDI_B_ENC, ANDI_B_DESC, Requires<[HasMSA]>; - -def ASUB_S_B : ASUB_S_B_ENC, ASUB_S_B_DESC, Requires<[HasMSA]>; -def ASUB_S_H : ASUB_S_H_ENC, ASUB_S_H_DESC, Requires<[HasMSA]>; -def ASUB_S_W : ASUB_S_W_ENC, ASUB_S_W_DESC, Requires<[HasMSA]>; -def ASUB_S_D : ASUB_S_D_ENC, ASUB_S_D_DESC, Requires<[HasMSA]>; - -def ASUB_U_B : ASUB_U_B_ENC, ASUB_U_B_DESC, Requires<[HasMSA]>; -def ASUB_U_H : ASUB_U_H_ENC, ASUB_U_H_DESC, Requires<[HasMSA]>; -def ASUB_U_W : ASUB_U_W_ENC, ASUB_U_W_DESC, Requires<[HasMSA]>; -def ASUB_U_D : ASUB_U_D_ENC, ASUB_U_D_DESC, Requires<[HasMSA]>; - -def AVE_S_B : AVE_S_B_ENC, AVE_S_B_DESC, Requires<[HasMSA]>; -def AVE_S_H : AVE_S_H_ENC, AVE_S_H_DESC, Requires<[HasMSA]>; -def AVE_S_W : AVE_S_W_ENC, AVE_S_W_DESC, Requires<[HasMSA]>; -def AVE_S_D : AVE_S_D_ENC, AVE_S_D_DESC, Requires<[HasMSA]>; - -def AVE_U_B : AVE_U_B_ENC, AVE_U_B_DESC, Requires<[HasMSA]>; -def AVE_U_H : AVE_U_H_ENC, AVE_U_H_DESC, Requires<[HasMSA]>; -def AVE_U_W : AVE_U_W_ENC, AVE_U_W_DESC, Requires<[HasMSA]>; -def AVE_U_D : AVE_U_D_ENC, AVE_U_D_DESC, Requires<[HasMSA]>; - -def AVER_S_B : AVER_S_B_ENC, AVER_S_B_DESC, Requires<[HasMSA]>; -def AVER_S_H : AVER_S_H_ENC, AVER_S_H_DESC, Requires<[HasMSA]>; -def AVER_S_W : AVER_S_W_ENC, AVER_S_W_DESC, Requires<[HasMSA]>; -def AVER_S_D : AVER_S_D_ENC, 
AVER_S_D_DESC, Requires<[HasMSA]>; - -def AVER_U_B : AVER_U_B_ENC, AVER_U_B_DESC, Requires<[HasMSA]>; -def AVER_U_H : AVER_U_H_ENC, AVER_U_H_DESC, Requires<[HasMSA]>; -def AVER_U_W : AVER_U_W_ENC, AVER_U_W_DESC, Requires<[HasMSA]>; -def AVER_U_D : AVER_U_D_ENC, AVER_U_D_DESC, Requires<[HasMSA]>; - -def BCLR_B : BCLR_B_ENC, BCLR_B_DESC, Requires<[HasMSA]>; -def BCLR_H : BCLR_H_ENC, BCLR_H_DESC, Requires<[HasMSA]>; -def BCLR_W : BCLR_W_ENC, BCLR_W_DESC, Requires<[HasMSA]>; -def BCLR_D : BCLR_D_ENC, BCLR_D_DESC, Requires<[HasMSA]>; - -def BCLRI_B : BCLRI_B_ENC, BCLRI_B_DESC, Requires<[HasMSA]>; -def BCLRI_H : BCLRI_H_ENC, BCLRI_H_DESC, Requires<[HasMSA]>; -def BCLRI_W : BCLRI_W_ENC, BCLRI_W_DESC, Requires<[HasMSA]>; -def BCLRI_D : BCLRI_D_ENC, BCLRI_D_DESC, Requires<[HasMSA]>; - -def BINSL_B : BINSL_B_ENC, BINSL_B_DESC, Requires<[HasMSA]>; -def BINSL_H : BINSL_H_ENC, BINSL_H_DESC, Requires<[HasMSA]>; -def BINSL_W : BINSL_W_ENC, BINSL_W_DESC, Requires<[HasMSA]>; -def BINSL_D : BINSL_D_ENC, BINSL_D_DESC, Requires<[HasMSA]>; - -def BINSLI_B : BINSLI_B_ENC, BINSLI_B_DESC, Requires<[HasMSA]>; -def BINSLI_H : BINSLI_H_ENC, BINSLI_H_DESC, Requires<[HasMSA]>; -def BINSLI_W : BINSLI_W_ENC, BINSLI_W_DESC, Requires<[HasMSA]>; -def BINSLI_D : BINSLI_D_ENC, BINSLI_D_DESC, Requires<[HasMSA]>; - -def BINSR_B : BINSR_B_ENC, BINSR_B_DESC, Requires<[HasMSA]>; -def BINSR_H : BINSR_H_ENC, BINSR_H_DESC, Requires<[HasMSA]>; -def BINSR_W : BINSR_W_ENC, BINSR_W_DESC, Requires<[HasMSA]>; -def BINSR_D : BINSR_D_ENC, BINSR_D_DESC, Requires<[HasMSA]>; - -def BINSRI_B : BINSRI_B_ENC, BINSRI_B_DESC, Requires<[HasMSA]>; -def BINSRI_H : BINSRI_H_ENC, BINSRI_H_DESC, Requires<[HasMSA]>; -def BINSRI_W : BINSRI_W_ENC, BINSRI_W_DESC, Requires<[HasMSA]>; -def BINSRI_D : BINSRI_D_ENC, BINSRI_D_DESC, Requires<[HasMSA]>; - -def BMNZ_V : BMNZ_V_ENC, BMNZ_V_DESC, Requires<[HasMSA]>; - -def BMNZI_B : BMNZI_B_ENC, BMNZI_B_DESC, Requires<[HasMSA]>; - -def BMZ_V : BMZ_V_ENC, BMZ_V_DESC, Requires<[HasMSA]>; - -def BMZI_B : BMZI_B_ENC, BMZI_B_DESC, Requires<[HasMSA]>; - -def BNEG_B : BNEG_B_ENC, BNEG_B_DESC, Requires<[HasMSA]>; -def BNEG_H : BNEG_H_ENC, BNEG_H_DESC, Requires<[HasMSA]>; -def BNEG_W : BNEG_W_ENC, BNEG_W_DESC, Requires<[HasMSA]>; -def BNEG_D : BNEG_D_ENC, BNEG_D_DESC, Requires<[HasMSA]>; - -def BNEGI_B : BNEGI_B_ENC, BNEGI_B_DESC, Requires<[HasMSA]>; -def BNEGI_H : BNEGI_H_ENC, BNEGI_H_DESC, Requires<[HasMSA]>; -def BNEGI_W : BNEGI_W_ENC, BNEGI_W_DESC, Requires<[HasMSA]>; -def BNEGI_D : BNEGI_D_ENC, BNEGI_D_DESC, Requires<[HasMSA]>; - -def BNZ_B : BNZ_B_ENC, BNZ_B_DESC, Requires<[HasMSA]>; -def BNZ_H : BNZ_H_ENC, BNZ_H_DESC, Requires<[HasMSA]>; -def BNZ_W : BNZ_W_ENC, BNZ_W_DESC, Requires<[HasMSA]>; -def BNZ_D : BNZ_D_ENC, BNZ_D_DESC, Requires<[HasMSA]>; - -def BNZ_V : BNZ_V_ENC, BNZ_V_DESC, Requires<[HasMSA]>; - -def BSEL_V : BSEL_V_ENC, BSEL_V_DESC, Requires<[HasMSA]>; - -def BSELI_B : BSELI_B_ENC, BSELI_B_DESC, Requires<[HasMSA]>; - -def BSET_B : BSET_B_ENC, BSET_B_DESC, Requires<[HasMSA]>; -def BSET_H : BSET_H_ENC, BSET_H_DESC, Requires<[HasMSA]>; -def BSET_W : BSET_W_ENC, BSET_W_DESC, Requires<[HasMSA]>; -def BSET_D : BSET_D_ENC, BSET_D_DESC, Requires<[HasMSA]>; - -def BSETI_B : BSETI_B_ENC, BSETI_B_DESC, Requires<[HasMSA]>; -def BSETI_H : BSETI_H_ENC, BSETI_H_DESC, Requires<[HasMSA]>; -def BSETI_W : BSETI_W_ENC, BSETI_W_DESC, Requires<[HasMSA]>; -def BSETI_D : BSETI_D_ENC, BSETI_D_DESC, Requires<[HasMSA]>; - -def BZ_B : BZ_B_ENC, BZ_B_DESC, Requires<[HasMSA]>; -def BZ_H : BZ_H_ENC, BZ_H_DESC, Requires<[HasMSA]>; -def 
BZ_W : BZ_W_ENC, BZ_W_DESC, Requires<[HasMSA]>; -def BZ_D : BZ_D_ENC, BZ_D_DESC, Requires<[HasMSA]>; - -def BZ_V : BZ_V_ENC, BZ_V_DESC, Requires<[HasMSA]>; - -def CEQ_B : CEQ_B_ENC, CEQ_B_DESC, Requires<[HasMSA]>; -def CEQ_H : CEQ_H_ENC, CEQ_H_DESC, Requires<[HasMSA]>; -def CEQ_W : CEQ_W_ENC, CEQ_W_DESC, Requires<[HasMSA]>; -def CEQ_D : CEQ_D_ENC, CEQ_D_DESC, Requires<[HasMSA]>; - -def CEQI_B : CEQI_B_ENC, CEQI_B_DESC, Requires<[HasMSA]>; -def CEQI_H : CEQI_H_ENC, CEQI_H_DESC, Requires<[HasMSA]>; -def CEQI_W : CEQI_W_ENC, CEQI_W_DESC, Requires<[HasMSA]>; -def CEQI_D : CEQI_D_ENC, CEQI_D_DESC, Requires<[HasMSA]>; - -def CFCMSA : CFCMSA_ENC, CFCMSA_DESC, Requires<[HasMSA]>; - -def CLE_S_B : CLE_S_B_ENC, CLE_S_B_DESC, Requires<[HasMSA]>; -def CLE_S_H : CLE_S_H_ENC, CLE_S_H_DESC, Requires<[HasMSA]>; -def CLE_S_W : CLE_S_W_ENC, CLE_S_W_DESC, Requires<[HasMSA]>; -def CLE_S_D : CLE_S_D_ENC, CLE_S_D_DESC, Requires<[HasMSA]>; +def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC; +def ADD_A_H : ADD_A_H_ENC, ADD_A_H_DESC; +def ADD_A_W : ADD_A_W_ENC, ADD_A_W_DESC; +def ADD_A_D : ADD_A_D_ENC, ADD_A_D_DESC; + +def ADDS_A_B : ADDS_A_B_ENC, ADDS_A_B_DESC; +def ADDS_A_H : ADDS_A_H_ENC, ADDS_A_H_DESC; +def ADDS_A_W : ADDS_A_W_ENC, ADDS_A_W_DESC; +def ADDS_A_D : ADDS_A_D_ENC, ADDS_A_D_DESC; + +def ADDS_S_B : ADDS_S_B_ENC, ADDS_S_B_DESC; +def ADDS_S_H : ADDS_S_H_ENC, ADDS_S_H_DESC; +def ADDS_S_W : ADDS_S_W_ENC, ADDS_S_W_DESC; +def ADDS_S_D : ADDS_S_D_ENC, ADDS_S_D_DESC; + +def ADDS_U_B : ADDS_U_B_ENC, ADDS_U_B_DESC; +def ADDS_U_H : ADDS_U_H_ENC, ADDS_U_H_DESC; +def ADDS_U_W : ADDS_U_W_ENC, ADDS_U_W_DESC; +def ADDS_U_D : ADDS_U_D_ENC, ADDS_U_D_DESC; + +def ADDV_B : ADDV_B_ENC, ADDV_B_DESC; +def ADDV_H : ADDV_H_ENC, ADDV_H_DESC; +def ADDV_W : ADDV_W_ENC, ADDV_W_DESC; +def ADDV_D : ADDV_D_ENC, ADDV_D_DESC; + +def ADDVI_B : ADDVI_B_ENC, ADDVI_B_DESC; +def ADDVI_H : ADDVI_H_ENC, ADDVI_H_DESC; +def ADDVI_W : ADDVI_W_ENC, ADDVI_W_DESC; +def ADDVI_D : ADDVI_D_ENC, ADDVI_D_DESC; + +def AND_V : AND_V_ENC, AND_V_DESC; + +def ANDI_B : ANDI_B_ENC, ANDI_B_DESC; + +def ASUB_S_B : ASUB_S_B_ENC, ASUB_S_B_DESC; +def ASUB_S_H : ASUB_S_H_ENC, ASUB_S_H_DESC; +def ASUB_S_W : ASUB_S_W_ENC, ASUB_S_W_DESC; +def ASUB_S_D : ASUB_S_D_ENC, ASUB_S_D_DESC; + +def ASUB_U_B : ASUB_U_B_ENC, ASUB_U_B_DESC; +def ASUB_U_H : ASUB_U_H_ENC, ASUB_U_H_DESC; +def ASUB_U_W : ASUB_U_W_ENC, ASUB_U_W_DESC; +def ASUB_U_D : ASUB_U_D_ENC, ASUB_U_D_DESC; + +def AVE_S_B : AVE_S_B_ENC, AVE_S_B_DESC; +def AVE_S_H : AVE_S_H_ENC, AVE_S_H_DESC; +def AVE_S_W : AVE_S_W_ENC, AVE_S_W_DESC; +def AVE_S_D : AVE_S_D_ENC, AVE_S_D_DESC; + +def AVE_U_B : AVE_U_B_ENC, AVE_U_B_DESC; +def AVE_U_H : AVE_U_H_ENC, AVE_U_H_DESC; +def AVE_U_W : AVE_U_W_ENC, AVE_U_W_DESC; +def AVE_U_D : AVE_U_D_ENC, AVE_U_D_DESC; + +def AVER_S_B : AVER_S_B_ENC, AVER_S_B_DESC; +def AVER_S_H : AVER_S_H_ENC, AVER_S_H_DESC; +def AVER_S_W : AVER_S_W_ENC, AVER_S_W_DESC; +def AVER_S_D : AVER_S_D_ENC, AVER_S_D_DESC; + +def AVER_U_B : AVER_U_B_ENC, AVER_U_B_DESC; +def AVER_U_H : AVER_U_H_ENC, AVER_U_H_DESC; +def AVER_U_W : AVER_U_W_ENC, AVER_U_W_DESC; +def AVER_U_D : AVER_U_D_ENC, AVER_U_D_DESC; + +def BCLR_B : BCLR_B_ENC, BCLR_B_DESC; +def BCLR_H : BCLR_H_ENC, BCLR_H_DESC; +def BCLR_W : BCLR_W_ENC, BCLR_W_DESC; +def BCLR_D : BCLR_D_ENC, BCLR_D_DESC; + +def BCLRI_B : BCLRI_B_ENC, BCLRI_B_DESC; +def BCLRI_H : BCLRI_H_ENC, BCLRI_H_DESC; +def BCLRI_W : BCLRI_W_ENC, BCLRI_W_DESC; +def BCLRI_D : BCLRI_D_ENC, BCLRI_D_DESC; + +def BINSL_B : BINSL_B_ENC, BINSL_B_DESC; +def BINSL_H : BINSL_H_ENC, BINSL_H_DESC; +def BINSL_W : 
BINSL_W_ENC, BINSL_W_DESC; +def BINSL_D : BINSL_D_ENC, BINSL_D_DESC; + +def BINSLI_B : BINSLI_B_ENC, BINSLI_B_DESC; +def BINSLI_H : BINSLI_H_ENC, BINSLI_H_DESC; +def BINSLI_W : BINSLI_W_ENC, BINSLI_W_DESC; +def BINSLI_D : BINSLI_D_ENC, BINSLI_D_DESC; + +def BINSR_B : BINSR_B_ENC, BINSR_B_DESC; +def BINSR_H : BINSR_H_ENC, BINSR_H_DESC; +def BINSR_W : BINSR_W_ENC, BINSR_W_DESC; +def BINSR_D : BINSR_D_ENC, BINSR_D_DESC; + +def BINSRI_B : BINSRI_B_ENC, BINSRI_B_DESC; +def BINSRI_H : BINSRI_H_ENC, BINSRI_H_DESC; +def BINSRI_W : BINSRI_W_ENC, BINSRI_W_DESC; +def BINSRI_D : BINSRI_D_ENC, BINSRI_D_DESC; + +def BMNZ_V : BMNZ_V_ENC, BMNZ_V_DESC; + +def BMNZI_B : BMNZI_B_ENC, BMNZI_B_DESC; + +def BMZ_V : BMZ_V_ENC, BMZ_V_DESC; + +def BMZI_B : BMZI_B_ENC, BMZI_B_DESC; + +def BNEG_B : BNEG_B_ENC, BNEG_B_DESC; +def BNEG_H : BNEG_H_ENC, BNEG_H_DESC; +def BNEG_W : BNEG_W_ENC, BNEG_W_DESC; +def BNEG_D : BNEG_D_ENC, BNEG_D_DESC; + +def BNEGI_B : BNEGI_B_ENC, BNEGI_B_DESC; +def BNEGI_H : BNEGI_H_ENC, BNEGI_H_DESC; +def BNEGI_W : BNEGI_W_ENC, BNEGI_W_DESC; +def BNEGI_D : BNEGI_D_ENC, BNEGI_D_DESC; + +def BNZ_B : BNZ_B_ENC, BNZ_B_DESC; +def BNZ_H : BNZ_H_ENC, BNZ_H_DESC; +def BNZ_W : BNZ_W_ENC, BNZ_W_DESC; +def BNZ_D : BNZ_D_ENC, BNZ_D_DESC; + +def BNZ_V : BNZ_V_ENC, BNZ_V_DESC; + +def BSEL_V : BSEL_V_ENC, BSEL_V_DESC; + +def BSELI_B : BSELI_B_ENC, BSELI_B_DESC; + +def BSET_B : BSET_B_ENC, BSET_B_DESC; +def BSET_H : BSET_H_ENC, BSET_H_DESC; +def BSET_W : BSET_W_ENC, BSET_W_DESC; +def BSET_D : BSET_D_ENC, BSET_D_DESC; + +def BSETI_B : BSETI_B_ENC, BSETI_B_DESC; +def BSETI_H : BSETI_H_ENC, BSETI_H_DESC; +def BSETI_W : BSETI_W_ENC, BSETI_W_DESC; +def BSETI_D : BSETI_D_ENC, BSETI_D_DESC; + +def BZ_B : BZ_B_ENC, BZ_B_DESC; +def BZ_H : BZ_H_ENC, BZ_H_DESC; +def BZ_W : BZ_W_ENC, BZ_W_DESC; +def BZ_D : BZ_D_ENC, BZ_D_DESC; + +def BZ_V : BZ_V_ENC, BZ_V_DESC; + +def CEQ_B : CEQ_B_ENC, CEQ_B_DESC; +def CEQ_H : CEQ_H_ENC, CEQ_H_DESC; +def CEQ_W : CEQ_W_ENC, CEQ_W_DESC; +def CEQ_D : CEQ_D_ENC, CEQ_D_DESC; + +def CEQI_B : CEQI_B_ENC, CEQI_B_DESC; +def CEQI_H : CEQI_H_ENC, CEQI_H_DESC; +def CEQI_W : CEQI_W_ENC, CEQI_W_DESC; +def CEQI_D : CEQI_D_ENC, CEQI_D_DESC; + +def CFCMSA : CFCMSA_ENC, CFCMSA_DESC; + +def CLE_S_B : CLE_S_B_ENC, CLE_S_B_DESC; +def CLE_S_H : CLE_S_H_ENC, CLE_S_H_DESC; +def CLE_S_W : CLE_S_W_ENC, CLE_S_W_DESC; +def CLE_S_D : CLE_S_D_ENC, CLE_S_D_DESC; -def CLE_U_B : CLE_U_B_ENC, CLE_U_B_DESC, Requires<[HasMSA]>; -def CLE_U_H : CLE_U_H_ENC, CLE_U_H_DESC, Requires<[HasMSA]>; -def CLE_U_W : CLE_U_W_ENC, CLE_U_W_DESC, Requires<[HasMSA]>; -def CLE_U_D : CLE_U_D_ENC, CLE_U_D_DESC, Requires<[HasMSA]>; +def CLE_U_B : CLE_U_B_ENC, CLE_U_B_DESC; +def CLE_U_H : CLE_U_H_ENC, CLE_U_H_DESC; +def CLE_U_W : CLE_U_W_ENC, CLE_U_W_DESC; +def CLE_U_D : CLE_U_D_ENC, CLE_U_D_DESC; -def CLEI_S_B : CLEI_S_B_ENC, CLEI_S_B_DESC, Requires<[HasMSA]>; -def CLEI_S_H : CLEI_S_H_ENC, CLEI_S_H_DESC, Requires<[HasMSA]>; -def CLEI_S_W : CLEI_S_W_ENC, CLEI_S_W_DESC, Requires<[HasMSA]>; -def CLEI_S_D : CLEI_S_D_ENC, CLEI_S_D_DESC, Requires<[HasMSA]>; +def CLEI_S_B : CLEI_S_B_ENC, CLEI_S_B_DESC; +def CLEI_S_H : CLEI_S_H_ENC, CLEI_S_H_DESC; +def CLEI_S_W : CLEI_S_W_ENC, CLEI_S_W_DESC; +def CLEI_S_D : CLEI_S_D_ENC, CLEI_S_D_DESC; -def CLEI_U_B : CLEI_U_B_ENC, CLEI_U_B_DESC, Requires<[HasMSA]>; -def CLEI_U_H : CLEI_U_H_ENC, CLEI_U_H_DESC, Requires<[HasMSA]>; -def CLEI_U_W : CLEI_U_W_ENC, CLEI_U_W_DESC, Requires<[HasMSA]>; -def CLEI_U_D : CLEI_U_D_ENC, CLEI_U_D_DESC, Requires<[HasMSA]>; +def CLEI_U_B : CLEI_U_B_ENC, CLEI_U_B_DESC; +def CLEI_U_H : 
CLEI_U_H_ENC, CLEI_U_H_DESC; +def CLEI_U_W : CLEI_U_W_ENC, CLEI_U_W_DESC; +def CLEI_U_D : CLEI_U_D_ENC, CLEI_U_D_DESC; -def CLT_S_B : CLT_S_B_ENC, CLT_S_B_DESC, Requires<[HasMSA]>; -def CLT_S_H : CLT_S_H_ENC, CLT_S_H_DESC, Requires<[HasMSA]>; -def CLT_S_W : CLT_S_W_ENC, CLT_S_W_DESC, Requires<[HasMSA]>; -def CLT_S_D : CLT_S_D_ENC, CLT_S_D_DESC, Requires<[HasMSA]>; +def CLT_S_B : CLT_S_B_ENC, CLT_S_B_DESC; +def CLT_S_H : CLT_S_H_ENC, CLT_S_H_DESC; +def CLT_S_W : CLT_S_W_ENC, CLT_S_W_DESC; +def CLT_S_D : CLT_S_D_ENC, CLT_S_D_DESC; -def CLT_U_B : CLT_U_B_ENC, CLT_U_B_DESC, Requires<[HasMSA]>; -def CLT_U_H : CLT_U_H_ENC, CLT_U_H_DESC, Requires<[HasMSA]>; -def CLT_U_W : CLT_U_W_ENC, CLT_U_W_DESC, Requires<[HasMSA]>; -def CLT_U_D : CLT_U_D_ENC, CLT_U_D_DESC, Requires<[HasMSA]>; +def CLT_U_B : CLT_U_B_ENC, CLT_U_B_DESC; +def CLT_U_H : CLT_U_H_ENC, CLT_U_H_DESC; +def CLT_U_W : CLT_U_W_ENC, CLT_U_W_DESC; +def CLT_U_D : CLT_U_D_ENC, CLT_U_D_DESC; -def CLTI_S_B : CLTI_S_B_ENC, CLTI_S_B_DESC, Requires<[HasMSA]>; -def CLTI_S_H : CLTI_S_H_ENC, CLTI_S_H_DESC, Requires<[HasMSA]>; -def CLTI_S_W : CLTI_S_W_ENC, CLTI_S_W_DESC, Requires<[HasMSA]>; -def CLTI_S_D : CLTI_S_D_ENC, CLTI_S_D_DESC, Requires<[HasMSA]>; +def CLTI_S_B : CLTI_S_B_ENC, CLTI_S_B_DESC; +def CLTI_S_H : CLTI_S_H_ENC, CLTI_S_H_DESC; +def CLTI_S_W : CLTI_S_W_ENC, CLTI_S_W_DESC; +def CLTI_S_D : CLTI_S_D_ENC, CLTI_S_D_DESC; -def CLTI_U_B : CLTI_U_B_ENC, CLTI_U_B_DESC, Requires<[HasMSA]>; -def CLTI_U_H : CLTI_U_H_ENC, CLTI_U_H_DESC, Requires<[HasMSA]>; -def CLTI_U_W : CLTI_U_W_ENC, CLTI_U_W_DESC, Requires<[HasMSA]>; -def CLTI_U_D : CLTI_U_D_ENC, CLTI_U_D_DESC, Requires<[HasMSA]>; +def CLTI_U_B : CLTI_U_B_ENC, CLTI_U_B_DESC; +def CLTI_U_H : CLTI_U_H_ENC, CLTI_U_H_DESC; +def CLTI_U_W : CLTI_U_W_ENC, CLTI_U_W_DESC; +def CLTI_U_D : CLTI_U_D_ENC, CLTI_U_D_DESC; -def COPY_S_B : COPY_S_B_ENC, COPY_S_B_DESC, Requires<[HasMSA]>; -def COPY_S_H : COPY_S_H_ENC, COPY_S_H_DESC, Requires<[HasMSA]>; -def COPY_S_W : COPY_S_W_ENC, COPY_S_W_DESC, Requires<[HasMSA]>; +def COPY_S_B : COPY_S_B_ENC, COPY_S_B_DESC; +def COPY_S_H : COPY_S_H_ENC, COPY_S_H_DESC; +def COPY_S_W : COPY_S_W_ENC, COPY_S_W_DESC; -def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC, Requires<[HasMSA]>; -def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC, Requires<[HasMSA]>; -def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC, Requires<[HasMSA]>; +def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC; +def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC; +def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC; -def CTCMSA : CTCMSA_ENC, CTCMSA_DESC, Requires<[HasMSA]>; +def CTCMSA : CTCMSA_ENC, CTCMSA_DESC; -def DIV_S_B : DIV_S_B_ENC, DIV_S_B_DESC, Requires<[HasMSA]>; -def DIV_S_H : DIV_S_H_ENC, DIV_S_H_DESC, Requires<[HasMSA]>; -def DIV_S_W : DIV_S_W_ENC, DIV_S_W_DESC, Requires<[HasMSA]>; -def DIV_S_D : DIV_S_D_ENC, DIV_S_D_DESC, Requires<[HasMSA]>; +def DIV_S_B : DIV_S_B_ENC, DIV_S_B_DESC; +def DIV_S_H : DIV_S_H_ENC, DIV_S_H_DESC; +def DIV_S_W : DIV_S_W_ENC, DIV_S_W_DESC; +def DIV_S_D : DIV_S_D_ENC, DIV_S_D_DESC; -def DIV_U_B : DIV_U_B_ENC, DIV_U_B_DESC, Requires<[HasMSA]>; -def DIV_U_H : DIV_U_H_ENC, DIV_U_H_DESC, Requires<[HasMSA]>; -def DIV_U_W : DIV_U_W_ENC, DIV_U_W_DESC, Requires<[HasMSA]>; -def DIV_U_D : DIV_U_D_ENC, DIV_U_D_DESC, Requires<[HasMSA]>; +def DIV_U_B : DIV_U_B_ENC, DIV_U_B_DESC; +def DIV_U_H : DIV_U_H_ENC, DIV_U_H_DESC; +def DIV_U_W : DIV_U_W_ENC, DIV_U_W_DESC; +def DIV_U_D : DIV_U_D_ENC, DIV_U_D_DESC; -def DOTP_S_B : DOTP_S_B_ENC, DOTP_S_B_DESC, Requires<[HasMSA]>; -def DOTP_S_H : DOTP_S_H_ENC, DOTP_S_H_DESC, Requires<[HasMSA]>; 
-def DOTP_S_W : DOTP_S_W_ENC, DOTP_S_W_DESC, Requires<[HasMSA]>; -def DOTP_S_D : DOTP_S_D_ENC, DOTP_S_D_DESC, Requires<[HasMSA]>; +def DOTP_S_B : DOTP_S_B_ENC, DOTP_S_B_DESC; +def DOTP_S_H : DOTP_S_H_ENC, DOTP_S_H_DESC; +def DOTP_S_W : DOTP_S_W_ENC, DOTP_S_W_DESC; +def DOTP_S_D : DOTP_S_D_ENC, DOTP_S_D_DESC; -def DOTP_U_B : DOTP_U_B_ENC, DOTP_U_B_DESC, Requires<[HasMSA]>; -def DOTP_U_H : DOTP_U_H_ENC, DOTP_U_H_DESC, Requires<[HasMSA]>; -def DOTP_U_W : DOTP_U_W_ENC, DOTP_U_W_DESC, Requires<[HasMSA]>; -def DOTP_U_D : DOTP_U_D_ENC, DOTP_U_D_DESC, Requires<[HasMSA]>; +def DOTP_U_B : DOTP_U_B_ENC, DOTP_U_B_DESC; +def DOTP_U_H : DOTP_U_H_ENC, DOTP_U_H_DESC; +def DOTP_U_W : DOTP_U_W_ENC, DOTP_U_W_DESC; +def DOTP_U_D : DOTP_U_D_ENC, DOTP_U_D_DESC; -def DPADD_S_H : DPADD_S_H_ENC, DPADD_S_H_DESC, Requires<[HasMSA]>; -def DPADD_S_W : DPADD_S_W_ENC, DPADD_S_W_DESC, Requires<[HasMSA]>; -def DPADD_S_D : DPADD_S_D_ENC, DPADD_S_D_DESC, Requires<[HasMSA]>; +def DPADD_S_H : DPADD_S_H_ENC, DPADD_S_H_DESC; +def DPADD_S_W : DPADD_S_W_ENC, DPADD_S_W_DESC; +def DPADD_S_D : DPADD_S_D_ENC, DPADD_S_D_DESC; -def DPADD_U_H : DPADD_U_H_ENC, DPADD_U_H_DESC, Requires<[HasMSA]>; -def DPADD_U_W : DPADD_U_W_ENC, DPADD_U_W_DESC, Requires<[HasMSA]>; -def DPADD_U_D : DPADD_U_D_ENC, DPADD_U_D_DESC, Requires<[HasMSA]>; +def DPADD_U_H : DPADD_U_H_ENC, DPADD_U_H_DESC; +def DPADD_U_W : DPADD_U_W_ENC, DPADD_U_W_DESC; +def DPADD_U_D : DPADD_U_D_ENC, DPADD_U_D_DESC; -def DPSUB_S_H : DPSUB_S_H_ENC, DPSUB_S_H_DESC, Requires<[HasMSA]>; -def DPSUB_S_W : DPSUB_S_W_ENC, DPSUB_S_W_DESC, Requires<[HasMSA]>; -def DPSUB_S_D : DPSUB_S_D_ENC, DPSUB_S_D_DESC, Requires<[HasMSA]>; +def DPSUB_S_H : DPSUB_S_H_ENC, DPSUB_S_H_DESC; +def DPSUB_S_W : DPSUB_S_W_ENC, DPSUB_S_W_DESC; +def DPSUB_S_D : DPSUB_S_D_ENC, DPSUB_S_D_DESC; -def DPSUB_U_H : DPSUB_U_H_ENC, DPSUB_U_H_DESC, Requires<[HasMSA]>; -def DPSUB_U_W : DPSUB_U_W_ENC, DPSUB_U_W_DESC, Requires<[HasMSA]>; -def DPSUB_U_D : DPSUB_U_D_ENC, DPSUB_U_D_DESC, Requires<[HasMSA]>; +def DPSUB_U_H : DPSUB_U_H_ENC, DPSUB_U_H_DESC; +def DPSUB_U_W : DPSUB_U_W_ENC, DPSUB_U_W_DESC; +def DPSUB_U_D : DPSUB_U_D_ENC, DPSUB_U_D_DESC; -def FADD_W : FADD_W_ENC, FADD_W_DESC, Requires<[HasMSA]>; -def FADD_D : FADD_D_ENC, FADD_D_DESC, Requires<[HasMSA]>; +def FADD_W : FADD_W_ENC, FADD_W_DESC; +def FADD_D : FADD_D_ENC, FADD_D_DESC; -def FCAF_W : FCAF_W_ENC, FCAF_W_DESC, Requires<[HasMSA]>; -def FCAF_D : FCAF_D_ENC, FCAF_D_DESC, Requires<[HasMSA]>; +def FCAF_W : FCAF_W_ENC, FCAF_W_DESC; +def FCAF_D : FCAF_D_ENC, FCAF_D_DESC; -def FCEQ_W : FCEQ_W_ENC, FCEQ_W_DESC, Requires<[HasMSA]>; -def FCEQ_D : FCEQ_D_ENC, FCEQ_D_DESC, Requires<[HasMSA]>; +def FCEQ_W : FCEQ_W_ENC, FCEQ_W_DESC; +def FCEQ_D : FCEQ_D_ENC, FCEQ_D_DESC; -def FCLE_W : FCLE_W_ENC, FCLE_W_DESC, Requires<[HasMSA]>; -def FCLE_D : FCLE_D_ENC, FCLE_D_DESC, Requires<[HasMSA]>; +def FCLE_W : FCLE_W_ENC, FCLE_W_DESC; +def FCLE_D : FCLE_D_ENC, FCLE_D_DESC; -def FCLT_W : FCLT_W_ENC, FCLT_W_DESC, Requires<[HasMSA]>; -def FCLT_D : FCLT_D_ENC, FCLT_D_DESC, Requires<[HasMSA]>; +def FCLT_W : FCLT_W_ENC, FCLT_W_DESC; +def FCLT_D : FCLT_D_ENC, FCLT_D_DESC; -def FCLASS_W : FCLASS_W_ENC, FCLASS_W_DESC, Requires<[HasMSA]>; -def FCLASS_D : FCLASS_D_ENC, FCLASS_D_DESC, Requires<[HasMSA]>; +def FCLASS_W : FCLASS_W_ENC, FCLASS_W_DESC; +def FCLASS_D : FCLASS_D_ENC, FCLASS_D_DESC; -def FCNE_W : FCNE_W_ENC, FCNE_W_DESC, Requires<[HasMSA]>; -def FCNE_D : FCNE_D_ENC, FCNE_D_DESC, Requires<[HasMSA]>; +def FCNE_W : FCNE_W_ENC, FCNE_W_DESC; +def FCNE_D : FCNE_D_ENC, FCNE_D_DESC; -def FCOR_W : 
FCOR_W_ENC, FCOR_W_DESC, Requires<[HasMSA]>; -def FCOR_D : FCOR_D_ENC, FCOR_D_DESC, Requires<[HasMSA]>; +def FCOR_W : FCOR_W_ENC, FCOR_W_DESC; +def FCOR_D : FCOR_D_ENC, FCOR_D_DESC; -def FCUEQ_W : FCUEQ_W_ENC, FCUEQ_W_DESC, Requires<[HasMSA]>; -def FCUEQ_D : FCUEQ_D_ENC, FCUEQ_D_DESC, Requires<[HasMSA]>; +def FCUEQ_W : FCUEQ_W_ENC, FCUEQ_W_DESC; +def FCUEQ_D : FCUEQ_D_ENC, FCUEQ_D_DESC; -def FCULE_W : FCULE_W_ENC, FCULE_W_DESC, Requires<[HasMSA]>; -def FCULE_D : FCULE_D_ENC, FCULE_D_DESC, Requires<[HasMSA]>; +def FCULE_W : FCULE_W_ENC, FCULE_W_DESC; +def FCULE_D : FCULE_D_ENC, FCULE_D_DESC; -def FCULT_W : FCULT_W_ENC, FCULT_W_DESC, Requires<[HasMSA]>; -def FCULT_D : FCULT_D_ENC, FCULT_D_DESC, Requires<[HasMSA]>; +def FCULT_W : FCULT_W_ENC, FCULT_W_DESC; +def FCULT_D : FCULT_D_ENC, FCULT_D_DESC; -def FCUN_W : FCUN_W_ENC, FCUN_W_DESC, Requires<[HasMSA]>; -def FCUN_D : FCUN_D_ENC, FCUN_D_DESC, Requires<[HasMSA]>; +def FCUN_W : FCUN_W_ENC, FCUN_W_DESC; +def FCUN_D : FCUN_D_ENC, FCUN_D_DESC; -def FCUNE_W : FCUNE_W_ENC, FCUNE_W_DESC, Requires<[HasMSA]>; -def FCUNE_D : FCUNE_D_ENC, FCUNE_D_DESC, Requires<[HasMSA]>; +def FCUNE_W : FCUNE_W_ENC, FCUNE_W_DESC; +def FCUNE_D : FCUNE_D_ENC, FCUNE_D_DESC; -def FDIV_W : FDIV_W_ENC, FDIV_W_DESC, Requires<[HasMSA]>; -def FDIV_D : FDIV_D_ENC, FDIV_D_DESC, Requires<[HasMSA]>; +def FDIV_W : FDIV_W_ENC, FDIV_W_DESC; +def FDIV_D : FDIV_D_ENC, FDIV_D_DESC; -def FEXDO_H : FEXDO_H_ENC, FEXDO_H_DESC, Requires<[HasMSA]>; -def FEXDO_W : FEXDO_W_ENC, FEXDO_W_DESC, Requires<[HasMSA]>; +def FEXDO_H : FEXDO_H_ENC, FEXDO_H_DESC; +def FEXDO_W : FEXDO_W_ENC, FEXDO_W_DESC; -def FEXP2_W : FEXP2_W_ENC, FEXP2_W_DESC, Requires<[HasMSA]>; -def FEXP2_D : FEXP2_D_ENC, FEXP2_D_DESC, Requires<[HasMSA]>; +def FEXP2_W : FEXP2_W_ENC, FEXP2_W_DESC; +def FEXP2_D : FEXP2_D_ENC, FEXP2_D_DESC; -def FEXUPL_W : FEXUPL_W_ENC, FEXUPL_W_DESC, Requires<[HasMSA]>; -def FEXUPL_D : FEXUPL_D_ENC, FEXUPL_D_DESC, Requires<[HasMSA]>; +def FEXUPL_W : FEXUPL_W_ENC, FEXUPL_W_DESC; +def FEXUPL_D : FEXUPL_D_ENC, FEXUPL_D_DESC; -def FEXUPR_W : FEXUPR_W_ENC, FEXUPR_W_DESC, Requires<[HasMSA]>; -def FEXUPR_D : FEXUPR_D_ENC, FEXUPR_D_DESC, Requires<[HasMSA]>; +def FEXUPR_W : FEXUPR_W_ENC, FEXUPR_W_DESC; +def FEXUPR_D : FEXUPR_D_ENC, FEXUPR_D_DESC; -def FFINT_S_W : FFINT_S_W_ENC, FFINT_S_W_DESC, Requires<[HasMSA]>; -def FFINT_S_D : FFINT_S_D_ENC, FFINT_S_D_DESC, Requires<[HasMSA]>; +def FFINT_S_W : FFINT_S_W_ENC, FFINT_S_W_DESC; +def FFINT_S_D : FFINT_S_D_ENC, FFINT_S_D_DESC; -def FFINT_U_W : FFINT_U_W_ENC, FFINT_U_W_DESC, Requires<[HasMSA]>; -def FFINT_U_D : FFINT_U_D_ENC, FFINT_U_D_DESC, Requires<[HasMSA]>; +def FFINT_U_W : FFINT_U_W_ENC, FFINT_U_W_DESC; +def FFINT_U_D : FFINT_U_D_ENC, FFINT_U_D_DESC; -def FFQL_W : FFQL_W_ENC, FFQL_W_DESC, Requires<[HasMSA]>; -def FFQL_D : FFQL_D_ENC, FFQL_D_DESC, Requires<[HasMSA]>; +def FFQL_W : FFQL_W_ENC, FFQL_W_DESC; +def FFQL_D : FFQL_D_ENC, FFQL_D_DESC; -def FFQR_W : FFQR_W_ENC, FFQR_W_DESC, Requires<[HasMSA]>; -def FFQR_D : FFQR_D_ENC, FFQR_D_DESC, Requires<[HasMSA]>; +def FFQR_W : FFQR_W_ENC, FFQR_W_DESC; +def FFQR_D : FFQR_D_ENC, FFQR_D_DESC; -def FILL_B : FILL_B_ENC, FILL_B_DESC, Requires<[HasMSA]>; -def FILL_H : FILL_H_ENC, FILL_H_DESC, Requires<[HasMSA]>; -def FILL_W : FILL_W_ENC, FILL_W_DESC, Requires<[HasMSA]>; +def FILL_B : FILL_B_ENC, FILL_B_DESC; +def FILL_H : FILL_H_ENC, FILL_H_DESC; +def FILL_W : FILL_W_ENC, FILL_W_DESC; -def FLOG2_W : FLOG2_W_ENC, FLOG2_W_DESC, Requires<[HasMSA]>; -def FLOG2_D : FLOG2_D_ENC, FLOG2_D_DESC, Requires<[HasMSA]>; +def FLOG2_W : 
FLOG2_W_ENC, FLOG2_W_DESC; +def FLOG2_D : FLOG2_D_ENC, FLOG2_D_DESC; -def FMADD_W : FMADD_W_ENC, FMADD_W_DESC, Requires<[HasMSA]>; -def FMADD_D : FMADD_D_ENC, FMADD_D_DESC, Requires<[HasMSA]>; +def FMADD_W : FMADD_W_ENC, FMADD_W_DESC; +def FMADD_D : FMADD_D_ENC, FMADD_D_DESC; -def FMAX_W : FMAX_W_ENC, FMAX_W_DESC, Requires<[HasMSA]>; -def FMAX_D : FMAX_D_ENC, FMAX_D_DESC, Requires<[HasMSA]>; +def FMAX_W : FMAX_W_ENC, FMAX_W_DESC; +def FMAX_D : FMAX_D_ENC, FMAX_D_DESC; -def FMAX_A_W : FMAX_A_W_ENC, FMAX_A_W_DESC, Requires<[HasMSA]>; -def FMAX_A_D : FMAX_A_D_ENC, FMAX_A_D_DESC, Requires<[HasMSA]>; +def FMAX_A_W : FMAX_A_W_ENC, FMAX_A_W_DESC; +def FMAX_A_D : FMAX_A_D_ENC, FMAX_A_D_DESC; -def FMIN_W : FMIN_W_ENC, FMIN_W_DESC, Requires<[HasMSA]>; -def FMIN_D : FMIN_D_ENC, FMIN_D_DESC, Requires<[HasMSA]>; +def FMIN_W : FMIN_W_ENC, FMIN_W_DESC; +def FMIN_D : FMIN_D_ENC, FMIN_D_DESC; -def FMIN_A_W : FMIN_A_W_ENC, FMIN_A_W_DESC, Requires<[HasMSA]>; -def FMIN_A_D : FMIN_A_D_ENC, FMIN_A_D_DESC, Requires<[HasMSA]>; +def FMIN_A_W : FMIN_A_W_ENC, FMIN_A_W_DESC; +def FMIN_A_D : FMIN_A_D_ENC, FMIN_A_D_DESC; -def FMSUB_W : FMSUB_W_ENC, FMSUB_W_DESC, Requires<[HasMSA]>; -def FMSUB_D : FMSUB_D_ENC, FMSUB_D_DESC, Requires<[HasMSA]>; +def FMSUB_W : FMSUB_W_ENC, FMSUB_W_DESC; +def FMSUB_D : FMSUB_D_ENC, FMSUB_D_DESC; -def FMUL_W : FMUL_W_ENC, FMUL_W_DESC, Requires<[HasMSA]>; -def FMUL_D : FMUL_D_ENC, FMUL_D_DESC, Requires<[HasMSA]>; +def FMUL_W : FMUL_W_ENC, FMUL_W_DESC; +def FMUL_D : FMUL_D_ENC, FMUL_D_DESC; -def FRINT_W : FRINT_W_ENC, FRINT_W_DESC, Requires<[HasMSA]>; -def FRINT_D : FRINT_D_ENC, FRINT_D_DESC, Requires<[HasMSA]>; +def FRINT_W : FRINT_W_ENC, FRINT_W_DESC; +def FRINT_D : FRINT_D_ENC, FRINT_D_DESC; -def FRCP_W : FRCP_W_ENC, FRCP_W_DESC, Requires<[HasMSA]>; -def FRCP_D : FRCP_D_ENC, FRCP_D_DESC, Requires<[HasMSA]>; +def FRCP_W : FRCP_W_ENC, FRCP_W_DESC; +def FRCP_D : FRCP_D_ENC, FRCP_D_DESC; -def FRSQRT_W : FRSQRT_W_ENC, FRSQRT_W_DESC, Requires<[HasMSA]>; -def FRSQRT_D : FRSQRT_D_ENC, FRSQRT_D_DESC, Requires<[HasMSA]>; +def FRSQRT_W : FRSQRT_W_ENC, FRSQRT_W_DESC; +def FRSQRT_D : FRSQRT_D_ENC, FRSQRT_D_DESC; -def FSAF_W : FSAF_W_ENC, FSAF_W_DESC, Requires<[HasMSA]>; -def FSAF_D : FSAF_D_ENC, FSAF_D_DESC, Requires<[HasMSA]>; +def FSAF_W : FSAF_W_ENC, FSAF_W_DESC; +def FSAF_D : FSAF_D_ENC, FSAF_D_DESC; -def FSEQ_W : FSEQ_W_ENC, FSEQ_W_DESC, Requires<[HasMSA]>; -def FSEQ_D : FSEQ_D_ENC, FSEQ_D_DESC, Requires<[HasMSA]>; +def FSEQ_W : FSEQ_W_ENC, FSEQ_W_DESC; +def FSEQ_D : FSEQ_D_ENC, FSEQ_D_DESC; -def FSLE_W : FSLE_W_ENC, FSLE_W_DESC, Requires<[HasMSA]>; -def FSLE_D : FSLE_D_ENC, FSLE_D_DESC, Requires<[HasMSA]>; +def FSLE_W : FSLE_W_ENC, FSLE_W_DESC; +def FSLE_D : FSLE_D_ENC, FSLE_D_DESC; -def FSLT_W : FSLT_W_ENC, FSLT_W_DESC, Requires<[HasMSA]>; -def FSLT_D : FSLT_D_ENC, FSLT_D_DESC, Requires<[HasMSA]>; +def FSLT_W : FSLT_W_ENC, FSLT_W_DESC; +def FSLT_D : FSLT_D_ENC, FSLT_D_DESC; -def FSNE_W : FSNE_W_ENC, FSNE_W_DESC, Requires<[HasMSA]>; -def FSNE_D : FSNE_D_ENC, FSNE_D_DESC, Requires<[HasMSA]>; +def FSNE_W : FSNE_W_ENC, FSNE_W_DESC; +def FSNE_D : FSNE_D_ENC, FSNE_D_DESC; -def FSOR_W : FSOR_W_ENC, FSOR_W_DESC, Requires<[HasMSA]>; -def FSOR_D : FSOR_D_ENC, FSOR_D_DESC, Requires<[HasMSA]>; +def FSOR_W : FSOR_W_ENC, FSOR_W_DESC; +def FSOR_D : FSOR_D_ENC, FSOR_D_DESC; -def FSQRT_W : FSQRT_W_ENC, FSQRT_W_DESC, Requires<[HasMSA]>; -def FSQRT_D : FSQRT_D_ENC, FSQRT_D_DESC, Requires<[HasMSA]>; +def FSQRT_W : FSQRT_W_ENC, FSQRT_W_DESC; +def FSQRT_D : FSQRT_D_ENC, FSQRT_D_DESC; -def FSUB_W : FSUB_W_ENC, 
FSUB_W_DESC, Requires<[HasMSA]>; -def FSUB_D : FSUB_D_ENC, FSUB_D_DESC, Requires<[HasMSA]>; +def FSUB_W : FSUB_W_ENC, FSUB_W_DESC; +def FSUB_D : FSUB_D_ENC, FSUB_D_DESC; -def FSUEQ_W : FSUEQ_W_ENC, FSUEQ_W_DESC, Requires<[HasMSA]>; -def FSUEQ_D : FSUEQ_D_ENC, FSUEQ_D_DESC, Requires<[HasMSA]>; +def FSUEQ_W : FSUEQ_W_ENC, FSUEQ_W_DESC; +def FSUEQ_D : FSUEQ_D_ENC, FSUEQ_D_DESC; -def FSULE_W : FSULE_W_ENC, FSULE_W_DESC, Requires<[HasMSA]>; -def FSULE_D : FSULE_D_ENC, FSULE_D_DESC, Requires<[HasMSA]>; +def FSULE_W : FSULE_W_ENC, FSULE_W_DESC; +def FSULE_D : FSULE_D_ENC, FSULE_D_DESC; -def FSULT_W : FSULT_W_ENC, FSULT_W_DESC, Requires<[HasMSA]>; -def FSULT_D : FSULT_D_ENC, FSULT_D_DESC, Requires<[HasMSA]>; +def FSULT_W : FSULT_W_ENC, FSULT_W_DESC; +def FSULT_D : FSULT_D_ENC, FSULT_D_DESC; -def FSUN_W : FSUN_W_ENC, FSUN_W_DESC, Requires<[HasMSA]>; -def FSUN_D : FSUN_D_ENC, FSUN_D_DESC, Requires<[HasMSA]>; +def FSUN_W : FSUN_W_ENC, FSUN_W_DESC; +def FSUN_D : FSUN_D_ENC, FSUN_D_DESC; -def FSUNE_W : FSUNE_W_ENC, FSUNE_W_DESC, Requires<[HasMSA]>; -def FSUNE_D : FSUNE_D_ENC, FSUNE_D_DESC, Requires<[HasMSA]>; +def FSUNE_W : FSUNE_W_ENC, FSUNE_W_DESC; +def FSUNE_D : FSUNE_D_ENC, FSUNE_D_DESC; -def FTRUNC_S_W : FTRUNC_S_W_ENC, FTRUNC_S_W_DESC, Requires<[HasMSA]>; -def FTRUNC_S_D : FTRUNC_S_D_ENC, FTRUNC_S_D_DESC, Requires<[HasMSA]>; +def FTRUNC_S_W : FTRUNC_S_W_ENC, FTRUNC_S_W_DESC; +def FTRUNC_S_D : FTRUNC_S_D_ENC, FTRUNC_S_D_DESC; -def FTRUNC_U_W : FTRUNC_U_W_ENC, FTRUNC_U_W_DESC, Requires<[HasMSA]>; -def FTRUNC_U_D : FTRUNC_U_D_ENC, FTRUNC_U_D_DESC, Requires<[HasMSA]>; +def FTRUNC_U_W : FTRUNC_U_W_ENC, FTRUNC_U_W_DESC; +def FTRUNC_U_D : FTRUNC_U_D_ENC, FTRUNC_U_D_DESC; -def FTINT_S_W : FTINT_S_W_ENC, FTINT_S_W_DESC, Requires<[HasMSA]>; -def FTINT_S_D : FTINT_S_D_ENC, FTINT_S_D_DESC, Requires<[HasMSA]>; +def FTINT_S_W : FTINT_S_W_ENC, FTINT_S_W_DESC; +def FTINT_S_D : FTINT_S_D_ENC, FTINT_S_D_DESC; -def FTINT_U_W : FTINT_U_W_ENC, FTINT_U_W_DESC, Requires<[HasMSA]>; -def FTINT_U_D : FTINT_U_D_ENC, FTINT_U_D_DESC, Requires<[HasMSA]>; +def FTINT_U_W : FTINT_U_W_ENC, FTINT_U_W_DESC; +def FTINT_U_D : FTINT_U_D_ENC, FTINT_U_D_DESC; -def FTQ_H : FTQ_H_ENC, FTQ_H_DESC, Requires<[HasMSA]>; -def FTQ_W : FTQ_W_ENC, FTQ_W_DESC, Requires<[HasMSA]>; +def FTQ_H : FTQ_H_ENC, FTQ_H_DESC; +def FTQ_W : FTQ_W_ENC, FTQ_W_DESC; -def HADD_S_H : HADD_S_H_ENC, HADD_S_H_DESC, Requires<[HasMSA]>; -def HADD_S_W : HADD_S_W_ENC, HADD_S_W_DESC, Requires<[HasMSA]>; -def HADD_S_D : HADD_S_D_ENC, HADD_S_D_DESC, Requires<[HasMSA]>; +def HADD_S_H : HADD_S_H_ENC, HADD_S_H_DESC; +def HADD_S_W : HADD_S_W_ENC, HADD_S_W_DESC; +def HADD_S_D : HADD_S_D_ENC, HADD_S_D_DESC; -def HADD_U_H : HADD_U_H_ENC, HADD_U_H_DESC, Requires<[HasMSA]>; -def HADD_U_W : HADD_U_W_ENC, HADD_U_W_DESC, Requires<[HasMSA]>; -def HADD_U_D : HADD_U_D_ENC, HADD_U_D_DESC, Requires<[HasMSA]>; +def HADD_U_H : HADD_U_H_ENC, HADD_U_H_DESC; +def HADD_U_W : HADD_U_W_ENC, HADD_U_W_DESC; +def HADD_U_D : HADD_U_D_ENC, HADD_U_D_DESC; -def HSUB_S_H : HSUB_S_H_ENC, HSUB_S_H_DESC, Requires<[HasMSA]>; -def HSUB_S_W : HSUB_S_W_ENC, HSUB_S_W_DESC, Requires<[HasMSA]>; -def HSUB_S_D : HSUB_S_D_ENC, HSUB_S_D_DESC, Requires<[HasMSA]>; +def HSUB_S_H : HSUB_S_H_ENC, HSUB_S_H_DESC; +def HSUB_S_W : HSUB_S_W_ENC, HSUB_S_W_DESC; +def HSUB_S_D : HSUB_S_D_ENC, HSUB_S_D_DESC; -def HSUB_U_H : HSUB_U_H_ENC, HSUB_U_H_DESC, Requires<[HasMSA]>; -def HSUB_U_W : HSUB_U_W_ENC, HSUB_U_W_DESC, Requires<[HasMSA]>; -def HSUB_U_D : HSUB_U_D_ENC, HSUB_U_D_DESC, Requires<[HasMSA]>; +def HSUB_U_H : HSUB_U_H_ENC, 
HSUB_U_H_DESC; +def HSUB_U_W : HSUB_U_W_ENC, HSUB_U_W_DESC; +def HSUB_U_D : HSUB_U_D_ENC, HSUB_U_D_DESC; -def ILVEV_B : ILVEV_B_ENC, ILVEV_B_DESC, Requires<[HasMSA]>; -def ILVEV_H : ILVEV_H_ENC, ILVEV_H_DESC, Requires<[HasMSA]>; -def ILVEV_W : ILVEV_W_ENC, ILVEV_W_DESC, Requires<[HasMSA]>; -def ILVEV_D : ILVEV_D_ENC, ILVEV_D_DESC, Requires<[HasMSA]>; +def ILVEV_B : ILVEV_B_ENC, ILVEV_B_DESC; +def ILVEV_H : ILVEV_H_ENC, ILVEV_H_DESC; +def ILVEV_W : ILVEV_W_ENC, ILVEV_W_DESC; +def ILVEV_D : ILVEV_D_ENC, ILVEV_D_DESC; -def ILVL_B : ILVL_B_ENC, ILVL_B_DESC, Requires<[HasMSA]>; -def ILVL_H : ILVL_H_ENC, ILVL_H_DESC, Requires<[HasMSA]>; -def ILVL_W : ILVL_W_ENC, ILVL_W_DESC, Requires<[HasMSA]>; -def ILVL_D : ILVL_D_ENC, ILVL_D_DESC, Requires<[HasMSA]>; +def ILVL_B : ILVL_B_ENC, ILVL_B_DESC; +def ILVL_H : ILVL_H_ENC, ILVL_H_DESC; +def ILVL_W : ILVL_W_ENC, ILVL_W_DESC; +def ILVL_D : ILVL_D_ENC, ILVL_D_DESC; -def ILVOD_B : ILVOD_B_ENC, ILVOD_B_DESC, Requires<[HasMSA]>; -def ILVOD_H : ILVOD_H_ENC, ILVOD_H_DESC, Requires<[HasMSA]>; -def ILVOD_W : ILVOD_W_ENC, ILVOD_W_DESC, Requires<[HasMSA]>; -def ILVOD_D : ILVOD_D_ENC, ILVOD_D_DESC, Requires<[HasMSA]>; +def ILVOD_B : ILVOD_B_ENC, ILVOD_B_DESC; +def ILVOD_H : ILVOD_H_ENC, ILVOD_H_DESC; +def ILVOD_W : ILVOD_W_ENC, ILVOD_W_DESC; +def ILVOD_D : ILVOD_D_ENC, ILVOD_D_DESC; -def ILVR_B : ILVR_B_ENC, ILVR_B_DESC, Requires<[HasMSA]>; -def ILVR_H : ILVR_H_ENC, ILVR_H_DESC, Requires<[HasMSA]>; -def ILVR_W : ILVR_W_ENC, ILVR_W_DESC, Requires<[HasMSA]>; -def ILVR_D : ILVR_D_ENC, ILVR_D_DESC, Requires<[HasMSA]>; +def ILVR_B : ILVR_B_ENC, ILVR_B_DESC; +def ILVR_H : ILVR_H_ENC, ILVR_H_DESC; +def ILVR_W : ILVR_W_ENC, ILVR_W_DESC; +def ILVR_D : ILVR_D_ENC, ILVR_D_DESC; -def INSERT_B : INSERT_B_ENC, INSERT_B_DESC, Requires<[HasMSA]>; -def INSERT_H : INSERT_H_ENC, INSERT_H_DESC, Requires<[HasMSA]>; -def INSERT_W : INSERT_W_ENC, INSERT_W_DESC, Requires<[HasMSA]>; +def INSERT_B : INSERT_B_ENC, INSERT_B_DESC; +def INSERT_H : INSERT_H_ENC, INSERT_H_DESC; +def INSERT_W : INSERT_W_ENC, INSERT_W_DESC; -def INSVE_B : INSVE_B_ENC, INSVE_B_DESC, Requires<[HasMSA]>; -def INSVE_H : INSVE_H_ENC, INSVE_H_DESC, Requires<[HasMSA]>; -def INSVE_W : INSVE_W_ENC, INSVE_W_DESC, Requires<[HasMSA]>; -def INSVE_D : INSVE_D_ENC, INSVE_D_DESC, Requires<[HasMSA]>; +def INSVE_B : INSVE_B_ENC, INSVE_B_DESC; +def INSVE_H : INSVE_H_ENC, INSVE_H_DESC; +def INSVE_W : INSVE_W_ENC, INSVE_W_DESC; +def INSVE_D : INSVE_D_ENC, INSVE_D_DESC; -def LD_B: LD_B_ENC, LD_B_DESC, Requires<[HasMSA]>; -def LD_H: LD_H_ENC, LD_H_DESC, Requires<[HasMSA]>; -def LD_W: LD_W_ENC, LD_W_DESC, Requires<[HasMSA]>; -def LD_D: LD_D_ENC, LD_D_DESC, Requires<[HasMSA]>; +def LD_B: LD_B_ENC, LD_B_DESC; +def LD_H: LD_H_ENC, LD_H_DESC; +def LD_W: LD_W_ENC, LD_W_DESC; +def LD_D: LD_D_ENC, LD_D_DESC; -def LDI_B : LDI_B_ENC, LDI_B_DESC, Requires<[HasMSA]>; -def LDI_H : LDI_H_ENC, LDI_H_DESC, Requires<[HasMSA]>; -def LDI_W : LDI_W_ENC, LDI_W_DESC, Requires<[HasMSA]>; +def LDI_B : LDI_B_ENC, LDI_B_DESC; +def LDI_H : LDI_H_ENC, LDI_H_DESC; +def LDI_W : LDI_W_ENC, LDI_W_DESC; -def LDX_B: LDX_B_ENC, LDX_B_DESC, Requires<[HasMSA]>; -def LDX_H: LDX_H_ENC, LDX_H_DESC, Requires<[HasMSA]>; -def LDX_W: LDX_W_ENC, LDX_W_DESC, Requires<[HasMSA]>; -def LDX_D: LDX_D_ENC, LDX_D_DESC, Requires<[HasMSA]>; +def LDX_B: LDX_B_ENC, LDX_B_DESC; +def LDX_H: LDX_H_ENC, LDX_H_DESC; +def LDX_W: LDX_W_ENC, LDX_W_DESC; +def LDX_D: LDX_D_ENC, LDX_D_DESC; -def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC, Requires<[HasMSA]>; -def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC, 
Requires<[HasMSA]>; - -def MADDR_Q_H : MADDR_Q_H_ENC, MADDR_Q_H_DESC, Requires<[HasMSA]>; -def MADDR_Q_W : MADDR_Q_W_ENC, MADDR_Q_W_DESC, Requires<[HasMSA]>; - -def MADDV_B : MADDV_B_ENC, MADDV_B_DESC, Requires<[HasMSA]>; -def MADDV_H : MADDV_H_ENC, MADDV_H_DESC, Requires<[HasMSA]>; -def MADDV_W : MADDV_W_ENC, MADDV_W_DESC, Requires<[HasMSA]>; -def MADDV_D : MADDV_D_ENC, MADDV_D_DESC, Requires<[HasMSA]>; +def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC; +def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC; + +def MADDR_Q_H : MADDR_Q_H_ENC, MADDR_Q_H_DESC; +def MADDR_Q_W : MADDR_Q_W_ENC, MADDR_Q_W_DESC; + +def MADDV_B : MADDV_B_ENC, MADDV_B_DESC; +def MADDV_H : MADDV_H_ENC, MADDV_H_DESC; +def MADDV_W : MADDV_W_ENC, MADDV_W_DESC; +def MADDV_D : MADDV_D_ENC, MADDV_D_DESC; -def MAX_A_B : MAX_A_B_ENC, MAX_A_B_DESC, Requires<[HasMSA]>; -def MAX_A_H : MAX_A_H_ENC, MAX_A_H_DESC, Requires<[HasMSA]>; -def MAX_A_W : MAX_A_W_ENC, MAX_A_W_DESC, Requires<[HasMSA]>; -def MAX_A_D : MAX_A_D_ENC, MAX_A_D_DESC, Requires<[HasMSA]>; +def MAX_A_B : MAX_A_B_ENC, MAX_A_B_DESC; +def MAX_A_H : MAX_A_H_ENC, MAX_A_H_DESC; +def MAX_A_W : MAX_A_W_ENC, MAX_A_W_DESC; +def MAX_A_D : MAX_A_D_ENC, MAX_A_D_DESC; -def MAX_S_B : MAX_S_B_ENC, MAX_S_B_DESC, Requires<[HasMSA]>; -def MAX_S_H : MAX_S_H_ENC, MAX_S_H_DESC, Requires<[HasMSA]>; -def MAX_S_W : MAX_S_W_ENC, MAX_S_W_DESC, Requires<[HasMSA]>; -def MAX_S_D : MAX_S_D_ENC, MAX_S_D_DESC, Requires<[HasMSA]>; - -def MAX_U_B : MAX_U_B_ENC, MAX_U_B_DESC, Requires<[HasMSA]>; -def MAX_U_H : MAX_U_H_ENC, MAX_U_H_DESC, Requires<[HasMSA]>; -def MAX_U_W : MAX_U_W_ENC, MAX_U_W_DESC, Requires<[HasMSA]>; -def MAX_U_D : MAX_U_D_ENC, MAX_U_D_DESC, Requires<[HasMSA]>; - -def MAXI_S_B : MAXI_S_B_ENC, MAXI_S_B_DESC, Requires<[HasMSA]>; -def MAXI_S_H : MAXI_S_H_ENC, MAXI_S_H_DESC, Requires<[HasMSA]>; -def MAXI_S_W : MAXI_S_W_ENC, MAXI_S_W_DESC, Requires<[HasMSA]>; -def MAXI_S_D : MAXI_S_D_ENC, MAXI_S_D_DESC, Requires<[HasMSA]>; - -def MAXI_U_B : MAXI_U_B_ENC, MAXI_U_B_DESC, Requires<[HasMSA]>; -def MAXI_U_H : MAXI_U_H_ENC, MAXI_U_H_DESC, Requires<[HasMSA]>; -def MAXI_U_W : MAXI_U_W_ENC, MAXI_U_W_DESC, Requires<[HasMSA]>; -def MAXI_U_D : MAXI_U_D_ENC, MAXI_U_D_DESC, Requires<[HasMSA]>; - -def MIN_A_B : MIN_A_B_ENC, MIN_A_B_DESC, Requires<[HasMSA]>; -def MIN_A_H : MIN_A_H_ENC, MIN_A_H_DESC, Requires<[HasMSA]>; -def MIN_A_W : MIN_A_W_ENC, MIN_A_W_DESC, Requires<[HasMSA]>; -def MIN_A_D : MIN_A_D_ENC, MIN_A_D_DESC, Requires<[HasMSA]>; - -def MIN_S_B : MIN_S_B_ENC, MIN_S_B_DESC, Requires<[HasMSA]>; -def MIN_S_H : MIN_S_H_ENC, MIN_S_H_DESC, Requires<[HasMSA]>; -def MIN_S_W : MIN_S_W_ENC, MIN_S_W_DESC, Requires<[HasMSA]>; -def MIN_S_D : MIN_S_D_ENC, MIN_S_D_DESC, Requires<[HasMSA]>; - -def MIN_U_B : MIN_U_B_ENC, MIN_U_B_DESC, Requires<[HasMSA]>; -def MIN_U_H : MIN_U_H_ENC, MIN_U_H_DESC, Requires<[HasMSA]>; -def MIN_U_W : MIN_U_W_ENC, MIN_U_W_DESC, Requires<[HasMSA]>; -def MIN_U_D : MIN_U_D_ENC, MIN_U_D_DESC, Requires<[HasMSA]>; - -def MINI_S_B : MINI_S_B_ENC, MINI_S_B_DESC, Requires<[HasMSA]>; -def MINI_S_H : MINI_S_H_ENC, MINI_S_H_DESC, Requires<[HasMSA]>; -def MINI_S_W : MINI_S_W_ENC, MINI_S_W_DESC, Requires<[HasMSA]>; -def MINI_S_D : MINI_S_D_ENC, MINI_S_D_DESC, Requires<[HasMSA]>; - -def MINI_U_B : MINI_U_B_ENC, MINI_U_B_DESC, Requires<[HasMSA]>; -def MINI_U_H : MINI_U_H_ENC, MINI_U_H_DESC, Requires<[HasMSA]>; -def MINI_U_W : MINI_U_W_ENC, MINI_U_W_DESC, Requires<[HasMSA]>; -def MINI_U_D : MINI_U_D_ENC, MINI_U_D_DESC, Requires<[HasMSA]>; - -def MOD_S_B : MOD_S_B_ENC, MOD_S_B_DESC, Requires<[HasMSA]>; -def MOD_S_H 
: MOD_S_H_ENC, MOD_S_H_DESC, Requires<[HasMSA]>; -def MOD_S_W : MOD_S_W_ENC, MOD_S_W_DESC, Requires<[HasMSA]>; -def MOD_S_D : MOD_S_D_ENC, MOD_S_D_DESC, Requires<[HasMSA]>; - -def MOD_U_B : MOD_U_B_ENC, MOD_U_B_DESC, Requires<[HasMSA]>; -def MOD_U_H : MOD_U_H_ENC, MOD_U_H_DESC, Requires<[HasMSA]>; -def MOD_U_W : MOD_U_W_ENC, MOD_U_W_DESC, Requires<[HasMSA]>; -def MOD_U_D : MOD_U_D_ENC, MOD_U_D_DESC, Requires<[HasMSA]>; - -def MOVE_V : MOVE_V_ENC, MOVE_V_DESC, Requires<[HasMSA]>; - -def MSUB_Q_H : MSUB_Q_H_ENC, MSUB_Q_H_DESC, Requires<[HasMSA]>; -def MSUB_Q_W : MSUB_Q_W_ENC, MSUB_Q_W_DESC, Requires<[HasMSA]>; - -def MSUBR_Q_H : MSUBR_Q_H_ENC, MSUBR_Q_H_DESC, Requires<[HasMSA]>; -def MSUBR_Q_W : MSUBR_Q_W_ENC, MSUBR_Q_W_DESC, Requires<[HasMSA]>; - -def MSUBV_B : MSUBV_B_ENC, MSUBV_B_DESC, Requires<[HasMSA]>; -def MSUBV_H : MSUBV_H_ENC, MSUBV_H_DESC, Requires<[HasMSA]>; -def MSUBV_W : MSUBV_W_ENC, MSUBV_W_DESC, Requires<[HasMSA]>; -def MSUBV_D : MSUBV_D_ENC, MSUBV_D_DESC, Requires<[HasMSA]>; - -def MUL_Q_H : MUL_Q_H_ENC, MUL_Q_H_DESC, Requires<[HasMSA]>; -def MUL_Q_W : MUL_Q_W_ENC, MUL_Q_W_DESC, Requires<[HasMSA]>; - -def MULR_Q_H : MULR_Q_H_ENC, MULR_Q_H_DESC, Requires<[HasMSA]>; -def MULR_Q_W : MULR_Q_W_ENC, MULR_Q_W_DESC, Requires<[HasMSA]>; - -def MULV_B : MULV_B_ENC, MULV_B_DESC, Requires<[HasMSA]>; -def MULV_H : MULV_H_ENC, MULV_H_DESC, Requires<[HasMSA]>; -def MULV_W : MULV_W_ENC, MULV_W_DESC, Requires<[HasMSA]>; -def MULV_D : MULV_D_ENC, MULV_D_DESC, Requires<[HasMSA]>; - -def NLOC_B : NLOC_B_ENC, NLOC_B_DESC, Requires<[HasMSA]>; -def NLOC_H : NLOC_H_ENC, NLOC_H_DESC, Requires<[HasMSA]>; -def NLOC_W : NLOC_W_ENC, NLOC_W_DESC, Requires<[HasMSA]>; -def NLOC_D : NLOC_D_ENC, NLOC_D_DESC, Requires<[HasMSA]>; - -def NLZC_B : NLZC_B_ENC, NLZC_B_DESC, Requires<[HasMSA]>; -def NLZC_H : NLZC_H_ENC, NLZC_H_DESC, Requires<[HasMSA]>; -def NLZC_W : NLZC_W_ENC, NLZC_W_DESC, Requires<[HasMSA]>; -def NLZC_D : NLZC_D_ENC, NLZC_D_DESC, Requires<[HasMSA]>; - -def NOR_V : NOR_V_ENC, NOR_V_DESC, Requires<[HasMSA]>; - -def NORI_B : NORI_B_ENC, NORI_B_DESC, Requires<[HasMSA]>; - -def OR_V : OR_V_ENC, OR_V_DESC, Requires<[HasMSA]>; - -def ORI_B : ORI_B_ENC, ORI_B_DESC, Requires<[HasMSA]>; - -def PCKEV_B : PCKEV_B_ENC, PCKEV_B_DESC, Requires<[HasMSA]>; -def PCKEV_H : PCKEV_H_ENC, PCKEV_H_DESC, Requires<[HasMSA]>; -def PCKEV_W : PCKEV_W_ENC, PCKEV_W_DESC, Requires<[HasMSA]>; -def PCKEV_D : PCKEV_D_ENC, PCKEV_D_DESC, Requires<[HasMSA]>; - -def PCKOD_B : PCKOD_B_ENC, PCKOD_B_DESC, Requires<[HasMSA]>; -def PCKOD_H : PCKOD_H_ENC, PCKOD_H_DESC, Requires<[HasMSA]>; -def PCKOD_W : PCKOD_W_ENC, PCKOD_W_DESC, Requires<[HasMSA]>; -def PCKOD_D : PCKOD_D_ENC, PCKOD_D_DESC, Requires<[HasMSA]>; - -def PCNT_B : PCNT_B_ENC, PCNT_B_DESC, Requires<[HasMSA]>; -def PCNT_H : PCNT_H_ENC, PCNT_H_DESC, Requires<[HasMSA]>; -def PCNT_W : PCNT_W_ENC, PCNT_W_DESC, Requires<[HasMSA]>; -def PCNT_D : PCNT_D_ENC, PCNT_D_DESC, Requires<[HasMSA]>; - -def SAT_S_B : SAT_S_B_ENC, SAT_S_B_DESC, Requires<[HasMSA]>; -def SAT_S_H : SAT_S_H_ENC, SAT_S_H_DESC, Requires<[HasMSA]>; -def SAT_S_W : SAT_S_W_ENC, SAT_S_W_DESC, Requires<[HasMSA]>; -def SAT_S_D : SAT_S_D_ENC, SAT_S_D_DESC, Requires<[HasMSA]>; - -def SAT_U_B : SAT_U_B_ENC, SAT_U_B_DESC, Requires<[HasMSA]>; -def SAT_U_H : SAT_U_H_ENC, SAT_U_H_DESC, Requires<[HasMSA]>; -def SAT_U_W : SAT_U_W_ENC, SAT_U_W_DESC, Requires<[HasMSA]>; -def SAT_U_D : SAT_U_D_ENC, SAT_U_D_DESC, Requires<[HasMSA]>; - -def SHF_B : SHF_B_ENC, SHF_B_DESC, Requires<[HasMSA]>; -def SHF_H : SHF_H_ENC, SHF_H_DESC, 
Requires<[HasMSA]>; -def SHF_W : SHF_W_ENC, SHF_W_DESC, Requires<[HasMSA]>; - -def SLD_B : SLD_B_ENC, SLD_B_DESC, Requires<[HasMSA]>; -def SLD_H : SLD_H_ENC, SLD_H_DESC, Requires<[HasMSA]>; -def SLD_W : SLD_W_ENC, SLD_W_DESC, Requires<[HasMSA]>; -def SLD_D : SLD_D_ENC, SLD_D_DESC, Requires<[HasMSA]>; - -def SLDI_B : SLDI_B_ENC, SLDI_B_DESC, Requires<[HasMSA]>; -def SLDI_H : SLDI_H_ENC, SLDI_H_DESC, Requires<[HasMSA]>; -def SLDI_W : SLDI_W_ENC, SLDI_W_DESC, Requires<[HasMSA]>; -def SLDI_D : SLDI_D_ENC, SLDI_D_DESC, Requires<[HasMSA]>; - -def SLL_B : SLL_B_ENC, SLL_B_DESC, Requires<[HasMSA]>; -def SLL_H : SLL_H_ENC, SLL_H_DESC, Requires<[HasMSA]>; -def SLL_W : SLL_W_ENC, SLL_W_DESC, Requires<[HasMSA]>; -def SLL_D : SLL_D_ENC, SLL_D_DESC, Requires<[HasMSA]>; - -def SLLI_B : SLLI_B_ENC, SLLI_B_DESC, Requires<[HasMSA]>; -def SLLI_H : SLLI_H_ENC, SLLI_H_DESC, Requires<[HasMSA]>; -def SLLI_W : SLLI_W_ENC, SLLI_W_DESC, Requires<[HasMSA]>; -def SLLI_D : SLLI_D_ENC, SLLI_D_DESC, Requires<[HasMSA]>; - -def SPLAT_B : SPLAT_B_ENC, SPLAT_B_DESC, Requires<[HasMSA]>; -def SPLAT_H : SPLAT_H_ENC, SPLAT_H_DESC, Requires<[HasMSA]>; -def SPLAT_W : SPLAT_W_ENC, SPLAT_W_DESC, Requires<[HasMSA]>; -def SPLAT_D : SPLAT_D_ENC, SPLAT_D_DESC, Requires<[HasMSA]>; - -def SPLATI_B : SPLATI_B_ENC, SPLATI_B_DESC, Requires<[HasMSA]>; -def SPLATI_H : SPLATI_H_ENC, SPLATI_H_DESC, Requires<[HasMSA]>; -def SPLATI_W : SPLATI_W_ENC, SPLATI_W_DESC, Requires<[HasMSA]>; -def SPLATI_D : SPLATI_D_ENC, SPLATI_D_DESC, Requires<[HasMSA]>; - -def SRA_B : SRA_B_ENC, SRA_B_DESC, Requires<[HasMSA]>; -def SRA_H : SRA_H_ENC, SRA_H_DESC, Requires<[HasMSA]>; -def SRA_W : SRA_W_ENC, SRA_W_DESC, Requires<[HasMSA]>; -def SRA_D : SRA_D_ENC, SRA_D_DESC, Requires<[HasMSA]>; - -def SRAI_B : SRAI_B_ENC, SRAI_B_DESC, Requires<[HasMSA]>; -def SRAI_H : SRAI_H_ENC, SRAI_H_DESC, Requires<[HasMSA]>; -def SRAI_W : SRAI_W_ENC, SRAI_W_DESC, Requires<[HasMSA]>; -def SRAI_D : SRAI_D_ENC, SRAI_D_DESC, Requires<[HasMSA]>; - -def SRAR_B : SRAR_B_ENC, SRAR_B_DESC, Requires<[HasMSA]>; -def SRAR_H : SRAR_H_ENC, SRAR_H_DESC, Requires<[HasMSA]>; -def SRAR_W : SRAR_W_ENC, SRAR_W_DESC, Requires<[HasMSA]>; -def SRAR_D : SRAR_D_ENC, SRAR_D_DESC, Requires<[HasMSA]>; - -def SRARI_B : SRARI_B_ENC, SRARI_B_DESC, Requires<[HasMSA]>; -def SRARI_H : SRARI_H_ENC, SRARI_H_DESC, Requires<[HasMSA]>; -def SRARI_W : SRARI_W_ENC, SRARI_W_DESC, Requires<[HasMSA]>; -def SRARI_D : SRARI_D_ENC, SRARI_D_DESC, Requires<[HasMSA]>; - -def SRL_B : SRL_B_ENC, SRL_B_DESC, Requires<[HasMSA]>; -def SRL_H : SRL_H_ENC, SRL_H_DESC, Requires<[HasMSA]>; -def SRL_W : SRL_W_ENC, SRL_W_DESC, Requires<[HasMSA]>; -def SRL_D : SRL_D_ENC, SRL_D_DESC, Requires<[HasMSA]>; - -def SRLI_B : SRLI_B_ENC, SRLI_B_DESC, Requires<[HasMSA]>; -def SRLI_H : SRLI_H_ENC, SRLI_H_DESC, Requires<[HasMSA]>; -def SRLI_W : SRLI_W_ENC, SRLI_W_DESC, Requires<[HasMSA]>; -def SRLI_D : SRLI_D_ENC, SRLI_D_DESC, Requires<[HasMSA]>; - -def SRLR_B : SRLR_B_ENC, SRLR_B_DESC, Requires<[HasMSA]>; -def SRLR_H : SRLR_H_ENC, SRLR_H_DESC, Requires<[HasMSA]>; -def SRLR_W : SRLR_W_ENC, SRLR_W_DESC, Requires<[HasMSA]>; -def SRLR_D : SRLR_D_ENC, SRLR_D_DESC, Requires<[HasMSA]>; - -def SRLRI_B : SRLRI_B_ENC, SRLRI_B_DESC, Requires<[HasMSA]>; -def SRLRI_H : SRLRI_H_ENC, SRLRI_H_DESC, Requires<[HasMSA]>; -def SRLRI_W : SRLRI_W_ENC, SRLRI_W_DESC, Requires<[HasMSA]>; -def SRLRI_D : SRLRI_D_ENC, SRLRI_D_DESC, Requires<[HasMSA]>; - -def ST_B: ST_B_ENC, ST_B_DESC, Requires<[HasMSA]>; -def ST_H: ST_H_ENC, ST_H_DESC, Requires<[HasMSA]>; -def ST_W: ST_W_ENC, 
ST_W_DESC, Requires<[HasMSA]>; -def ST_D: ST_D_ENC, ST_D_DESC, Requires<[HasMSA]>; - -def STX_B: STX_B_ENC, STX_B_DESC, Requires<[HasMSA]>; -def STX_H: STX_H_ENC, STX_H_DESC, Requires<[HasMSA]>; -def STX_W: STX_W_ENC, STX_W_DESC, Requires<[HasMSA]>; -def STX_D: STX_D_ENC, STX_D_DESC, Requires<[HasMSA]>; - -def SUBS_S_B : SUBS_S_B_ENC, SUBS_S_B_DESC, Requires<[HasMSA]>; -def SUBS_S_H : SUBS_S_H_ENC, SUBS_S_H_DESC, Requires<[HasMSA]>; -def SUBS_S_W : SUBS_S_W_ENC, SUBS_S_W_DESC, Requires<[HasMSA]>; -def SUBS_S_D : SUBS_S_D_ENC, SUBS_S_D_DESC, Requires<[HasMSA]>; - -def SUBS_U_B : SUBS_U_B_ENC, SUBS_U_B_DESC, Requires<[HasMSA]>; -def SUBS_U_H : SUBS_U_H_ENC, SUBS_U_H_DESC, Requires<[HasMSA]>; -def SUBS_U_W : SUBS_U_W_ENC, SUBS_U_W_DESC, Requires<[HasMSA]>; -def SUBS_U_D : SUBS_U_D_ENC, SUBS_U_D_DESC, Requires<[HasMSA]>; - -def SUBSUS_U_B : SUBSUS_U_B_ENC, SUBSUS_U_B_DESC, Requires<[HasMSA]>; -def SUBSUS_U_H : SUBSUS_U_H_ENC, SUBSUS_U_H_DESC, Requires<[HasMSA]>; -def SUBSUS_U_W : SUBSUS_U_W_ENC, SUBSUS_U_W_DESC, Requires<[HasMSA]>; -def SUBSUS_U_D : SUBSUS_U_D_ENC, SUBSUS_U_D_DESC, Requires<[HasMSA]>; - -def SUBSUU_S_B : SUBSUU_S_B_ENC, SUBSUU_S_B_DESC, Requires<[HasMSA]>; -def SUBSUU_S_H : SUBSUU_S_H_ENC, SUBSUU_S_H_DESC, Requires<[HasMSA]>; -def SUBSUU_S_W : SUBSUU_S_W_ENC, SUBSUU_S_W_DESC, Requires<[HasMSA]>; -def SUBSUU_S_D : SUBSUU_S_D_ENC, SUBSUU_S_D_DESC, Requires<[HasMSA]>; - -def SUBV_B : SUBV_B_ENC, SUBV_B_DESC, Requires<[HasMSA]>; -def SUBV_H : SUBV_H_ENC, SUBV_H_DESC, Requires<[HasMSA]>; -def SUBV_W : SUBV_W_ENC, SUBV_W_DESC, Requires<[HasMSA]>; -def SUBV_D : SUBV_D_ENC, SUBV_D_DESC, Requires<[HasMSA]>; - -def SUBVI_B : SUBVI_B_ENC, SUBVI_B_DESC, Requires<[HasMSA]>; -def SUBVI_H : SUBVI_H_ENC, SUBVI_H_DESC, Requires<[HasMSA]>; -def SUBVI_W : SUBVI_W_ENC, SUBVI_W_DESC, Requires<[HasMSA]>; -def SUBVI_D : SUBVI_D_ENC, SUBVI_D_DESC, Requires<[HasMSA]>; - -def VSHF_B : VSHF_B_ENC, VSHF_B_DESC, Requires<[HasMSA]>; -def VSHF_H : VSHF_H_ENC, VSHF_H_DESC, Requires<[HasMSA]>; -def VSHF_W : VSHF_W_ENC, VSHF_W_DESC, Requires<[HasMSA]>; -def VSHF_D : VSHF_D_ENC, VSHF_D_DESC, Requires<[HasMSA]>; - -def XOR_V : XOR_V_ENC, XOR_V_DESC, Requires<[HasMSA]>; - -def XORI_B : XORI_B_ENC, XORI_B_DESC, Requires<[HasMSA]>; +def MAX_S_B : MAX_S_B_ENC, MAX_S_B_DESC; +def MAX_S_H : MAX_S_H_ENC, MAX_S_H_DESC; +def MAX_S_W : MAX_S_W_ENC, MAX_S_W_DESC; +def MAX_S_D : MAX_S_D_ENC, MAX_S_D_DESC; + +def MAX_U_B : MAX_U_B_ENC, MAX_U_B_DESC; +def MAX_U_H : MAX_U_H_ENC, MAX_U_H_DESC; +def MAX_U_W : MAX_U_W_ENC, MAX_U_W_DESC; +def MAX_U_D : MAX_U_D_ENC, MAX_U_D_DESC; + +def MAXI_S_B : MAXI_S_B_ENC, MAXI_S_B_DESC; +def MAXI_S_H : MAXI_S_H_ENC, MAXI_S_H_DESC; +def MAXI_S_W : MAXI_S_W_ENC, MAXI_S_W_DESC; +def MAXI_S_D : MAXI_S_D_ENC, MAXI_S_D_DESC; + +def MAXI_U_B : MAXI_U_B_ENC, MAXI_U_B_DESC; +def MAXI_U_H : MAXI_U_H_ENC, MAXI_U_H_DESC; +def MAXI_U_W : MAXI_U_W_ENC, MAXI_U_W_DESC; +def MAXI_U_D : MAXI_U_D_ENC, MAXI_U_D_DESC; + +def MIN_A_B : MIN_A_B_ENC, MIN_A_B_DESC; +def MIN_A_H : MIN_A_H_ENC, MIN_A_H_DESC; +def MIN_A_W : MIN_A_W_ENC, MIN_A_W_DESC; +def MIN_A_D : MIN_A_D_ENC, MIN_A_D_DESC; + +def MIN_S_B : MIN_S_B_ENC, MIN_S_B_DESC; +def MIN_S_H : MIN_S_H_ENC, MIN_S_H_DESC; +def MIN_S_W : MIN_S_W_ENC, MIN_S_W_DESC; +def MIN_S_D : MIN_S_D_ENC, MIN_S_D_DESC; + +def MIN_U_B : MIN_U_B_ENC, MIN_U_B_DESC; +def MIN_U_H : MIN_U_H_ENC, MIN_U_H_DESC; +def MIN_U_W : MIN_U_W_ENC, MIN_U_W_DESC; +def MIN_U_D : MIN_U_D_ENC, MIN_U_D_DESC; + +def MINI_S_B : MINI_S_B_ENC, MINI_S_B_DESC; +def MINI_S_H : MINI_S_H_ENC, MINI_S_H_DESC; +def 
MINI_S_W : MINI_S_W_ENC, MINI_S_W_DESC; +def MINI_S_D : MINI_S_D_ENC, MINI_S_D_DESC; + +def MINI_U_B : MINI_U_B_ENC, MINI_U_B_DESC; +def MINI_U_H : MINI_U_H_ENC, MINI_U_H_DESC; +def MINI_U_W : MINI_U_W_ENC, MINI_U_W_DESC; +def MINI_U_D : MINI_U_D_ENC, MINI_U_D_DESC; + +def MOD_S_B : MOD_S_B_ENC, MOD_S_B_DESC; +def MOD_S_H : MOD_S_H_ENC, MOD_S_H_DESC; +def MOD_S_W : MOD_S_W_ENC, MOD_S_W_DESC; +def MOD_S_D : MOD_S_D_ENC, MOD_S_D_DESC; + +def MOD_U_B : MOD_U_B_ENC, MOD_U_B_DESC; +def MOD_U_H : MOD_U_H_ENC, MOD_U_H_DESC; +def MOD_U_W : MOD_U_W_ENC, MOD_U_W_DESC; +def MOD_U_D : MOD_U_D_ENC, MOD_U_D_DESC; + +def MOVE_V : MOVE_V_ENC, MOVE_V_DESC; + +def MSUB_Q_H : MSUB_Q_H_ENC, MSUB_Q_H_DESC; +def MSUB_Q_W : MSUB_Q_W_ENC, MSUB_Q_W_DESC; + +def MSUBR_Q_H : MSUBR_Q_H_ENC, MSUBR_Q_H_DESC; +def MSUBR_Q_W : MSUBR_Q_W_ENC, MSUBR_Q_W_DESC; + +def MSUBV_B : MSUBV_B_ENC, MSUBV_B_DESC; +def MSUBV_H : MSUBV_H_ENC, MSUBV_H_DESC; +def MSUBV_W : MSUBV_W_ENC, MSUBV_W_DESC; +def MSUBV_D : MSUBV_D_ENC, MSUBV_D_DESC; + +def MUL_Q_H : MUL_Q_H_ENC, MUL_Q_H_DESC; +def MUL_Q_W : MUL_Q_W_ENC, MUL_Q_W_DESC; + +def MULR_Q_H : MULR_Q_H_ENC, MULR_Q_H_DESC; +def MULR_Q_W : MULR_Q_W_ENC, MULR_Q_W_DESC; + +def MULV_B : MULV_B_ENC, MULV_B_DESC; +def MULV_H : MULV_H_ENC, MULV_H_DESC; +def MULV_W : MULV_W_ENC, MULV_W_DESC; +def MULV_D : MULV_D_ENC, MULV_D_DESC; + +def NLOC_B : NLOC_B_ENC, NLOC_B_DESC; +def NLOC_H : NLOC_H_ENC, NLOC_H_DESC; +def NLOC_W : NLOC_W_ENC, NLOC_W_DESC; +def NLOC_D : NLOC_D_ENC, NLOC_D_DESC; + +def NLZC_B : NLZC_B_ENC, NLZC_B_DESC; +def NLZC_H : NLZC_H_ENC, NLZC_H_DESC; +def NLZC_W : NLZC_W_ENC, NLZC_W_DESC; +def NLZC_D : NLZC_D_ENC, NLZC_D_DESC; + +def NOR_V : NOR_V_ENC, NOR_V_DESC; + +def NORI_B : NORI_B_ENC, NORI_B_DESC; + +def OR_V : OR_V_ENC, OR_V_DESC; + +def ORI_B : ORI_B_ENC, ORI_B_DESC; + +def PCKEV_B : PCKEV_B_ENC, PCKEV_B_DESC; +def PCKEV_H : PCKEV_H_ENC, PCKEV_H_DESC; +def PCKEV_W : PCKEV_W_ENC, PCKEV_W_DESC; +def PCKEV_D : PCKEV_D_ENC, PCKEV_D_DESC; + +def PCKOD_B : PCKOD_B_ENC, PCKOD_B_DESC; +def PCKOD_H : PCKOD_H_ENC, PCKOD_H_DESC; +def PCKOD_W : PCKOD_W_ENC, PCKOD_W_DESC; +def PCKOD_D : PCKOD_D_ENC, PCKOD_D_DESC; + +def PCNT_B : PCNT_B_ENC, PCNT_B_DESC; +def PCNT_H : PCNT_H_ENC, PCNT_H_DESC; +def PCNT_W : PCNT_W_ENC, PCNT_W_DESC; +def PCNT_D : PCNT_D_ENC, PCNT_D_DESC; + +def SAT_S_B : SAT_S_B_ENC, SAT_S_B_DESC; +def SAT_S_H : SAT_S_H_ENC, SAT_S_H_DESC; +def SAT_S_W : SAT_S_W_ENC, SAT_S_W_DESC; +def SAT_S_D : SAT_S_D_ENC, SAT_S_D_DESC; + +def SAT_U_B : SAT_U_B_ENC, SAT_U_B_DESC; +def SAT_U_H : SAT_U_H_ENC, SAT_U_H_DESC; +def SAT_U_W : SAT_U_W_ENC, SAT_U_W_DESC; +def SAT_U_D : SAT_U_D_ENC, SAT_U_D_DESC; + +def SHF_B : SHF_B_ENC, SHF_B_DESC; +def SHF_H : SHF_H_ENC, SHF_H_DESC; +def SHF_W : SHF_W_ENC, SHF_W_DESC; + +def SLD_B : SLD_B_ENC, SLD_B_DESC; +def SLD_H : SLD_H_ENC, SLD_H_DESC; +def SLD_W : SLD_W_ENC, SLD_W_DESC; +def SLD_D : SLD_D_ENC, SLD_D_DESC; + +def SLDI_B : SLDI_B_ENC, SLDI_B_DESC; +def SLDI_H : SLDI_H_ENC, SLDI_H_DESC; +def SLDI_W : SLDI_W_ENC, SLDI_W_DESC; +def SLDI_D : SLDI_D_ENC, SLDI_D_DESC; + +def SLL_B : SLL_B_ENC, SLL_B_DESC; +def SLL_H : SLL_H_ENC, SLL_H_DESC; +def SLL_W : SLL_W_ENC, SLL_W_DESC; +def SLL_D : SLL_D_ENC, SLL_D_DESC; + +def SLLI_B : SLLI_B_ENC, SLLI_B_DESC; +def SLLI_H : SLLI_H_ENC, SLLI_H_DESC; +def SLLI_W : SLLI_W_ENC, SLLI_W_DESC; +def SLLI_D : SLLI_D_ENC, SLLI_D_DESC; + +def SPLAT_B : SPLAT_B_ENC, SPLAT_B_DESC; +def SPLAT_H : SPLAT_H_ENC, SPLAT_H_DESC; +def SPLAT_W : SPLAT_W_ENC, SPLAT_W_DESC; +def SPLAT_D : SPLAT_D_ENC, SPLAT_D_DESC; + +def SPLATI_B 
: SPLATI_B_ENC, SPLATI_B_DESC; +def SPLATI_H : SPLATI_H_ENC, SPLATI_H_DESC; +def SPLATI_W : SPLATI_W_ENC, SPLATI_W_DESC; +def SPLATI_D : SPLATI_D_ENC, SPLATI_D_DESC; + +def SRA_B : SRA_B_ENC, SRA_B_DESC; +def SRA_H : SRA_H_ENC, SRA_H_DESC; +def SRA_W : SRA_W_ENC, SRA_W_DESC; +def SRA_D : SRA_D_ENC, SRA_D_DESC; + +def SRAI_B : SRAI_B_ENC, SRAI_B_DESC; +def SRAI_H : SRAI_H_ENC, SRAI_H_DESC; +def SRAI_W : SRAI_W_ENC, SRAI_W_DESC; +def SRAI_D : SRAI_D_ENC, SRAI_D_DESC; + +def SRAR_B : SRAR_B_ENC, SRAR_B_DESC; +def SRAR_H : SRAR_H_ENC, SRAR_H_DESC; +def SRAR_W : SRAR_W_ENC, SRAR_W_DESC; +def SRAR_D : SRAR_D_ENC, SRAR_D_DESC; + +def SRARI_B : SRARI_B_ENC, SRARI_B_DESC; +def SRARI_H : SRARI_H_ENC, SRARI_H_DESC; +def SRARI_W : SRARI_W_ENC, SRARI_W_DESC; +def SRARI_D : SRARI_D_ENC, SRARI_D_DESC; + +def SRL_B : SRL_B_ENC, SRL_B_DESC; +def SRL_H : SRL_H_ENC, SRL_H_DESC; +def SRL_W : SRL_W_ENC, SRL_W_DESC; +def SRL_D : SRL_D_ENC, SRL_D_DESC; + +def SRLI_B : SRLI_B_ENC, SRLI_B_DESC; +def SRLI_H : SRLI_H_ENC, SRLI_H_DESC; +def SRLI_W : SRLI_W_ENC, SRLI_W_DESC; +def SRLI_D : SRLI_D_ENC, SRLI_D_DESC; + +def SRLR_B : SRLR_B_ENC, SRLR_B_DESC; +def SRLR_H : SRLR_H_ENC, SRLR_H_DESC; +def SRLR_W : SRLR_W_ENC, SRLR_W_DESC; +def SRLR_D : SRLR_D_ENC, SRLR_D_DESC; + +def SRLRI_B : SRLRI_B_ENC, SRLRI_B_DESC; +def SRLRI_H : SRLRI_H_ENC, SRLRI_H_DESC; +def SRLRI_W : SRLRI_W_ENC, SRLRI_W_DESC; +def SRLRI_D : SRLRI_D_ENC, SRLRI_D_DESC; + +def ST_B: ST_B_ENC, ST_B_DESC; +def ST_H: ST_H_ENC, ST_H_DESC; +def ST_W: ST_W_ENC, ST_W_DESC; +def ST_D: ST_D_ENC, ST_D_DESC; + +def STX_B: STX_B_ENC, STX_B_DESC; +def STX_H: STX_H_ENC, STX_H_DESC; +def STX_W: STX_W_ENC, STX_W_DESC; +def STX_D: STX_D_ENC, STX_D_DESC; + +def SUBS_S_B : SUBS_S_B_ENC, SUBS_S_B_DESC; +def SUBS_S_H : SUBS_S_H_ENC, SUBS_S_H_DESC; +def SUBS_S_W : SUBS_S_W_ENC, SUBS_S_W_DESC; +def SUBS_S_D : SUBS_S_D_ENC, SUBS_S_D_DESC; + +def SUBS_U_B : SUBS_U_B_ENC, SUBS_U_B_DESC; +def SUBS_U_H : SUBS_U_H_ENC, SUBS_U_H_DESC; +def SUBS_U_W : SUBS_U_W_ENC, SUBS_U_W_DESC; +def SUBS_U_D : SUBS_U_D_ENC, SUBS_U_D_DESC; + +def SUBSUS_U_B : SUBSUS_U_B_ENC, SUBSUS_U_B_DESC; +def SUBSUS_U_H : SUBSUS_U_H_ENC, SUBSUS_U_H_DESC; +def SUBSUS_U_W : SUBSUS_U_W_ENC, SUBSUS_U_W_DESC; +def SUBSUS_U_D : SUBSUS_U_D_ENC, SUBSUS_U_D_DESC; + +def SUBSUU_S_B : SUBSUU_S_B_ENC, SUBSUU_S_B_DESC; +def SUBSUU_S_H : SUBSUU_S_H_ENC, SUBSUU_S_H_DESC; +def SUBSUU_S_W : SUBSUU_S_W_ENC, SUBSUU_S_W_DESC; +def SUBSUU_S_D : SUBSUU_S_D_ENC, SUBSUU_S_D_DESC; + +def SUBV_B : SUBV_B_ENC, SUBV_B_DESC; +def SUBV_H : SUBV_H_ENC, SUBV_H_DESC; +def SUBV_W : SUBV_W_ENC, SUBV_W_DESC; +def SUBV_D : SUBV_D_ENC, SUBV_D_DESC; + +def SUBVI_B : SUBVI_B_ENC, SUBVI_B_DESC; +def SUBVI_H : SUBVI_H_ENC, SUBVI_H_DESC; +def SUBVI_W : SUBVI_W_ENC, SUBVI_W_DESC; +def SUBVI_D : SUBVI_D_ENC, SUBVI_D_DESC; + +def VSHF_B : VSHF_B_ENC, VSHF_B_DESC; +def VSHF_H : VSHF_H_ENC, VSHF_H_DESC; +def VSHF_W : VSHF_W_ENC, VSHF_W_DESC; +def VSHF_D : VSHF_D_ENC, VSHF_D_DESC; + +def XOR_V : XOR_V_ENC, XOR_V_DESC; + +def XORI_B : XORI_B_ENC, XORI_B_DESC; // Patterns. 
class MSAPat&lt;dag pattern, dag result, list&lt;Predicate&gt; pred = [HasMSA]&gt; : -- cgit v1.1 From a86062c4b12fc2b9ee339ca8c54bcef3447f713c Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 6 Sep 2013 13:25:06 +0000 Subject: [mips][msa] Indentation git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190156 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 68 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 97fecc4..96fb5f8 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -987,13 +987,13 @@ class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d, MSA128D>, IsCommutable; class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", int_mips_addv_b, MSA128B>, - IsCommutable; + IsCommutable; class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", int_mips_addv_h, MSA128H>, - IsCommutable; + IsCommutable; class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, MSA128W>, - IsCommutable; + IsCommutable; class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, MSA128D>, - IsCommutable; + IsCommutable; class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, MSA128B>; class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H>; @@ -1135,13 +1135,13 @@ class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128D>; class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128B>; class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, MSA128B>, - IsCommutable; + IsCommutable; class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, MSA128H>, - IsCommutable; + IsCommutable; class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, MSA128W>, - IsCommutable; + IsCommutable; class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, MSA128D>, - IsCommutable; + IsCommutable; class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, MSA128B>; class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", int_mips_ceqi_h, MSA128H>; @@ -1264,23 +1264,23 @@ class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, MSA128D>, class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h, MSA128H, MSA128B, MSA128B>, - IsCommutable; + IsCommutable; class DPADD_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.w", int_mips_dpadd_s_w, MSA128W, MSA128H, MSA128H>, - IsCommutable; + IsCommutable; class DPADD_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.d", int_mips_dpadd_s_d, MSA128D, MSA128W, MSA128W>, - IsCommutable; + IsCommutable; class DPADD_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.h", int_mips_dpadd_u_h, MSA128H, MSA128B, MSA128B>, - IsCommutable; + IsCommutable; class DPADD_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.w", int_mips_dpadd_u_w, MSA128W, MSA128H, MSA128H>, - IsCommutable; + IsCommutable; class DPADD_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.d", int_mips_dpadd_u_d, MSA128D, MSA128W, MSA128W>, - IsCommutable; + IsCommutable; class DPSUB_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.h", int_mips_dpsub_s_h, MSA128H, MSA128B, MSA128B>; @@ -1297,19 +1297,19 @@ class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, MSA128D, MSA128W, MSA128W>; class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", int_mips_fadd_w, MSA128W>, - IsCommutable; + IsCommutable; class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", int_mips_fadd_d, MSA128D>, - IsCommutable; + IsCommutable; class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, MSA128W>, - IsCommutable; + IsCommutable; class FCAF_D_DESC
: MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, MSA128D>, - IsCommutable; + IsCommutable; class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", int_mips_fceq_w, MSA128W>, - IsCommutable; + IsCommutable; class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, MSA128D>, - IsCommutable; + IsCommutable; class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, MSA128W>; @@ -1323,39 +1323,39 @@ class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", int_mips_fclt_w, MSA128W>; class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", int_mips_fclt_d, MSA128D>; class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", int_mips_fcne_w, MSA128W>, - IsCommutable; + IsCommutable; class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", int_mips_fcne_d, MSA128D>, - IsCommutable; + IsCommutable; class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", int_mips_fcor_w, MSA128W>, - IsCommutable; + IsCommutable; class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", int_mips_fcor_d, MSA128D>, - IsCommutable; + IsCommutable; class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", int_mips_fcueq_w, MSA128W>, - IsCommutable; + IsCommutable; class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", int_mips_fcueq_d, MSA128D>, - IsCommutable; + IsCommutable; class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", int_mips_fcule_w, MSA128W>, - IsCommutable; + IsCommutable; class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", int_mips_fcule_d, MSA128D>, - IsCommutable; + IsCommutable; class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", int_mips_fcult_w, MSA128W>, - IsCommutable; + IsCommutable; class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", int_mips_fcult_d, MSA128D>, - IsCommutable; + IsCommutable; class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", int_mips_fcun_w, MSA128W>, - IsCommutable; + IsCommutable; class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", int_mips_fcun_d, MSA128D>, - IsCommutable; + IsCommutable; class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", int_mips_fcune_w, MSA128W>, - IsCommutable; + IsCommutable; class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", int_mips_fcune_d, MSA128D>, - IsCommutable; + IsCommutable; class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", int_mips_fdiv_w, MSA128W>; class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", int_mips_fdiv_d, MSA128D>; -- cgit v1.1 From 546dcc5ddc6d3c1fee50f10bedfea239a721f0e3 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Fri, 6 Sep 2013 20:17:42 +0000 Subject: R600: Add support for LDS atomic subtract Signed-off-by: Aaron Watry Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190200 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstructions.td | 5 +++++ lib/Target/R600/R600Instructions.td | 4 ++++ lib/Target/R600/SIInstructions.td | 4 ++++ 3 files changed, 13 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 6745fed..e30abc0 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -196,6 +196,11 @@ def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value), return dyn_cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; }]>; +def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value), + (atomic_load_sub node:$ptr, node:$value), [{ + return dyn_cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return dyn_cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; diff --git a/lib/Target/R600/R600Instructions.td 
b/lib/Target/R600/R600Instructions.td index efa4751..24bc6b0 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1673,6 +1673,7 @@ class R600_LDS_1A2D lds_op, string name, list pattern> : } def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >; +def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >; def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE", [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] >; @@ -1685,6 +1686,9 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] >; +def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB", + [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))] +>; def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] >; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 31a5ad2..14a189a 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -392,6 +392,7 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1 def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>; +def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>; def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; @@ -1779,6 +1780,9 @@ def : DSWritePat ; def : Pat <(atomic_load_add_local i32:$ptr, i32:$val), (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>; +def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val), + (DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>; + /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ -- cgit v1.1 From 77e1ebd18fc558620b97fe38f3ebbf825533655f Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 6 Sep 2013 23:28:24 +0000 Subject: [mips] Set instruction itineraries of loads, stores and conditional moves. 
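The change itself is mechanical: classes such as Load, Store, LoadLeftRight, StoreLeftRight and CMov_I_I_FT previously hard-coded NoItinerary, so every derived load, store and conditional move carried no scheduling information; the hunks below thread an InstrItinClass template parameter through instead. A stripped-down sketch of the idiom — stand-in records only (the _SK suffix marks them as illustrative, not the real Mips definitions), but processable with llvm-tblgen:

    class InstrItin {}            // stand-in for LLVM's InstrItinClass
    def NoItin_SK  : InstrItin;   // stand-in for NoItinerary
    def IILoad_SK  : InstrItin;
    def IIStore_SK : InstrItin;

    // Taking the itinerary as a template argument instead of hard-coding
    // a default lets each def name its own scheduling class.
    class MemInst_SK<string opstr, InstrItin itin> {
      string AsmString = opstr;
      InstrItin Itinerary = itin;
    }

    def LW_SK : MemInst_SK<"lw", IILoad_SK>;   // load scheduling class
    def SW_SK : MemInst_SK<"sw", IIStore_SK>;  // store scheduling class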
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190219 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 16 ++++++++-------- lib/Target/Mips/MipsCondMov.td | 30 +++++++++++++++--------------- lib/Target/Mips/MipsInstrInfo.td | 22 ++++++++++++---------- 3 files changed, 35 insertions(+), 33 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index baf9c1b..9fb787c 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -135,16 +135,16 @@ def SD : Store<"sd", GPR64Opnd, store, IIStore>, LW_FM<0x3f>; /// load/store left/right let isCodeGenOnly = 1 in { -def LWL64 : LoadLeftRight<"lwl", MipsLWL, GPR64Opnd>, LW_FM<0x22>; -def LWR64 : LoadLeftRight<"lwr", MipsLWR, GPR64Opnd>, LW_FM<0x26>; -def SWL64 : StoreLeftRight<"swl", MipsSWL, GPR64Opnd>, LW_FM<0x2a>; -def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd>, LW_FM<0x2e>; +def LWL64 : LoadLeftRight<"lwl", MipsLWL, GPR64Opnd, IILoad>, LW_FM<0x22>; +def LWR64 : LoadLeftRight<"lwr", MipsLWR, GPR64Opnd, IILoad>, LW_FM<0x26>; +def SWL64 : StoreLeftRight<"swl", MipsSWL, GPR64Opnd, IIStore>, LW_FM<0x2a>; +def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd, IIStore>, LW_FM<0x2e>; } -def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd>, LW_FM<0x1a>; -def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd>, LW_FM<0x1b>; -def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd>, LW_FM<0x2c>; -def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd>, LW_FM<0x2d>; +def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, IILoad>, LW_FM<0x1a>; +def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, IILoad>, LW_FM<0x1b>; +def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, IIStore>, LW_FM<0x2c>; +def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, IIStore>, LW_FM<0x2d>; /// Load-linked, Store-conditional def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>; diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 924739e..2de1430 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -103,28 +103,28 @@ multiclass MovnPats, +def MOVZ_I_I : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, IIArith>, ADD_FM<0, 0xa>; let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { - def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, - NoItinerary>, ADD_FM<0, 0xa>; - def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, - NoItinerary>, ADD_FM<0, 0xa>; - def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, - NoItinerary>, ADD_FM<0, 0xa>; + def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, IIArith>, + ADD_FM<0, 0xa>; + def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, IIArith>, + ADD_FM<0, 0xa>; + def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, IIArith>, + ADD_FM<0, 0xa>; } -def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, - NoItinerary>, ADD_FM<0, 0xb>; +def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, IIArith>, + ADD_FM<0, 0xb>; let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { - def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, - NoItinerary>, ADD_FM<0, 0xb>; - def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, - NoItinerary>, ADD_FM<0, 0xb>; - def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, - NoItinerary>, ADD_FM<0, 0xb>; + def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, IIArith>, + ADD_FM<0, 0xb>; + def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, IIArith>, + ADD_FM<0, 
0xb>; + def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, IIArith>, + ADD_FM<0, 0xb>; } def MOVZ_I_S : CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, IIFmove>, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index d7396e3..8b985a8 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -461,7 +461,7 @@ class FMem op, dag outs, dag ins, string asmstr, list pattern, class Load : InstSE<(outs RO:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RO:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI, opstr> { + [(set RO:$rt, (OpNode Addr:$addr))], Itin, FrmI, opstr> { let DecoderMethod = "DecodeMem"; let canFoldAsLoad = 1; let mayLoad = 1; @@ -470,24 +470,26 @@ class Load : InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RO:$rt, Addr:$addr)], NoItinerary, FrmI, opstr> { + [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> { let DecoderMethod = "DecodeMem"; let mayStore = 1; } // Load/Store Left/Right let canFoldAsLoad = 1 in -class LoadLeftRight : +class LoadLeftRight : InstSE<(outs RO:$rt), (ins mem:$addr, RO:$src), !strconcat(opstr, "\t$rt, $addr"), - [(set RO:$rt, (OpNode addr:$addr, RO:$src))], NoItinerary, FrmI> { + [(set RO:$rt, (OpNode addr:$addr, RO:$src))], Itin, FrmI> { let DecoderMethod = "DecodeMem"; string Constraints = "$src = $rt"; } -class StoreLeftRight : +class StoreLeftRight : InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RO:$rt, addr:$addr)], NoItinerary, FrmI> { + [(OpNode RO:$rt, addr:$addr)], Itin, FrmI> { let DecoderMethod = "DecodeMem"; } @@ -925,10 +927,10 @@ def SH : Store<"sh", GPR32Opnd, truncstorei16, IIStore>, MMRel, LW_FM<0x29>; def SW : Store<"sw", GPR32Opnd, store, IIStore>, MMRel, LW_FM<0x2b>; /// load/store left/right -def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd>, LW_FM<0x22>; -def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd>, LW_FM<0x26>; -def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd>, LW_FM<0x2a>; -def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd>, LW_FM<0x2e>; +def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, IILoad>, LW_FM<0x22>; +def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, IILoad>, LW_FM<0x26>; +def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, IIStore>, LW_FM<0x2a>; +def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, IIStore>, LW_FM<0x2e>; def SYNC : SYNC_FT, SYNC_FM; def TEQ : TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>; -- cgit v1.1 From 1d04ca7987ef0abb5be07b11e3bb9c9e756a1fce Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 6 Sep 2013 23:40:15 +0000 Subject: [mips] Make "b" (unconditional branch) a pseudo. "b" is an assembly idiom, which is equivalent to "beq $zero, $zero, offset". 
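Two mechanisms cooperate in the hunks below: the new InstAlias lets the assembler keep accepting "b $offset" (and the MipsInstPrinter change prints "beq $zero, $zero, $L2" back as "b $L2"), while PseudoInstExpansion gives the codegen-only B pseudo its one concrete lowering, BEQ with both register operands tied to $zero. A rough model of that pseudo-to-real link — stand-in records, not the upstream PseudoSE/PseudoInstExpansion classes:

    class Insn_SK<string asm> { string AsmString = asm; }

    // Plays the role of PseudoInstExpansion: records which real
    // instruction a codegen-only pseudo is rewritten into when MC
    // instructions are finally emitted.
    class ExpandsTo_SK<Insn_SK real> { Insn_SK RealInsn = real; }

    def BEQ_SK : Insn_SK<"beq $rs, $rt, $offset">;
    // B carries no encoding of its own; emission rewrites it to BEQ
    // with rs = rt = $zero, which is exactly the "b" idiom.
    def B_SK : Insn_SK<"b $offset">, ExpandsTo_SK<BEQ_SK>;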
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190220 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 5 ++++- lib/Target/Mips/MipsCodeEmitter.cpp | 4 ++++ lib/Target/Mips/MipsInstrFormats.td | 11 ----------- lib/Target/Mips/MipsInstrInfo.td | 9 +++++---- 4 files changed, 13 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 755a948..231d485 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -230,8 +230,11 @@ bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI, bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) { switch (MI.getOpcode()) { case Mips::BEQ: + // beq $zero, $zero, $L2 => b $L2 // beq $r0, $zero, $L2 => beqz $r0, $L2 - return isReg(MI, 1) && printAlias("beqz", MI, 0, 2, OS); + return isReg(MI, 0) && isReg(MI, 1) && + printAlias("b", MI, 2, OS) || + isReg(MI, 1) && printAlias("beqz", MI, 0, 2, OS); case Mips::BEQ64: // beq $r0, $zero, $L2 => beqz $r0, $L2 return isReg(MI, 1) && printAlias("beqz", MI, 0, 2, OS); diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index eed1155..0f1917a 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -323,6 +323,10 @@ bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI, BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO) .addReg(Mips::ZERO).addImm(0); break; + case Mips::B: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BEQ)).addReg(Mips::ZERO) + .addReg(Mips::ZERO).addOperand(MI->getOperand(0)); + break; case Mips::JALRPseudo: BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA) .addReg(MI->getOperand(0).getReg()); diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index f87d70e..c536264 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -297,17 +297,6 @@ class BGEZ_FM op, bits<5> funct> { let Inst{15-0} = offset; } -class B_FM { - bits<16> offset; - - bits<32> Inst; - - let Inst{31-26} = 4; - let Inst{25-21} = 0; - let Inst{20-16} = 0; - let Inst{15-0} = offset; -} - class SLTI_FM op> : StdArch { bits<5> rt; bits<5> rs; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 8b985a8..83c7885 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -542,9 +542,9 @@ class JumpFJ : - InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"), - [(br bb:$offset)], IIBranch, FrmI> { +class UncondBranch : + PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], IIBranch>, + PseudoInstExpansion<(BEQInst ZERO, ZERO, brtarget:$offset)> { let isBranch = 1; let isTerminator = 1; let isBarrier = 1; @@ -966,13 +966,13 @@ def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>; def J : JumpFJ, FJ<2>, Requires<[RelocStatic, HasStdEnc]>, IsBranch; def JR : IndirectBranch, MTLO_FM<8>; -def B : UncondBranch<"b">, B_FM; def BEQ : CBranch<"beq", seteq, GPR32Opnd>, BEQ_FM<4>; def BNE : CBranch<"bne", setne, GPR32Opnd>, BEQ_FM<5>; def BGEZ : CBranchZero<"bgez", setge, GPR32Opnd>, BGEZ_FM<1, 1>; def BGTZ : CBranchZero<"bgtz", setgt, GPR32Opnd>, BGEZ_FM<7, 0>; def BLEZ : CBranchZero<"blez", setle, GPR32Opnd>, BGEZ_FM<6, 0>; def BLTZ : CBranchZero<"bltz", setlt, GPR32Opnd>, BGEZ_FM<1, 0>; +def B : UncondBranch; def 
JAL : JumpLink<"jal">, FJ<3>; def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM; @@ -1100,6 +1100,7 @@ def : InstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; def : InstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; def : InstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; def : InstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +def : InstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>; def : InstAlias<"bnez $rs,$offset", (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : InstAlias<"beqz $rs,$offset", -- cgit v1.1 From cd3c1b9af9c79bd128a4811570269022a8183408 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 6 Sep 2013 23:42:58 +0000 Subject: [mips] Delete unused classes and defs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190221 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrInfo.td | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 83c7885..9fb2a75 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -248,7 +248,7 @@ def brtarget : Operand { def calltarget : Operand { let EncoderMethod = "getJumpTargetOpValue"; } -def calltarget64: Operand; + def simm16 : Operand { let DecoderMethod= "DecodeSimm16"; } @@ -449,14 +449,6 @@ class LoadUpper: let isReMaterializable = 1; } -class FMem op, dag outs, dag ins, string asmstr, list pattern, - InstrItinClass itin>: FFI { - bits<21> addr; - let Inst{25-21} = addr{20-16}; - let Inst{15-0} = addr{15-0}; - let DecoderMethod = "DecodeMem"; -} - // Memory Load/Store class Load : -- cgit v1.1 From 997c5dead83fc237280888696e1fa719563fc7f1 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 6 Sep 2013 23:52:46 +0000 Subject: [mips] Define "trap" as a pseudo instruction that turns into "break 0, 0". 
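This repeats the pattern used for the branch above: the old codegen-only TRAP hand-wired its encoding (let Inst = 0x0000000d), whereas the new TrapBase pseudo defers to the real BREAK instruction and gains a matching expansion in MipsCodeEmitter for the old JIT path. A minimal model, again with illustrative stand-ins rather than the real PseudoSE/PseudoInstExpansion records:

    def trap_sk;   // stand-in for the generic 'trap' selection node

    class Insn2_SK<string asm, list<dag> pat> {
      string AsmString = asm;
      list<dag> Pattern = pat;   // selection patterns producing this insn
    }

    def BREAK_SK : Insn2_SK<"break $c0, $c1", []>;
    // The pseudo owns the trap pattern; late expansion rewrites it to
    // "break 0, 0" instead of baking a fixed instruction word into the
    // .td file.
    def TRAP_SK  : Insn2_SK<"trap", [(trap_sk)]>;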
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190224 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCodeEmitter.cpp | 4 ++++ lib/Target/Mips/MipsInstrInfo.td | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 0f1917a..b50edf1 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -327,6 +327,10 @@ bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI, BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BEQ)).addReg(Mips::ZERO) .addReg(Mips::ZERO).addOperand(MI->getOperand(0)); break; + case Mips::TRAP: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BREAK)).addImm(0) + .addImm(0); + break; case Mips::JALRPseudo: BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA) .addReg(MI->getOperand(0).getReg()); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 9fb2a75..65dfaed 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -783,9 +783,12 @@ class MFC3OP : InstSE<(outs RO:$rt, RO:$rd, uimm16:$sel), (ins), !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>; -let isBarrier = 1, isTerminator = 1, isCodeGenOnly = 1 in -def TRAP : InstSE<(outs), (ins), "break", [(trap)], NoItinerary, FrmOther> { - let Inst = 0x0000000d; +class TrapBase + : PseudoSE<(outs), (ins), [(trap)], NoItinerary>, + PseudoInstExpansion<(RealInst 0, 0)> { + let isBarrier = 1; + let isTerminator = 1; + let isCodeGenOnly = 1; } //===----------------------------------------------------------------------===// @@ -941,6 +944,7 @@ def TNEI : TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>; def BREAK : BRK_FT<"break">, BRK_FM<0xd>; def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>; +def TRAP : TrapBase; def ERET : ER_FT<"eret">, ER_FM<0x18>; def DERET : ER_FT<"deret">, ER_FM<0x1f>; -- cgit v1.1 From 69f8e0935af16622ca13d26e6a66464d3c1f3da4 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 7 Sep 2013 00:02:02 +0000 Subject: [mips] Use uimm5 and uimm6 instead of shamt and imm, if the immediate has to fit into a 5-bit or 6-bit field. 
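With dedicated operand types, the field width is visible at the point of use: a 5-bit shift amount and a 6-bit one are no longer both spelled shamt. For reference, the uimm5/uimm6 defs added below reconstruct to the following (type parameters restored by hand; Operand and i32 are the standard definitions from include/llvm/Target/Target.td, and printUnsignedImm is the existing hook in MipsInstPrinter):

    def uimm5 : Operand<i32> {
      let PrintMethod = "printUnsignedImm";
    }

    def uimm6 : Operand<i32> {
      let PrintMethod = "printUnsignedImm";
    }

    // A 64-bit shift then states its 6-bit field explicitly:
    // def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, shl, immZExt6>,
    //            SRA_FM<0x38, 0>;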
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190226 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MicroMipsInstrInfo.td | 8 ++++---- lib/Target/Mips/Mips16InstrInfo.td | 2 +- lib/Target/Mips/Mips64InstrInfo.td | 34 ++++++++++++++-------------------- lib/Target/Mips/MipsDSPInstrInfo.td | 4 ++-- lib/Target/Mips/MipsInstrInfo.td | 35 ++++++++++++++++++++--------------- lib/Target/Mips/MipsMSAInstrInfo.td | 4 ---- 6 files changed, 41 insertions(+), 46 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index b0dc2e8..eaf2f31 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -71,11 +71,11 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { MULT_FM_MM<0x26c>; /// Shift Instructions - def SLL_MM : MMRel, shift_rotate_imm<"sll", shamt, GPR32Opnd>, + def SLL_MM : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd>, SRA_FM_MM<0, 0>; - def SRL_MM : MMRel, shift_rotate_imm<"srl", shamt, GPR32Opnd>, + def SRL_MM : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd>, SRA_FM_MM<0x40, 0>; - def SRA_MM : MMRel, shift_rotate_imm<"sra", shamt, GPR32Opnd>, + def SRA_MM : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd>, SRA_FM_MM<0x80, 0>; def SLLV_MM : MMRel, shift_rotate_reg<"sllv", GPR32Opnd>, SRLV_FM_MM<0x10, 0>; @@ -83,7 +83,7 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { SRLV_FM_MM<0x50, 0>; def SRAV_MM : MMRel, shift_rotate_reg<"srav", GPR32Opnd>, SRLV_FM_MM<0x90, 0>; - def ROTR_MM : MMRel, shift_rotate_imm<"rotr", shamt, GPR32Opnd>, + def ROTR_MM : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd>, SRA_FM_MM<0xc0, 0>; def ROTRV_MM : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd>, SRLV_FM_MM<0xd0, 0>; diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index eb7d957..dd0cea6 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -220,7 +220,7 @@ class FEXT_RRI_A16_mem_ins op, string asmstr, Operand MemOpnd, // EXT-SHIFT instruction format // class FEXT_SHIFT16_ins _f, string asmstr, InstrItinClass itin>: - FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa), + FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, uimm5:$sa), !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>; // diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 9fb787c..f4e1074 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -15,9 +15,6 @@ // Mips Operand, Complex Patterns and Transformations Definitions. 
//===----------------------------------------------------------------------===// -// Instruction operand types -def shamt_64 : Operand; - // Unsigned Operand def uimm16_64 : Operand { let PrintMethod = "printUnsignedImm"; @@ -95,22 +92,22 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>; } /// Shift Instructions -def DSLL : shift_rotate_imm<"dsll", shamt, GPR64Opnd, shl, immZExt6>, +def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, shl, immZExt6>, SRA_FM<0x38, 0>; -def DSRL : shift_rotate_imm<"dsrl", shamt, GPR64Opnd, srl, immZExt6>, +def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, srl, immZExt6>, SRA_FM<0x3a, 0>; -def DSRA : shift_rotate_imm<"dsra", shamt, GPR64Opnd, sra, immZExt6>, +def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, sra, immZExt6>, SRA_FM<0x3b, 0>; def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, shl>, SRLV_FM<0x14, 0>; def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, srl>, SRLV_FM<0x16, 0>; def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, sra>, SRLV_FM<0x17, 0>; -def DSLL32 : shift_rotate_imm<"dsll32", shamt, GPR64Opnd>, SRA_FM<0x3c, 0>; -def DSRL32 : shift_rotate_imm<"dsrl32", shamt, GPR64Opnd>, SRA_FM<0x3e, 0>; -def DSRA32 : shift_rotate_imm<"dsra32", shamt, GPR64Opnd>, SRA_FM<0x3f, 0>; +def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd>, SRA_FM<0x3c, 0>; +def DSRL32 : shift_rotate_imm<"dsrl32", uimm5, GPR64Opnd>, SRA_FM<0x3e, 0>; +def DSRA32 : shift_rotate_imm<"dsra32", uimm5, GPR64Opnd>, SRA_FM<0x3f, 0>; // Rotate Instructions let Predicates = [HasMips64r2, HasStdEnc] in { - def DROTR : shift_rotate_imm<"drotr", shamt, GPR64Opnd, rotr, immZExt6>, + def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, rotr, immZExt6>, SRA_FM<0x3a, 1>; def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, rotr>, SRLV_FM<0x16, 1>; @@ -204,16 +201,13 @@ def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>; let isCodeGenOnly = 1 in def RDHWR64 : ReadHardware, RDHWR_FM; -def DEXT : ExtBase<"dext", GPR64Opnd>, EXT_FM<3>; -let Pattern = [] in { - def DEXTU : ExtBase<"dextu", GPR64Opnd>, EXT_FM<2>; - def DEXTM : ExtBase<"dextm", GPR64Opnd>, EXT_FM<1>; -} -def DINS : InsBase<"dins", GPR64Opnd>, EXT_FM<7>; -let Pattern = [] in { - def DINSU : InsBase<"dinsu", GPR64Opnd>, EXT_FM<6>; - def DINSM : InsBase<"dinsm", GPR64Opnd>, EXT_FM<5>; -} +def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>; +def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>; +def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>; + +def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>; +def DINSU : InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>; +def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>; let isCodeGenOnly = 1, rs = 0, shamt = 0 in { def DSLL64_32 : FR<0x00, 0x3c, (outs GPR64:$rd), (ins GPR32:$rt), diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index b3e01b1..50212e1 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -299,7 +299,7 @@ class PRECR_SRA_PH_W_DESC_BASE { dag OutOperandList = (outs ROT:$rt); - dag InOperandList = (ins ROS:$rs, shamt:$sa, ROS:$src); + dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); list Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))]; InstrItinClass Itinerary = itin; @@ -368,7 +368,7 @@ class ADDUH_QB_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rt); - dag InOperandList = (ins GPR32Opnd:$rs, shamt:$sa, GPR32Opnd:$src); + dag 
InOperandList = (ins GPR32Opnd:$rs, uimm5:$sa, GPR32Opnd:$src); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); list Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, ImmOp:$sa))]; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 65dfaed..14d1665 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -263,13 +263,16 @@ def uimm10 : Operand { } def simm16_64 : Operand; -def shamt : Operand; // Unsigned Operand def uimm5 : Operand { let PrintMethod = "printUnsignedImm"; } +def uimm6 : Operand { + let PrintMethod = "printUnsignedImm"; +} + def uimm16 : Operand { let PrintMethod = "printUnsignedImm"; } @@ -737,18 +740,20 @@ class ReadHardware : IIArith, FrmR>; // Ext and Ins -class ExtBase: - InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size), +class ExtBase: + InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size), !strconcat(opstr, " $rt, $rs, $pos, $size"), - [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary, + [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], NoItinerary, FrmR> { let Predicates = [HasMips32r2, HasStdEnc]; } -class InsBase: - InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src), +class InsBase: + InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src), !strconcat(opstr, " $rt, $rs, $pos, $size"), - [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))], + [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))], NoItinerary, FrmR> { let Predicates = [HasMips32r2, HasStdEnc]; let Constraints = "$src = $rt"; @@ -888,11 +893,11 @@ def XOR : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, IILogic, xor>, def NOR : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def SLL : MMRel, shift_rotate_imm<"sll", shamt, GPR32Opnd, shl, immZExt5>, +def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, shl, immZExt5>, SRA_FM<0, 0>; -def SRL : MMRel, shift_rotate_imm<"srl", shamt, GPR32Opnd, srl, immZExt5>, +def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, srl, immZExt5>, SRA_FM<2, 0>; -def SRA : MMRel, shift_rotate_imm<"sra", shamt, GPR32Opnd, sra, immZExt5>, +def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, sra, immZExt5>, SRA_FM<3, 0>; def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, shl>, SRLV_FM<4, 0>; def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, srl>, SRLV_FM<6, 0>; @@ -900,7 +905,7 @@ def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, sra>, SRLV_FM<7, 0>; // Rotate Instructions let Predicates = [HasMips32r2, HasStdEnc] in { - def ROTR : MMRel, shift_rotate_imm<"rotr", shamt, GPR32Opnd, rotr, + def ROTR : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd, rotr, immZExt5>, SRA_FM<2, 1>; def ROTRV : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd, rotr>, @@ -1053,8 +1058,8 @@ def PseudoMSUBU : MAddSubPseudo; def RDHWR : ReadHardware, RDHWR_FM; -def EXT : ExtBase<"ext", GPR32Opnd>, EXT_FM<0>; -def INS : InsBase<"ins", GPR32Opnd>, EXT_FM<4>; +def EXT : ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>; +def INS : InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>; @@ -1121,7 +1126,7 @@ def : InstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadImm32Reg : 
LoadImm32<"li", shamt,GPR32Opnd>; +def LoadImm32Reg : LoadImm32<"li", uimm5, GPR32Opnd>; class LoadAddress : MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr), @@ -1131,7 +1136,7 @@ def LoadAddr32Reg : LoadAddress<"la", mem, GPR32Opnd>; class LoadAddressImm : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadAddr32Imm : LoadAddressImm<"la", shamt,GPR32Opnd>; +def LoadAddr32Imm : LoadAddressImm<"la", uimm5, GPR32Opnd>; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 96fb5f8..abac272 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -29,10 +29,6 @@ def uimm4 : Operand { let PrintMethod = "printUnsignedImm"; } -def uimm6 : Operand { - let PrintMethod = "printUnsignedImm"; -} - def uimm8 : Operand { let PrintMethod = "printUnsignedImm"; } -- cgit v1.1 From 1abf0afdd4d8e9d58518a878f30b9eede81303cc Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 7 Sep 2013 00:18:01 +0000 Subject: [mips] Add definition of instruction "drotr32" (double rotate right plus 32). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190232 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 4 ++++ lib/Target/Mips/Mips64InstrInfo.td | 1 + 2 files changed, 5 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 02e93b8..3dfe428 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -155,6 +155,9 @@ static void LowerLargeShift(MCInst& Inst) { case Mips::DSRA: Inst.setOpcode(Mips::DSRA32); return; + case Mips::DROTR: + Inst.setOpcode(Mips::DROTR32); + return; } } @@ -206,6 +209,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case Mips::DSLL: case Mips::DSRL: case Mips::DSRA: + case Mips::DROTR: LowerLargeShift(TmpInst); break; // Double extract instruction is chosen by pos and size operands diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index f4e1074..b6a4f72 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -111,6 +111,7 @@ let Predicates = [HasMips64r2, HasStdEnc] in { SRA_FM<0x3a, 1>; def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, rotr>, SRLV_FM<0x16, 1>; + def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd>, SRA_FM<0x3e, 1>; } /// Load and Store Instructions -- cgit v1.1 From d65d2fde4eadcb40e80b361e4cf244c02dcc670b Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 7 Sep 2013 00:26:26 +0000 Subject: [mips] Place parentheses around && to silence warning. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190234 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 231d485..26321c9 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -232,9 +232,9 @@ bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
   case Mips::BEQ:
     // beq $zero, $zero, $L2 => b $L2
     // beq $r0, $zero, $L2 => beqz $r0, $L2
-    return isReg<Mips::ZERO>(MI, 0) && isReg<Mips::ZERO>(MI, 1) &&
-           printAlias("b", MI, 2, OS) ||
-           isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
+    return (isReg<Mips::ZERO>(MI, 0) && isReg<Mips::ZERO>(MI, 1) &&
+            printAlias("b", MI, 2, OS)) ||
+           (isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS));
   case Mips::BEQ64:
     // beq $r0, $zero, $L2 => beqz $r0, $L2
     return isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
--
cgit v1.1


From 3e6758541bb8c143f1f8d3ff550eba3dcc8d22e0 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka
Date: Sat, 7 Sep 2013 00:52:30 +0000
Subject: [mips] Enhance command line option "-mno-ldc1-sdc1" to expand
 base+index double precision loads and stores as well as reg+imm double
 precision loads and stores.

Previously, expansion of loads and stores was done after register allocation,
but now it takes place during legalization. As a result, users will see double
precision stores and loads being emitted to spill and restore 64-bit FP
registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190235 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsISelLowering.h     |  5 ++-
 lib/Target/Mips/MipsInstrFPU.td        | 12 ++----
 lib/Target/Mips/MipsSEISelLowering.cpp | 74 ++++++++++++++++++++++++++++++++++
 lib/Target/Mips/MipsSEISelLowering.h   |  3 ++
 lib/Target/Mips/MipsSEInstrInfo.cpp    | 61 ----------------------------
 lib/Target/Mips/MipsSEInstrInfo.h      |  3 --
 6 files changed, 84 insertions(+), 74 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4cc5a6a..85aa162 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -329,6 +329,9 @@ namespace llvm {
       SmallVector<ByValArgInfo, 2> ByValArgs;
     };
   protected:
+    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
     // Subtarget Info
     const MipsSubtarget *Subtarget;

@@ -366,8 +369,6 @@ namespace llvm {
     SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
     SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
                                  bool IsSRA) const;
-    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 536dff6..7aa080f 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -368,12 +368,8 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
 }

 let Predicates = [NotFP64bit, HasStdEnc] in {
-  let isPseudo = 1, isCodeGenOnly = 1 in {
-    def PseudoLDC1 : LW_FT<"", AFGR64Opnd, IIFLoad, load>;
-    def PseudoSDC1 : SW_FT<"", AFGR64Opnd, IIFStore, store>;
-  }
-  def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad>, LW_FM<0x35>;
-  def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore>, LW_FM<0x3d>;
+  def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM<0x35>;
+  def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>;
 }

 // Indexed loads and stores.
@@ -595,7 +591,7 @@ let AddedComplexity = 40 in {
   }

   let Predicates = [NotFP64bit, HasStdEnc] in {
-    def : LoadRegImmPat<PseudoLDC1, f64, load>;
-    def : StoreRegImmPat<PseudoSDC1, f64>;
+    def : LoadRegImmPat<LDC1, f64, load>;
+    def : StoreRegImmPat<SDC1, f64>;
   }
 }
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index fae294c..f32e146 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -25,6 +25,11 @@ static cl::opt<bool>
 EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
                     cl::desc("MIPS: Enable tail calls."), cl::init(false));

+static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
+                                   cl::desc("Expand double precision loads and "
+                                            "stores to their single precision "
+                                            "counterparts"));
+
 MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   : MipsTargetLowering(TM) {
   // Set up the register classes
@@ -129,6 +134,11 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

+  if (NoDPLoadStore) {
+    setOperationAction(ISD::LOAD, MVT::f64, Custom);
+    setOperationAction(ISD::STORE, MVT::f64, Custom);
+  }
+
   computeRegisterProperties();
 }

@@ -168,6 +178,8 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
 SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
   switch(Op.getOpcode()) {
+  case ISD::LOAD:      return lowerLOAD(Op, DAG);
+  case ISD::STORE:     return lowerSTORE(Op, DAG);
   case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
   case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
   case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
@@ -611,6 +623,68 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
                     InternalLinkage, CLI, Callee, Chain);
 }

+SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  LoadSDNode &Nd = *cast<LoadSDNode>(Op);
+
+  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+    return MipsTargetLowering::lowerLOAD(Op, DAG);
+
+  // Replace a double precision load with two i32 loads and a buildpair64.
+  SDLoc DL(Op);
+  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+  EVT PtrVT = Ptr.getValueType();
+
+  // i32 load from lower address.
+  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr,
+                           MachinePointerInfo(), Nd.isVolatile(),
+                           Nd.isNonTemporal(), Nd.isInvariant(),
+                           Nd.getAlignment());
+
+  // i32 load from higher address.
+  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+  SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
+                           MachinePointerInfo(), Nd.isVolatile(),
+                           Nd.isNonTemporal(), Nd.isInvariant(),
+                           Nd.getAlignment());
+
+  if (!Subtarget->isLittle())
+    std::swap(Lo, Hi);
+
+  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+  SDValue Ops[2] = {BP, Hi.getValue(1)};
+  return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  StoreSDNode &Nd = *cast<StoreSDNode>(Op);
+
+  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+    return MipsTargetLowering::lowerSTORE(Op, DAG);
+
+  // Replace a double precision store with two extractelement64s and i32 stores.
+ SDLoc DL(Op); + SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); + EVT PtrVT = Ptr.getValueType(); + SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, + Val, DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, + Val, DAG.getConstant(1, MVT::i32)); + + if (!Subtarget->isLittle()) + std::swap(Lo, Hi); + + // i32 store to lower address. + Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), + Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(), + Nd.getTBAAInfo()); + + // i32 store to higher address. + Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); + return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), + Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(), + Nd.getTBAAInfo()); +} + SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const { diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index d1a18e1..dde0c23 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -58,6 +58,9 @@ namespace llvm { bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const; diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 374837e..9c31254 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -24,11 +24,6 @@ using namespace llvm; -static cl::opt NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), - cl::desc("Expand double precision loads and " - "stores to their single precision " - "counterparts.")); - MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J), @@ -294,12 +289,6 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::ExtractElementF64_64: expandExtractElementF64(MBB, MI, true); break; - case Mips::PseudoLDC1: - expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1); - break; - case Mips::PseudoSDC1: - expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1); - break; case Mips::MIPSeh_return32: case Mips::MIPSeh_return64: expandEhReturn(MBB, MI); @@ -484,56 +473,6 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, .addReg(HiReg); } -/// Add 4 to the displacement of operand MO. -static void fixDisp(MachineOperand &MO) { - switch (MO.getType()) { - default: - llvm_unreachable("Unhandled operand type."); - case MachineOperand::MO_Immediate: - MO.setImm(MO.getImm() + 4); - break; - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - case MachineOperand::MO_TargetIndex: - case MachineOperand::MO_ExternalSymbol: - MO.setOffset(MO.getOffset() + 4); - break; - } -} - -void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned OpcD, unsigned OpcS) const { - // If NoDPLoadStore is false, just change the opcode. - if (!NoDPLoadStore) { - genInstrWithNewOpc(OpcD, I); - return; - } - - // Expand a double precision FP load or store to two single precision - // instructions. 
- - const TargetRegisterInfo &TRI = getRegisterInfo(); - const MachineOperand &ValReg = I->getOperand(0); - unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_lo); - unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_hi); - - if (!TM.getSubtarget().isLittle()) - std::swap(LoReg, HiReg); - - // Create an instruction which loads from or stores to the lower memory - // address. - MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I); - MIB->getOperand(0).setReg(LoReg); - - // Create an instruction which loads from or stores to the higher memory - // address. - MIB = genInstrWithNewOpc(OpcS, I); - MIB->getOperand(0).setReg(HiReg); - fixDisp(MIB->getOperand(2)); -} - void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { // This pseudo instruction is generated as part of the lowering of diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 6b4f89a..a2dfd95 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -104,9 +104,6 @@ private: MachineBasicBlock::iterator I, bool FP64) const; void expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const; - void expandDPLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, unsigned OpcD, - unsigned OpcS) const; void expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; }; -- cgit v1.1 From 89bf2e163c72ec8668905cc61da5b20856f61070 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 8 Sep 2013 00:47:31 +0000 Subject: Using popcount should check the popcount feature flag not the SSE41 feature flag. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 5f81d33..935a6da 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -127,8 +127,8 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); // TODO: Currently the __builtin_popcount() implementation using SSE3 // instructions is inefficient. Once the problem is fixed, we should - // call ST->hasSSE3() instead of ST->hasSSE4(). - return ST->hasSSE41() ? PSK_FastHardware : PSK_Software; + // call ST->hasSSE3() instead of ST->hasPOPCNT(). + return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software; } unsigned X86TTI::getNumberOfRegisters(bool Vector) const { -- cgit v1.1 From 704e8d41fec124ce97060e1b72f531c812412d2d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 8 Sep 2013 00:50:45 +0000 Subject: Add neverHasSideEffects=1 on a couple move instructions. 
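For context (an illustrative sketch, not part of the change): TableGen
conservatively infers unmodeled side effects for instructions that have no
selection pattern, and that inference is what neverHasSideEffects = 1
overrides, letting passes treat these as plain moves/stores. At the
MachineInstr level the difference is visible through real query APIs;
isPlainStore is a made-up name:

    #include "llvm/CodeGen/MachineInstr.h"

    // A pattern-less store without the flag answers true to
    // hasUnmodeledSideEffects() and is handled conservatively by passes.
    static bool isPlainStore(const llvm::MachineInstr &MI) {
      return MI.mayStore() && !MI.hasUnmodeledSideEffects();
    }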
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190259 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 2 +- lib/Target/X86/X86InstrSSE.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9aa75ff..3123cbc 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1216,7 +1216,7 @@ def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>, Sched<[WriteMove]>; -let mayStore = 1 in +let mayStore = 1, neverHasSideEffects = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b1cfbee..fd525f6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3643,7 +3643,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1, SchedRW = [WriteStore] in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], -- cgit v1.1 From 959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Mon, 9 Sep 2013 02:20:27 +0000 Subject: Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions, SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190288 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrFormats.td | 20 + lib/Target/AArch64/AArch64InstrNEON.td | 677 +++++++++++++++++++++++++++++- 2 files changed, 696 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 020ee6c..dd35367 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -990,6 +990,26 @@ class NeonI_3VSame size, bits<5> opcode, // Inherit Rd in 4-0 } +// Format AdvSIMD 3 vector registers with different vector type +class NeonI_3VDiff size, bits<4> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm +{ + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = opcode; + let Inst{11} = 0b0; + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + // Format AdvSIMD 1 vector register with modified immediate class NeonI_1VModImm, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; -def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; +def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; //===----------------------------------------------------------------------===// @@ -2143,6 +2143,681 @@ defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs", defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu", int_arm_neon_vcvtfp2fxu>; +multiclass Neon_sshll2_0 +{ + def _v8i8 : 
PatFrag<(ops node:$Rn), + (v8i16 (ext (v8i8 (Neon_top16B node:$Rn))))>; + def _v4i16 : PatFrag<(ops node:$Rn), + (v4i32 (ext (v4i16 (Neon_top8H node:$Rn))))>; + def _v2i32 : PatFrag<(ops node:$Rn), + (v2i64 (ext (v2i32 (Neon_top4S node:$Rn))))>; +} + +defm NI_sext_high : Neon_sshll2_0; +defm NI_zext_high : Neon_sshll2_0; + +// The followings are for instruction class (3V Diff) + +// normal long/long2 pattern +class NeonI_3VDL size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator ext, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDL_s opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, sext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, sext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, sext, VPR64, v2i64, v2i32>; + } +} + +multiclass NeonI_3VDL2_s opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; + } +} + +multiclass NeonI_3VDL_u opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, zext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, zext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, zext, VPR64, v2i64, v2i32>; + } +} + +multiclass NeonI_3VDL2_u opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; + } +} + +defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>; +defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>; + +defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>; +defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>; + +defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>; +defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>; + +defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>; +defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>; + +// normal wide/wide2 pattern +class NeonI_3VDW size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator ext, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDW_s opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, sext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, 
sext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, sext, VPR64, v2i64, v2i32>; +} + +defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>; +defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>; + +multiclass NeonI_3VDW2_s opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; +} + +defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>; +defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>; + +multiclass NeonI_3VDW_u opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, zext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, zext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, zext, VPR64, v2i64, v2i32>; +} + +defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>; +defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>; + +multiclass NeonI_3VDW2_u opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; +} + +defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>; +defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>; + +// Get the high half part of the vector element. 
+multiclass NeonI_get_high +{ + def _8h : PatFrag<(ops node:$Rn), + (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn), + (v8i16 (Neon_dupImm 8))))))>; + def _4s : PatFrag<(ops node:$Rn), + (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn), + (v4i32 (Neon_dupImm 16))))))>; + def _2d : PatFrag<(ops node:$Rn), + (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn), + (v2i64 (Neon_dupImm 32))))))>; +} + +defm NI_get_hi : NeonI_get_high; + +// pattern for addhn/subhn with 2 operands +class NeonI_3VDN_addhn_2Op size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator get_hi, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDN_addhn_2Op opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", + opnode, NI_get_hi_8h, v8i8, v8i16>; + def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", + opnode, NI_get_hi_4s, v4i16, v4i32>; + def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", + opnode, NI_get_hi_2d, v2i32, v2i64>; + } +} + +defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>; +defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; + +// pattern for operation with 2 operands +class NeonI_3VD_2Op size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + RegisterClass ResVPR, RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +// normal narrow pattern +multiclass NeonI_3VDN_2Op opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", + opnode, VPR64, VPR128, v8i8, v8i16>; + def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", + opnode, VPR64, VPR128, v4i16, v4i32>; + def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", + opnode, VPR64, VPR128, v2i32, v2i64>; + } +} + +defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; +defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; + +// pattern for acle intrinsic with 3 operands +class NeonI_3VDN_addhn2_3Op size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator get_hi, + ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDN_addhn2_3Op_v1 opcode, + string asmop, + SDPatternOperator opnode> +{ + def _16b8h : NeonI_3VDN_addhn2_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", + opnode, NI_get_hi_8h, v8i16, v8i8>; + def _8h4s : NeonI_3VDN_addhn2_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s", + opnode, NI_get_hi_4s, v4i32, v4i16>; + def _4s2d : NeonI_3VDN_addhn2_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d", + opnode, NI_get_hi_2d, v2i64, v2i32>; +} + +defm ADDHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0100, "addhn2", add>; +defm SUBHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0110, "subhn2", sub>; + +// pattern for acle intrinsic with 3 operands +class NeonI_3VDN_3Op size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDN_3Op_v1 opcode, + string asmop, + SDPatternOperator opnode> +{ + def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", + opnode, v8i16, v8i8>; + def _8h4s : 
NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s", + opnode, v4i32, v4i16>; + def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d", + opnode, v2i64, v2i32>; +} + +defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2", + int_arm_neon_vraddhn>; +defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2", + int_arm_neon_vrsubhn>; + +// pattern that need to extend result +class NeonI_3VDL_Ext size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDL_zext opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, VPR64, v8i16, v8i8, v8i8>; + def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR64, v4i32, v4i16, v4i16>; + def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR64, v2i64, v2i32, v2i32>; + } +} + +defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>; +defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; + +multiclass NeonI_Op_High +{ + def _16B : PatFrag<(ops node:$Rn, node:$Rm), + (op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>; + def _8H : PatFrag<(ops node:$Rn, node:$Rm), + (op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>; + def _4S : PatFrag<(ops node:$Rn, node:$Rm), + (op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>; +} + +defm NI_sabdl_hi : NeonI_Op_High; +defm NI_uabdl_hi : NeonI_Op_High; +defm NI_smull_hi : NeonI_Op_High; +defm NI_umull_hi : NeonI_Op_High; +defm NI_qdmull_hi : NeonI_Op_High; +defm NI_pmull_hi : NeonI_Op_High; + +multiclass NeonI_3VDL_Abd_u opcode, + string asmop, string opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b", + !cast(opnode # "_16B"), + VPR128, v8i16, v16i8, v8i8>; + def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast(opnode # "_8H"), + VPR128, v4i32, v8i16, v4i16>; + def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast(opnode # "_4S"), + VPR128, v2i64, v4i32, v2i32>; + } +} + +defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>; +defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>; + +// For pattern that need two operators being chained. 
+class NeonI_3VDL_Aba size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator subop, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL_Aba_v1 opcode, + string asmop, SDPatternOperator opnode, + SDPatternOperator subop> +{ + def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, subop, VPR64, v8i16, v8i8, v8i8>; + def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, subop, VPR64, v4i32, v4i16, v4i16>; + def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, subop, VPR64, v2i64, v2i32, v2i32>; +} + +defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal", + add, int_arm_neon_vabds>; +defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal", + add, int_arm_neon_vabdu>; + +multiclass NeonI_3VDL2_Aba_v1 opcode, + string asmop, SDPatternOperator opnode, + string subop> +{ + def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, !cast(subop # "_16B"), + VPR128, v8i16, v16i8, v8i8>; + def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, !cast(subop # "_8H"), + VPR128, v4i32, v8i16, v4i16>; + def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, !cast(subop # "_4S"), + VPR128, v2i64, v4i32, v2i32>; +} + +defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add, + "NI_sabdl_hi">; +defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add, + "NI_uabdl_hi">; + +// Long pattern with 2 operands +multiclass NeonI_3VDL_2Op opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, VPR128, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR128, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR128, VPR64, v2i64, v2i32>; + } +} + +defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>; +defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>; + +class NeonI_3VDL2_2Op_mull size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + + +multiclass NeonI_3VDL2_2Op_mull_v1 opcode, + string asmop, + string opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", + !cast(opnode # "_16B"), + v8i16, v16i8>; + def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast(opnode # "_8H"), + v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast(opnode # "_4S"), + v2i64, v4i32>; + } +} + +defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2", + "NI_smull_hi", 1>; +defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2", + "NI_umull_hi", 1>; + +// Long pattern with 3 operands +class NeonI_3VDL_3Op size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL_3Op_v1 opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, v8i16, v8i8>; + def _4s4h : NeonI_3VDL_3Op<0b0, u, 
0b01, opcode, asmop, "4s", "4h", + opnode, v4i32, v4i16>; + def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, v2i64, v2i32>; +} + +def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn), + (add node:$Rd, + (int_arm_neon_vmulls node:$Rn, node:$Rm))>; + +def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn), + (add node:$Rd, + (int_arm_neon_vmullu node:$Rn, node:$Rm))>; + +def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn), + (sub node:$Rd, + (int_arm_neon_vmulls node:$Rn, node:$Rm))>; + +def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn), + (sub node:$Rd, + (int_arm_neon_vmullu node:$Rn, node:$Rm))>; + +defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>; +defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>; + +defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>; +defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>; + +class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator subop, SDPatternOperator opnode, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL2_3Op_mlas_v1 opcode, + string asmop, + SDPatternOperator subop, + string opnode> +{ + def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b", + subop, !cast(opnode # "_16B"), + VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", + subop, !cast(opnode # "_8H"), + VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", + subop, !cast(opnode # "_4S"), + VPR128, v2i64, v4i32>; +} + +defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2", + add, "NI_smull_hi">; +defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2", + add, "NI_umull_hi">; + +defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2", + sub, "NI_smull_hi">; +defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2", + sub, "NI_umull_hi">; + +multiclass NeonI_3VDL_qdmlal_3Op_v2 opcode, + string asmop, SDPatternOperator opnode> +{ + def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, int_arm_neon_vqdmull, + VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, int_arm_neon_vqdmull, + VPR64, v2i64, v2i32>; +} + +defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal", + int_arm_neon_vqadds>; +defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl", + int_arm_neon_vqsubs>; + +multiclass NeonI_3VDL_v2 opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR128, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR128, VPR64, v2i64, v2i32>; + } +} + +defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", + int_arm_neon_vqdmull, 1>; + +multiclass NeonI_3VDL2_2Op_mull_v2 opcode, + string asmop, + string opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast(opnode # "_8H"), + v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast(opnode # "_4S"), + v2i64, v4i32>; + } +} + +defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", + "NI_qdmull_hi", 1>; + 
+multiclass NeonI_3VDL2_3Op_qdmlal_v2 opcode, + string asmop, + SDPatternOperator opnode> +{ + def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_qdmull_hi_8H, + VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_qdmull_hi_4S, + VPR128, v2i64, v4i32>; +} + +defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2", + int_arm_neon_vqadds>; +defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2", + int_arm_neon_vqsubs>; + +multiclass NeonI_3VDL_v3 opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, VPR128, VPR64, v8i16, v8i8>; + } +} + +defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>; + +multiclass NeonI_3VDL2_2Op_mull_v3 opcode, + string asmop, + string opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", + !cast(opnode # "_16B"), + v8i16, v16i8>; + } +} + +defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", + "NI_pmull_hi", 1>; + +// End of implementation for instruction class (3V Diff) + // Scalar Arithmetic class NeonI_Scalar3Same_D_size opcode, string asmop> -- cgit v1.1 From c3cee57f7d20f69a84fd88464ed8cf050e63c7ad Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 9 Sep 2013 02:37:14 +0000 Subject: Generate compact unwind encoding from CFI directives. We used to generate the compact unwind encoding from the machine instructions. However, this had the problem that if the user used `-save-temps' or compiled their hand-written `.s' file (with CFI directives), we wouldn't generate the compact unwind encoding. Move the algorithm that generates the compact unwind encoding into the MCAsmBackend. This way we can generate the encoding whether the code is from a `.ll' or `.s' file. 
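The encoding itself is a single 32-bit word: a mode in the top byte plus
mode-specific fields below it. As a rough sketch of the frame-pointer case
(the constants mirror the CU enum added by this patch; encodeBPFrame is an
illustrative stand-in, not a real API):

    #include <cstdint>

    static uint32_t encodeBPFrame(uint32_t StackAdjust, uint32_t RegEnc) {
      uint32_t Encoding = 0x01000000;         // CU::UNWIND_MODE_BP_FRAME
      Encoding |= (StackAdjust & 0xFF) << 16; // BP-relative stack adjust
      Encoding |= RegEnc & 0x00007FFF;        // CU::UNWIND_BP_FRAME_REGISTERS
      return Encoding;
    }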
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190290 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 4 +- .../AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 5 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 4 +- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 3 +- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 16 +- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 16 +- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 7 +- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 3 +- lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 4 +- lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 4 +- .../SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 5 +- .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 5 +- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 365 ++++++++++++++++++++- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 6 +- lib/Target/X86/X86FrameLowering.cpp | 273 --------------- lib/Target/X86/X86FrameLowering.h | 27 -- 16 files changed, 405 insertions(+), 342 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index a3373b1..8a9077c 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -578,8 +578,8 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { } MCAsmBackend * -llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) { +llvm::createAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { Triple TheTriple(TT); - return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS()); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 3849fe3..670e657 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -43,8 +43,9 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI); -MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT, - StringRef CPU); +MCAsmBackend *createAArch64AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); } // End llvm namespace diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 828442f..5615b80 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -660,7 +660,9 @@ public: } // end anonymous namespace -MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) { +MCAsmBackend *llvm::createARMAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 4e94c53..a571907 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -47,7 +47,8 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU); +MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); /// createARMELFObjectWriter - Construct an 
ELF Mach-O object writer. MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 0b13607..b2c8dd7 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -253,25 +253,33 @@ public: } // namespace // MCAsmBackend -MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT, +MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return new MipsAsmBackend(T, Triple(TT).getOS(), /*IsLittle*/true, /*Is64Bit*/false); } -MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT, +MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return new MipsAsmBackend(T, Triple(TT).getOS(), /*IsLittle*/false, /*Is64Bit*/false); } -MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT, +MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return new MipsAsmBackend(T, Triple(TT).getOS(), /*IsLittle*/true, /*Is64Bit*/true); } -MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT, +MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return new MipsAsmBackend(T, Triple(TT).getOS(), /*IsLittle*/false, /*Is64Bit*/true); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 71954a4..eabebfe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -42,14 +42,14 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createMipsAsmBackendEB32(const Target &T, StringRef TT, - StringRef CPU); -MCAsmBackend *createMipsAsmBackendEL32(const Target &T, StringRef TT, - StringRef CPU); -MCAsmBackend *createMipsAsmBackendEB64(const Target &T, StringRef TT, - StringRef CPU); -MCAsmBackend *createMipsAsmBackendEL64(const Target &T, StringRef TT, - StringRef CPU); +MCAsmBackend *createMipsAsmBackendEB32(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); +MCAsmBackend *createMipsAsmBackendEL32(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); +MCAsmBackend *createMipsAsmBackendEB64(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); +MCAsmBackend *createMipsAsmBackendEL64(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index ffeac43..91f1160 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -192,10 +192,9 @@ namespace { } // end anonymous namespace - - - -MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) { +MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 
16c3cb4..0b0ca24 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -40,7 +40,8 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU); +MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); /// createPPCELFObjectWriter - Construct an PPC ELF object writer. MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index 9a36903..29d0acf 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -95,7 +95,9 @@ public: } // end anonymous namespace -MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT, +MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return new ELFAMDGPUAsmBackend(T); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index abb0320..f6b3376 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -40,8 +40,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT, - StringRef CPU); +MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS); } // End llvm namespace diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 027db44..11b520d 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -143,8 +143,9 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count, return true; } -MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT, - StringRef CPU) { +MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); return new SystemZMCAsmBackend(OSABI); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 3c9f0cb..01ef093 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -54,8 +54,9 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT, - StringRef CPU); +MCAsmBackend *createSystemZMCAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI); } // end namespace llvm diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index fc3bae3..e4e8776 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -27,6 +27,32 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +namespace CU { + + 
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index fc3bae3..e4e8776 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -27,6 +27,32 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;

+namespace CU {
+
+  /// Compact unwind encoding values.
+  enum CompactUnwindEncodings {
+    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately
+    /// after the return address, then [RE]SP is moved to [RE]BP.
+    UNWIND_MODE_BP_FRAME = 0x01000000,
+
+    /// A frameless function with a small constant stack size.
+    UNWIND_MODE_STACK_IMMD = 0x02000000,
+
+    /// A frameless function with a large constant stack size.
+    UNWIND_MODE_STACK_IND = 0x03000000,
+
+    /// No compact unwind encoding is available.
+    UNWIND_MODE_DWARF = 0x04000000,
+
+    /// Mask for encoding the frame registers.
+    UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
+
+    /// Mask for encoding the frameless registers.
+    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
+  };
+
+} // end CU namespace
+
 // Option to allow disabling arithmetic relaxation to workaround PR9807, which
 // is useful when running bitwise comparison experiments on Darwin. We should be
 // able to remove this once PR9807 is resolved.
@@ -383,27 +409,330 @@ public:
 };

 class DarwinX86AsmBackend : public X86AsmBackend {
+  const MCRegisterInfo &MRI;
+
+  /// \brief Number of registers that can be saved in a compact unwind encoding.
+  enum { CU_NUM_SAVED_REGS = 6 };
+
+  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
+  bool Is64Bit;
+
+  unsigned OffsetSize;     ///< Offset of a "push" instruction.
+  unsigned PushInstrSize;  ///< Size of a "push" instruction.
+  unsigned MoveInstrSize;  ///< Size of a "move" instruction.
+  unsigned StackDivide;    ///< Amount to adjust stack size by.
+protected:
+  /// \brief Implementation of algorithm to generate the compact unwind encoding
+  /// for the CFI instructions.
+  uint32_t
+  generateCompactUnwindEncodingImpl(ArrayRef<MCCFIInstruction> Instrs) const {
+    if (Instrs.empty()) return 0;
+
+    // Reset the saved registers.
+    unsigned SavedRegIdx = 0;
+    memset(SavedRegs, 0, sizeof(SavedRegs));
+
+    bool HasFP = false;
+
+    // Encode that we are using EBP/RBP as the frame pointer.
+    uint32_t CompactUnwindEncoding = 0;
+
+    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
+    unsigned InstrOffset = 0;
+    unsigned StackAdjust = 0;
+    unsigned StackSize = 0;
+    unsigned PrevStackSize = 0;
+    unsigned NumDefCFAOffsets = 0;
+
+    for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
+      const MCCFIInstruction &Inst = Instrs[i];
+
+      switch (Inst.getOperation()) {
+      default:
+        llvm_unreachable("cannot handle CFI directive for compact unwind!");
+      case MCCFIInstruction::OpDefCfaRegister: {
+        // Defines a frame pointer. E.g.
+        //
+        //     movq %rsp, %rbp
+        //  L0:
+        //     .cfi_def_cfa_register %rbp
+        //
+        HasFP = true;
+        assert(MRI.getLLVMRegNum(Inst.getRegister(), true) ==
+               (Is64Bit ? X86::RBP : X86::EBP) && "Invalid frame pointer!");
+
+        // Reset the counts.
+        memset(SavedRegs, 0, sizeof(SavedRegs));
+        StackAdjust = 0;
+        SavedRegIdx = 0;
+        InstrOffset += MoveInstrSize;
+        break;
+      }
+      case MCCFIInstruction::OpDefCfaOffset: {
+        // Defines a new offset for the CFA. E.g.
+        //
+        //  With frame:
+        //
+        //     pushq %rbp
+        //  L0:
+        //     .cfi_def_cfa_offset 16
+        //
+        //  Without frame:
+        //
+        //     subq $72, %rsp
+        //  L0:
+        //     .cfi_def_cfa_offset 80
+        //
+        PrevStackSize = StackSize;
+        StackSize = std::abs(Inst.getOffset()) / StackDivide;
+        ++NumDefCFAOffsets;
+        break;
+      }
+      case MCCFIInstruction::OpOffset: {
+        // Defines a "push" of a callee-saved register. E.g.
+        //
+        //     pushq %r15
+        //     pushq %r14
+        //     pushq %rbx
+        //  L0:
+        //     subq $120, %rsp
+        //  L1:
+        //     .cfi_offset %rbx, -40
+        //     .cfi_offset %r14, -32
+        //     .cfi_offset %r15, -24
+        //
+        if (SavedRegIdx == CU_NUM_SAVED_REGS)
+          // If there are too many saved registers, we cannot use a compact
+          // unwind encoding.
+          return CU::UNWIND_MODE_DWARF;
+
+        unsigned Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+        SavedRegs[SavedRegIdx++] = Reg;
+        StackAdjust += OffsetSize;
+        InstrOffset += PushInstrSize;
+        break;
+      }
+      }
+    }
+
+    StackAdjust /= StackDivide;
+
+    if (HasFP) {
+      if ((StackAdjust & 0xFF) != StackAdjust)
+        // Offset was too big for a compact unwind encoding.
+        return CU::UNWIND_MODE_DWARF;
+
+      // Get the encoding of the saved registers when we have a frame pointer.
+      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
+      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
+
+      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
+      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
+      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
+    } else {
+      // If the amount of the stack allocation is the size of a register, then
+      // we "push" the RAX/EAX register onto the stack instead of adjusting the
+      // stack pointer with a SUB instruction. We don't support the push of the
+      // RAX/EAX register with compact unwind. So we check for that situation
+      // here.
+      if ((NumDefCFAOffsets == SavedRegIdx + 1 &&
+           StackSize - PrevStackSize == 1) ||
+          (Instrs.size() == 1 && NumDefCFAOffsets == 1 && StackSize == 2))
+        return CU::UNWIND_MODE_DWARF;
+
+      SubtractInstrIdx += InstrOffset;
+      ++StackAdjust;
+
+      if ((StackSize & 0xFF) == StackSize) {
+        // Frameless stack with a small stack size.
+        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
+
+        // Encode the stack size.
+        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
+      } else {
+        if ((StackAdjust & 0x7) != StackAdjust)
+          // The extra stack adjustments are too big for us to handle.
+          return CU::UNWIND_MODE_DWARF;
+
+        // Frameless stack with an offset too large for us to encode compactly.
+        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
+
+        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
+        // instruction.
+        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
+
+        // Encode any extra stack adjustments (done via push
+        // instructions).
+        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
+      }
+
+      // Encode the number of registers saved. (Reverse the list first.)
+      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
+      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
+      // Get the encoding of the saved registers when we don't have a frame
+      // pointer.
+      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
+      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
+
+      // Encode the register encoding.
+      CompactUnwindEncoding |=
+        RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
+    }
+
+    return CompactUnwindEncoding;
+  }
+
+private:
+  /// \brief Get the compact unwind number for a given register. The number
+  /// corresponds to the enum lists in compact_unwind_encoding.h.
+  int getCompactUnwindRegNum(unsigned Reg) const {
+    static const uint16_t CU32BitRegs[7] = {
+      X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+    };
+    static const uint16_t CU64BitRegs[] = {
+      X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+    };
+    const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
+      if (*CURegs == Reg)
+        return Idx;
+
+    return -1;
+  }
+
+  /// \brief Return the registers encoded for a compact encoding with a frame
+  /// pointer.
+  uint32_t encodeCompactUnwindRegistersWithFrame() const {
+    // Encode the registers in the order they were saved --- 3-bits per
+    // register. The list of saved registers is assumed to be in reverse
+    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
+    uint32_t RegEnc = 0;
+    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
+      unsigned Reg = SavedRegs[i];
+      if (Reg == 0) break;
+
+      int CURegNum = getCompactUnwindRegNum(Reg);
+      if (CURegNum == -1) return ~0U;
+
+      // Encode the 3-bit register number in order, skipping over 3-bits for
+      // each register.
+      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
+    }
+
+    assert((RegEnc & 0x3FFFF) == RegEnc &&
+           "Invalid compact register encoding!");
+    return RegEnc;
+  }
+
+  /// \brief Create the permutation encoding used with frameless stacks. It is
+  /// passed the number of registers to be saved and an array of the registers
+  /// saved.
+  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
+    // The saved registers are numbered from 1 to 6. In order to encode the
+    // order in which they were saved, we re-number them according to their
+    // place in the register order. The re-numbering is relative to the last
+    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
+    // that order:
+    //
+    //    Orig  Re-Num
+    //    ----  ------
+    //     6       6
+    //     2       2
+    //     4       3
+    //     5       3
+    //
+    for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
+      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
+      if (CUReg == -1) return ~0U;
+      SavedRegs[i] = CUReg;
+    }
+
+    // Reverse the list.
+    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
+
+    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
+      unsigned Countless = 0;
+      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
+        if (SavedRegs[j] < SavedRegs[i])
+          ++Countless;
+
+      RenumRegs[i] = SavedRegs[i] - Countless - 1;
+    }
+
+    // Take the renumbered values and encode them into a 10-bit number.
+    uint32_t permutationEncoding = 0;
+    switch (RegCount) {
+    case 6:
+      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+                             + 6 * RenumRegs[2] +  2 * RenumRegs[3]
+                             +     RenumRegs[4];
+      break;
+    case 5:
+      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+                             + 6 * RenumRegs[3] +  2 * RenumRegs[4]
+                             +     RenumRegs[5];
+      break;
+    case 4:
+      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+                             + 3 * RenumRegs[4] +     RenumRegs[5];
+      break;
+    case 3:
+      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+                             +    RenumRegs[5];
+      break;
+    case 2:
+      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+      break;
+    case 1:
+      permutationEncoding |= RenumRegs[5];
+      break;
+    }
+
+    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+           "Invalid compact register encoding!");
+    return permutationEncoding;
+  }
+
 public:
-  DarwinX86AsmBackend(const Target &T, StringRef CPU)
-    : X86AsmBackend(T, CPU) { }
+  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef CPU,
+                      bool Is64Bit)
+    : X86AsmBackend(T, CPU), MRI(MRI), Is64Bit(Is64Bit) {
+    memset(SavedRegs, 0, sizeof(SavedRegs));
+    OffsetSize = Is64Bit ? 8 : 4;
+    MoveInstrSize = Is64Bit ? 3 : 2;
+    StackDivide = Is64Bit ? 8 : 4;
+    PushInstrSize = 1;
+  }
 };

 class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
+  bool SupportsCU;
 public:
-  DarwinX86_32AsmBackend(const Target &T, StringRef CPU)
-    : DarwinX86AsmBackend(T, CPU) {}
+  DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+                         StringRef CPU, bool SupportsCU)
+    : DarwinX86AsmBackend(T, MRI, CPU, false), SupportsCU(SupportsCU) {}

   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
     return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
                                      MachO::CPU_TYPE_I386,
                                      MachO::CPU_SUBTYPE_I386_ALL);
   }
+
+  /// \brief Generate the compact unwind encoding for the CFI instructions.
+  virtual unsigned
+  generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const {
+    return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
+  }
 };

 class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
+  bool SupportsCU;
 public:
-  DarwinX86_64AsmBackend(const Target &T, StringRef CPU)
-    : DarwinX86AsmBackend(T, CPU) {
+  DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+                         StringRef CPU, bool SupportsCU)
+    : DarwinX86AsmBackend(T, MRI, CPU, true), SupportsCU(SupportsCU) {
     HasReliableSymbolDifference = true;
   }

@@ -445,15 +774,26 @@ public:
       return false;
     }
   }
+
+  /// \brief Generate the compact unwind encoding for the CFI instructions.
+  virtual unsigned
+  generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const {
+    return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
+  }
 };

 } // end anonymous namespace

-MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+                                           const MCRegisterInfo &MRI,
+                                           StringRef TT,
+                                           StringRef CPU) {
   Triple TheTriple(TT);

   if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
-    return new DarwinX86_32AsmBackend(T, CPU);
+    return new DarwinX86_32AsmBackend(T, MRI, CPU,
+                                      TheTriple.isMacOSX() &&
+                                      !TheTriple.isMacOSXVersionLT(10, 7));

   if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
     return new WindowsX86AsmBackend(T, false, CPU);
@@ -462,11 +802,16 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, String
   return new ELFX86_32AsmBackend(T, OSABI, CPU);
 }

-MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+                                           const MCRegisterInfo &MRI,
+                                           StringRef TT,
+                                           StringRef CPU) {
   Triple TheTriple(TT);

   if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
-    return new DarwinX86_64AsmBackend(T, CPU);
+    return new DarwinX86_64AsmBackend(T, MRI, CPU,
+                                      TheTriple.isMacOSX() &&
+                                      !TheTriple.isMacOSXVersionLT(10, 7));

   if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
     return new WindowsX86AsmBackend(T, true, CPU);
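To make the frameless permutation concrete: per the comment in
encodeCompactUnwindRegistersWithoutFrame, CU register numbers {6, 2, 4, 5}
saved in that order renumber to {6, 2, 3, 3}, each value expressed relative to
the registers still unchosen, and the zero-based results are then packed with
mixed-radix weights. A standalone sketch (helper name invented, not backend
code) of the RegCount == 4 packing from the switch above:

    // Pack four already-renumbered, zero-based register values into the
    // 10-bit permutation index. 60/12/3/1 are the mixed-radix place values
    // for choosing 4 ordered registers out of 6 (6*5*4*3 = 360 <= 1024).
    static uint32_t packFramelessPermutation4(uint32_t R2, uint32_t R3,
                                              uint32_t R4, uint32_t R5) {
      return 60 * R2 + 12 * R3 + 3 * R4 + R5;
    }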
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 2f459b4..41ae435 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -79,8 +79,10 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
                                       const MCSubtargetInfo &STI,
                                       MCContext &Ctx);

-MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU);
-MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+                                     StringRef TT, StringRef CPU);
+MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+                                     StringRef TT, StringRef CPU);

 /// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
 MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index f44a893..adcd4f7 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -365,274 +365,6 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
   }
 }

-/// getCompactUnwindRegNum - Get the compact unwind number for a given
-/// register. The number corresponds to the enum lists in
-/// compact_unwind_encoding.h.
-static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) {
-  static const uint16_t CU32BitRegs[] = {
-    X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
-  };
-  static const uint16_t CU64BitRegs[] = {
-    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
-  };
-  const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs;
-  for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
-    if (*CURegs == Reg)
-      return Idx;
-
-  return -1;
-}
-
-// Number of registers that can be saved in a compact unwind encoding.
-#define CU_NUM_SAVED_REGS 6
-
-/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
-/// used with frameless stacks. It is passed the number of registers to be saved
-/// and an array of the registers saved.
-static uint32_t
-encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
-                                         unsigned RegCount, bool Is64Bit) {
-  // The saved registers are numbered from 1 to 6. In order to encode the order
-  // in which they were saved, we re-number them according to their place in the
-  // register order. The re-numbering is relative to the last re-numbered
-  // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
-  //
-  //    Orig  Re-Num
-  //    ----  ------
-  //     6       6
-  //     2       2
-  //     4       3
-  //     5       3
-  //
-  for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
-    int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit);
-    if (CUReg == -1) return ~0U;
-    SavedRegs[i] = CUReg;
-  }
-
-  // Reverse the list.
-  std::swap(SavedRegs[0], SavedRegs[5]);
-  std::swap(SavedRegs[1], SavedRegs[4]);
-  std::swap(SavedRegs[2], SavedRegs[3]);
-
-  uint32_t RenumRegs[CU_NUM_SAVED_REGS];
-  for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
-    unsigned Countless = 0;
-    for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
-      if (SavedRegs[j] < SavedRegs[i])
-        ++Countless;
-
-    RenumRegs[i] = SavedRegs[i] - Countless - 1;
-  }
-
-  // Take the renumbered values and encode them into a 10-bit number.
-  uint32_t permutationEncoding = 0;
-  switch (RegCount) {
-  case 6:
-    permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
-                           + 6 * RenumRegs[2] +  2 * RenumRegs[3]
-                           +     RenumRegs[4];
-    break;
-  case 5:
-    permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
-                           + 6 * RenumRegs[3] +  2 * RenumRegs[4]
-                           +     RenumRegs[5];
-    break;
-  case 4:
-    permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
-                           + 3 * RenumRegs[4] +     RenumRegs[5];
-    break;
-  case 3:
-    permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
-                           +    RenumRegs[5];
-    break;
-  case 2:
-    permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
-    break;
-  case 1:
-    permutationEncoding |= RenumRegs[5];
-    break;
-  }
-
-  assert((permutationEncoding & 0x3FF) == permutationEncoding &&
-         "Invalid compact register encoding!");
-  return permutationEncoding;
-}
-
-/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
-/// compact encoding with a frame pointer.
-static uint32_t
-encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
-                                      bool Is64Bit) {
-  // Encode the registers in the order they were saved, 3-bits per register. The
-  // registers are numbered from 1 to CU_NUM_SAVED_REGS.
-  uint32_t RegEnc = 0;
-  for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) {
-    unsigned Reg = SavedRegs[I];
-    if (Reg == 0) continue;
-
-    int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit);
-    if (CURegNum == -1) return ~0U;
-
-    // Encode the 3-bit register number in order, skipping over 3-bits for each
-    // register.
-    RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
-  }
-
-  assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!");
-  return RegEnc;
-}
-
-static uint32_t
-doCompactUnwindEncoding(unsigned SavedRegs[CU_NUM_SAVED_REGS],
-                        unsigned StackSize, unsigned StackAdjust,
-                        unsigned SubtractInstrIdx, unsigned SavedRegIdx,
-                        bool Is64Bit, bool HasFP) {
-  // Encode that we are using EBP/RBP as the frame pointer.
-  unsigned StackDivide = (Is64Bit ? 8 : 4);
-  uint32_t CompactUnwindEncoding = 0;
-
-  StackAdjust /= StackDivide;
-
-  if (HasFP) {
-    if ((StackAdjust & 0xFF) != StackAdjust)
-      // Offset was too big for compact encoding.
-      return CU::UNWIND_MODE_DWARF;
-
-    // Get the encoding of the saved registers when we have a frame pointer.
-    uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
-    if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
-
-    CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
-    CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
-    CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
-  } else {
-    ++StackAdjust;
-    uint32_t TotalStackSize = StackAdjust + StackSize;
-    if ((TotalStackSize & 0xFF) == TotalStackSize) {
-      // Frameless stack with a small stack size.
-      CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
-
-      // Encode the stack size.
-      CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
-    } else {
-      if ((StackAdjust & 0x7) != StackAdjust)
-        // The extra stack adjustments are too big for us to handle.
-        return CU::UNWIND_MODE_DWARF;
-
-      // Frameless stack with an offset too large for us to encode compactly.
-      CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
-
-      // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
-      // instruction.
-      CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
-
-      // Encode any extra stack stack adjustments (done via push instructions).
-      CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
-    }
-
-    // Encode the number of registers saved.
-    CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
-
-    // Get the encoding of the saved registers when we don't have a frame
-    // pointer.
-    uint32_t RegEnc =
-      encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
-                                               Is64Bit);
-    if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
-
-    // Encode the register encoding.
-    CompactUnwindEncoding |=
-      RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
-  }
-
-  return CompactUnwindEncoding;
-}
-
-uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
-  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
-  unsigned FramePtr = RegInfo->getFrameRegister(MF);
-  unsigned StackPtr = RegInfo->getStackRegister();
-
-  bool Is64Bit = STI.is64Bit();
-
-  unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
-  unsigned SavedRegIdx = 0;
-
-  unsigned OffsetSize = (Is64Bit ? 8 : 4);
-
-  unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
-  unsigned PushInstrSize = 1;
-  unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
-  unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
-  unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
-
-  unsigned StackDivide = (Is64Bit ? 8 : 4);
-
-  unsigned InstrOffset = 0;
-  unsigned StackAdjust = 0;
-  unsigned StackSize = 0;
-
-  bool ExpectEnd = false;
-  for (MachineBasicBlock::iterator MBBI = MF.front().begin(),
-         MBBE = MF.front().end(); MBBI != MBBE; ++MBBI) {
-    MachineInstr &MI = *MBBI;
-    unsigned Opc = MI.getOpcode();
-    if (Opc == X86::PROLOG_LABEL) continue;
-    if (!MI.getFlag(MachineInstr::FrameSetup)) break;
-
-    // We don't exect any more prolog instructions.
-    if (ExpectEnd) return CU::UNWIND_MODE_DWARF;
-
-    if (Opc == PushInstr) {
-      // If there are too many saved registers, we cannot use compact encoding.
-      if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF;
-
-      unsigned Reg = MI.getOperand(0).getReg();
-      if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) {
-        ExpectEnd = true;
-        continue;
-      }
-
-      SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
-      StackAdjust += OffsetSize;
-      InstrOffset += PushInstrSize;
-    } else if (Opc == MoveInstr) {
-      unsigned SrcReg = MI.getOperand(1).getReg();
-      unsigned DstReg = MI.getOperand(0).getReg();
-
-      if (DstReg != FramePtr || SrcReg != StackPtr)
-        return CU::UNWIND_MODE_DWARF;
-
-      StackAdjust = 0;
-      memset(SavedRegs, 0, sizeof(SavedRegs));
-      SavedRegIdx = 0;
-      InstrOffset += MoveInstrSize;
-    } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
-               Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
-      if (StackSize)
-        // We already have a stack size.
-        return CU::UNWIND_MODE_DWARF;
-
-      if (!MI.getOperand(0).isReg() ||
-          MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
-          MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
-        // We need this to be a stack adjustment pointer. Something like:
-        //
-        //   %RSP = SUB64ri8 %RSP, 48
-        return CU::UNWIND_MODE_DWARF;
-
-      StackSize = MI.getOperand(2).getImm() / StackDivide;
-      SubtractInstrIdx += InstrOffset;
-      ExpectEnd = true;
-    }
-  }
-
-  return doCompactUnwindEncoding(SavedRegs, StackSize, StackAdjust,
-                                 SubtractInstrIdx, SavedRegIdx,
-                                 Is64Bit, hasFP(MF));
-}
-
 /// usesTheStack - This function checks if any of the users of EFLAGS
 /// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
 /// to use the stack, and if we don't adjust the stack we clobber the first
@@ -975,11 +707,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {

   if (PushedRegs)
     emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
-
-  // Darwin 10.7 and greater has support for compact unwind encoding.
-  if (STI.getTargetTriple().isMacOSX() &&
-      !STI.getTargetTriple().isMacOSXVersionLT(10, 7))
-    MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF));
 }

 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 6e309d8..3d3b011 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -20,32 +20,6 @@

 namespace llvm {

-namespace CU {
-
-  /// Compact unwind encoding values.
-  enum CompactUnwindEncodings {
-    /// [RE]BP based frame where [RE]BP is pused on the stack immediately after
-    /// the return address, then [RE]SP is moved to [RE]BP.
-    UNWIND_MODE_BP_FRAME = 0x01000000,
-
-    /// A frameless function with a small constant stack size.
-    UNWIND_MODE_STACK_IMMD = 0x02000000,
-
-    /// A frameless function with a large constant stack size.
-    UNWIND_MODE_STACK_IND = 0x03000000,
-
-    /// No compact unwind encoding is available.
-    UNWIND_MODE_DWARF = 0x04000000,
-
-    /// Mask for encoding the frame registers.
-    UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
-
-    /// Mask for encoding the frameless registers.
-    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
-  };
-
-} // end CU namespace
-
 class MCSymbol;
 class X86TargetMachine;

@@ -91,7 +65,6 @@ public:
   int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
   int getFrameIndexReference(const MachineFunction &MF, int FI,
                              unsigned &FrameReg) const;
-  uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;

   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
--
cgit v1.1
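With the encoder moved behind MCAsmBackend, the consumer is the MC layer
rather than frame lowering. Roughly — a simplified sketch from memory, not
verbatim from this patch — the Mach-O frame-emission path can now do
something like the following when it finalizes one function's frame info:

    // Sketch: ask the target's asm backend to encode the function's CFI
    // instructions. A result carrying the "DWARF" mode bit (0x04000000 on
    // x86) means "fall back to the DWARF CFI section for this function";
    // the base-class implementation simply returns 0 (no encoding).
    uint32_t Encoding = MAB.generateCompactUnwindEncoding(Frame.Instructions);

The practical effect is that any producer of correct .cfi_* directives, not
just X86FrameLowering, gets compact unwind for free.
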
From 5c28673c3b745176302267e523ba274ae6465034 Mon Sep 17 00:00:00 2001
From: Robert Lytton
Date: Mon, 9 Sep 2013 10:41:57 +0000
Subject: XCore target: fix weak linkage attribute handling

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190298 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp | 1 -
 1 file changed, 1 deletion(-)
(limited to 'lib/Target')

diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 6f44551..235028c 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -23,7 +23,6 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) {
   PrivateGlobalPrefix = ".L";
   AscizDirective = ".asciiz";

-  WeakDefDirective = "\t.weak\t";
   WeakRefDirective = "\t.weak\t";

   // Debug
--
cgit v1.1

From 7c739380ee6e03cae74cc7280d974882778020b9 Mon Sep 17 00:00:00 2001
From: Robert Lytton
Date: Mon, 9 Sep 2013 10:42:05 +0000
Subject: XCore target: change to Sched::Source

This sidesteps a bug in PrescheduleNodesWithMultipleUses()
which does not check if callResources will be affected by
the transformation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190299 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/XCore/XCoreISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib/Target')

diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 6fc7eef5..9917ca3 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -79,7 +79,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)

   setStackPointerRegisterToSaveRestore(XCore::SP);

-  setSchedulingPreference(Sched::RegPressure);
+  setSchedulingPreference(Sched::Source);

   // Use i32 for setcc operations results (slt, sgt, ...).
   setBooleanContents(ZeroOrOneBooleanContent);
--
cgit v1.1

From 71537c178303c9d3f44aa0b9e0de167a7d670c77 Mon Sep 17 00:00:00 2001
From: Robert Lytton
Date: Mon, 9 Sep 2013 10:42:11 +0000
Subject: XCore handling of thread local lowering

Fix XCoreLowerThreadLocal trying to initialise globals
which have no initializer.
Add handling of const expressions containing thread local variables.
These need to be replaced with instructions, as the thread ID is
used to access the thread local variable.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190300 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/XCore/XCoreLowerThreadLocal.cpp | 103 ++++++++++++++++++++++++++---
 1 file changed, 94 insertions(+), 9 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 2e328b4..5a83282 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -22,6 +22,8 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/ValueHandle.h"

 #define DEBUG_TYPE "xcore-lower-thread-local"

@@ -71,13 +73,94 @@ createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) {
   return ConstantArray::get(NewType, Elements);
 }

-static bool hasNonInstructionUse(GlobalVariable *GV) {
-  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
-       ++UI)
-    if (!isa<Instruction>(*UI))
-      return true;
+static Instruction *
+createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
+  IRBuilder<NoFolder> Builder(Instr);
+  unsigned OpCode = CE->getOpcode();
+  switch (OpCode) {
+    case Instruction::GetElementPtr: {
+      SmallVector<Value *, 4> CEOpVec(CE->op_begin(), CE->op_end());
+      ArrayRef<Value *> CEOps(CEOpVec);
+      return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0],
+                                                             CEOps.slice(1)));
+    }
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::FDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::FRem:
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor:
+      return dyn_cast<Instruction>(
+                 Builder.CreateBinOp((Instruction::BinaryOps)OpCode,
+                                     CE->getOperand(0), CE->getOperand(1),
+                                     CE->getName()));
+    case Instruction::Trunc:
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::UIToFP:
+    case Instruction::SIToFP:
+    case Instruction::FPTrunc:
+    case Instruction::FPExt:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::BitCast:
+      return dyn_cast<Instruction>(
+                 Builder.CreateCast((Instruction::CastOps)OpCode,
+                                    CE->getOperand(0), CE->getType(),
+                                    CE->getName()));
+    default:
+      assert(0 && "Unhandled constant expression!\n");
+  }
+}
+
+static bool replaceConstantExprOp(ConstantExpr *CE) {
+  do {
+    SmallVector<WeakVH, 8> WUsers;
+    for (Value::use_iterator I = CE->use_begin(), E = CE->use_end();
+         I != E; ++I)
+      WUsers.push_back(WeakVH(*I));
+    while (!WUsers.empty())
+      if (WeakVH WU = WUsers.pop_back_val()) {
+        if (Instruction *Instr = dyn_cast<Instruction>(WU)) {
+          Instruction *NewInst = createReplacementInstr(CE, Instr);
+          assert(NewInst && "Must build an instruction\n");
+          // If NewInst uses a CE being handled in an earlier recursion the
+          // earlier recursion's do-while-hasNUsesOrMore(1) will run again.
+          Instr->replaceUsesOfWith(CE, NewInst);
+        } else {
+          ConstantExpr *CExpr = dyn_cast<ConstantExpr>(WU);
+          if (!CExpr || !replaceConstantExprOp(CExpr))
+            return false;
+        }
+      }
+  } while (CE->hasNUsesOrMore(1)); // Does a recursion's NewInst use CE?
+  CE->destroyConstant();
+  return true;
+}

-  return false;
+static bool rewriteNonInstructionUses(GlobalVariable *GV) {
+  SmallVector<WeakVH, 8> WUsers;
+  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I)
+    if (!isa<Instruction>(*I))
+      WUsers.push_back(WeakVH(*I));
+  while (!WUsers.empty())
+    if (WeakVH WU = WUsers.pop_back_val()) {
+      ConstantExpr *CE = dyn_cast<ConstantExpr>(WU);
+      if (!CE || !replaceConstantExprOp(CE))
+        return false;
+    }
+  return true;
 }

 static bool isZeroLengthArray(Type *Ty) {
@@ -92,14 +175,16 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
     return false;

   // Skip globals that we can't lower and leave it for the backend to error.
-  if (hasNonInstructionUse(GV) ||
+  if (!rewriteNonInstructionUses(GV) ||
      !GV->getType()->isSized() || isZeroLengthArray(GV->getType()))
     return false;

   // Create replacement global.
   ArrayType *NewType = createLoweredType(GV->getType()->getElementType());
-  Constant *NewInitializer = createLoweredInitializer(NewType,
-                                                      GV->getInitializer());
+  Constant *NewInitializer = 0;
+  if (GV->hasInitializer())
+    NewInitializer = createLoweredInitializer(NewType,
+                                              GV->getInitializer());
+
   GlobalVariable *NewGV =
     new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(),
                        NewInitializer, "", 0, GlobalVariable::NotThreadLocal,
--
cgit v1.1
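Since the pass change above is dense, here is a hypothetical before/after at
the IR level (global name, type, and offsets invented) showing why a
ConstantExpr use must become a real instruction once the address is
thread-relative:

    ; Before: a constant GEP folded directly into the store operand.
    @tl = thread_local global [2 x i32] zeroinitializer
    define void @f() {
      store i32 1, i32* getelementptr inbounds ([2 x i32]* @tl, i32 0, i32 1)
      ret void
    }

    ; After lowering (sketch): the base now depends on the running thread,
    ; so the address computation has to be a real GEP instruction.
    define void @f() {
      %base = ... ; per-thread slice of the expanded global, from the thread ID
      %p = getelementptr inbounds [2 x i32]* %base, i32 0, i32 1
      store i32 1, i32* %p
      ret void
    }
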
From 4220bb24f55dd13c482ebc1358facde213ec95f6 Mon Sep 17 00:00:00 2001
From: Aaron Ballman
Date: Mon, 9 Sep 2013 13:22:45 +0000
Subject: Silencing a warning about control flow reaching the end of a
 non-void function.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190304 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/XCore/XCoreLowerThreadLocal.cpp | 1 +
 1 file changed, 1 insertion(+)
(limited to 'lib/Target')

diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 5a83282..3c90a54 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -122,6 +122,7 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
     default:
       assert(0 && "Unhandled constant expression!\n");
   }
+  llvm_unreachable("Unhandled constant expression!\n");
 }

 static bool replaceConstantExprOp(ConstantExpr *CE) {
--
cgit v1.1

From 7b80d9233a0a69c47d1e5ebe647951349ed166e8 Mon Sep 17 00:00:00 2001
From: Aaron Ballman
Date: Mon, 9 Sep 2013 14:17:30 +0000
Subject: A better way to silence the warning in MSVC (replaces r190304).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190308 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/XCore/XCoreLowerThreadLocal.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 3c90a54..7340b2f 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -120,9 +120,8 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
                                    CE->getOperand(0), CE->getType(),
                                    CE->getName()));
     default:
-      assert(0 && "Unhandled constant expression!\n");
+      llvm_unreachable("Unhandled constant expression!\n");
   }
-  llvm_unreachable("Unhandled constant expression!\n");
 }

 static bool replaceConstantExprOp(ConstantExpr *CE) {
--
cgit v1.1

From b57d99694b87326a2eea26d76becf67bf5784b49 Mon Sep 17 00:00:00 2001
From: Joey Gouly
Date: Mon, 9 Sep 2013 14:21:49 +0000
Subject: [ARMv8] Prevent generation of deprecated IT blocks on ARMv8 in
 Thumb mode.

IT blocks can only be one instruction long, and can only contain a subset
of the 16-bit instructions.

Patch by Artyom Skrobov!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190309 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMBaseInstrInfo.cpp  | 82 ++++++++++++++++++++++++++++++++++--
 lib/Target/ARM/ARMFastISel.cpp       |  6 +--
 lib/Target/ARM/ARMTargetMachine.cpp  |  7 ++-
 lib/Target/ARM/Thumb2ITBlockPass.cpp | 62 +++++++++++++++------------
 4 files changed, 121 insertions(+), 36 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 62e8063..696d039 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -513,6 +513,74 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
   return Found;
 }

+static bool isV8EligibleForIT(MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case ARM::tADC:
+  case ARM::tADDi3:
+  case ARM::tADDi8:
+  case ARM::tADDrSPi:
+  case ARM::tADDrr:
+  case ARM::tAND:
+  case ARM::tASRri:
+  case ARM::tASRrr:
+  case ARM::tBIC:
+  case ARM::tCMNz:
+  case ARM::tCMPi8:
+  case ARM::tCMPr:
+  case ARM::tEOR:
+  case ARM::tLDRBi:
+  case ARM::tLDRBr:
+  case ARM::tLDRHi:
+  case ARM::tLDRHr:
+  case ARM::tLDRSB:
+  case ARM::tLDRSH:
+  case ARM::tLDRi:
+  case ARM::tLDRr:
+  case ARM::tLDRspi:
+  case ARM::tLSLri:
+  case ARM::tLSLrr:
+  case ARM::tLSRri:
+  case ARM::tLSRrr:
+  case ARM::tMOVi8:
+  case ARM::tMUL:
+  case ARM::tMVN:
+  case ARM::tORR:
+  case ARM::tROR:
+  case ARM::tRSB:
+  case ARM::tSBC:
+  case ARM::tSTRBi:
+  case ARM::tSTRBr:
+  case ARM::tSTRHi:
+  case ARM::tSTRHr:
+  case ARM::tSTRi:
+  case ARM::tSTRr:
+  case ARM::tSTRspi:
+  case ARM::tSUBi3:
+  case ARM::tSUBi8:
+  case ARM::tSUBrr:
+  case ARM::tTST:
+    return true;
+// there are some "conditionally deprecated" opcodes
+  case ARM::tADDspr:
+    return MI->getOperand(2).getReg() != ARM::PC;
+  case ARM::tADDrSP:
+  case ARM::tBX:
+  case ARM::tBLXr:
+    // ADD PC, SP and BLX PC were always unpredictable,
+    // now on top of it they're deprecated
+    return MI->getOperand(0).getReg() != ARM::PC;
+  case ARM::tADDhirr:
+    return MI->getOperand(0).getReg() != ARM::PC &&
+           MI->getOperand(2).getReg() != ARM::PC;
+  case ARM::tCMPhir:
+  case ARM::tMOVr:
+    return MI->getOperand(0).getReg() != ARM::PC &&
+           MI->getOperand(1).getReg() != ARM::PC;
+  }
+}
+
 /// isPredicable - Return true if the specified instruction can be predicated.
 /// By default, this returns true for every instruction with a
 /// PredicateOperand.
@@ -520,11 +588,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
   if (!MI->isPredicable())
     return false;

-  if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
-    ARMFunctionInfo *AFI =
-      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
-    return AFI->isThumb2Function();
+  ARMFunctionInfo *AFI =
+    MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+
+  if (AFI->isThumb2Function()) {
+    if (getSubtarget().hasV8Ops())
+      return isV8EligibleForIT(MI);
+  } else { // non-Thumb
+    if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+      return false;
   }
+
   return true;
 }

diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 3cbb9a8..f3a74c7 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -254,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
   const MCInstrDesc &MCID = MI->getDesc();

-  // If we're a thumb2 or not NEON function we were handled via isPredicable.
+  // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
   if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
        AFI->isThumb2Function())
-    return false;
+    return MI->isPredicable();

   for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
     if (MCID.OpInfo[i].isPredicate())
@@ -278,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   // Do we use a predicate? or...
   // Are we NEON in ARM mode and have a predicate operand? If so, I know
   // we're not predicable but add it anyways.
-  if (TII.isPredicable(MI) || isARMNEONPred(MI))
+  if (isARMNEONPred(MI))
     AddDefaultPred(MIB);

   // Do we optionally set a predicate? Preds is size > 0 iff the predicate
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 1ba78e4..c78db54 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -196,8 +196,13 @@ bool ARMPassConfig::addPreSched2() {
     addPass(createARMExpandPseudoPass());

   if (getOptLevel() != CodeGenOpt::None) {
-    if (!getARMSubtarget().isThumb1Only())
+    if (!getARMSubtarget().isThumb1Only()) {
+      // in v8, IfConversion depends on Thumb instruction widths
+      if (getARMSubtarget().hasV8Ops() &&
+          !getARMSubtarget().prefers32BitThumb())
+        addPass(createThumb2SizeReductionPass());
       addPass(&IfConverterID);
+    }
   }
   if (getARMSubtarget().isThumb2())
     addPass(createThumb2ITBlockPass());
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d8596d7..8f1ae2e 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -28,6 +28,7 @@ namespace {
     static char ID;
     Thumb2ITBlockPass() : MachineFunctionPass(ID) {}

+    bool hasV8Ops;
     const Thumb2InstrInfo *TII;
     const TargetRegisterInfo *TRI;
     ARMFunctionInfo *AFI;
@@ -192,37 +193,41 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
     // Form IT block.
     ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
     unsigned Mask = 0, Pos = 3;
-    // Branches, including tricky ones like LDM_RET, need to end an IT
-    // block so check the instruction we just put in the block.
-    for (; MBBI != E && Pos &&
-           (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
-      if (MBBI->isDebugValue())
-        continue;
-
-      MachineInstr *NMI = &*MBBI;
-      MI = NMI;
-
-      unsigned NPredReg = 0;
-      ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
-      if (NCC == CC || NCC == OCC) {
-        Mask |= (NCC & 1) << Pos;
-        // Add implicit use of ITSTATE.
-        NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
-                                                  true/*isImp*/, false/*isKill*/));
-        LastITMI = NMI;
-      } else {
-        if (NCC == ARMCC::AL &&
-            MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
-          --MBBI;
-          MBB.remove(NMI);
-          MBB.insert(InsertPos, NMI);
-          ++NumMovedInsts;
+
+    // v8 IT blocks are limited to one conditional op: skip the loop
+    if (!hasV8Ops) {
+      // Branches, including tricky ones like LDM_RET, need to end an IT
+      // block so check the instruction we just put in the block.
+      for (; MBBI != E && Pos &&
+             (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
+        if (MBBI->isDebugValue())
           continue;
+
+        MachineInstr *NMI = &*MBBI;
+        MI = NMI;
+
+        unsigned NPredReg = 0;
+        ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+        if (NCC == CC || NCC == OCC) {
+          Mask |= (NCC & 1) << Pos;
+          // Add implicit use of ITSTATE.
+          NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+                                                    true/*isImp*/, false/*isKill*/));
+          LastITMI = NMI;
+        } else {
+          if (NCC == ARMCC::AL &&
+              MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+            --MBBI;
+            MBB.remove(NMI);
+            MBB.insert(InsertPos, NMI);
+            ++NumMovedInsts;
+            continue;
+          }
+          break;
         }
-        break;
+        TrackDefUses(NMI, Defs, Uses, TRI);
+        --Pos;
       }
-      TrackDefUses(NMI, Defs, Uses, TRI);
-      --Pos;
     }

     // Finalize IT mask.
@@ -250,6 +255,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
   AFI = Fn.getInfo<ARMFunctionInfo>();
   TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
   TRI = TM.getRegisterInfo();
+  hasV8Ops = TM.getSubtarget<ARMSubtarget>().hasV8Ops();

   if (!AFI->isThumbFunction())
     return false;
--
cgit v1.1
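To see what the pass now avoids, consider a hypothetical Thumb-2 sequence
(registers and condition invented). Before v8, one IT instruction could
predicate up to four following instructions:

    itte  eq
    moveq r0, #1        @ 16-bit conditional op
    addeq r1, r1, #1    @ second op in the block: deprecated on ARMv8
    movne r0, #0

Under the ARMv8 deprecation rules modeled here, an IT block should contain a
single, 16-bit, non-PC-writing instruction, so with hasV8Ops the pass emits
one IT per conditional instruction instead of packing them into one block.
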
From 2dd3afc5e600b4585e4c2cd08f9a35fd1cf0df61 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka
Date: Mon, 9 Sep 2013 17:59:32 +0000
Subject: [mips] When double precision loads and stores are split into two i32
 loads and stores, make sure the load or store that accesses the higher half
 does not have an alignment that is larger than the offset from the original
 address.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190318 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsSEISelLowering.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index f32e146..2436e54 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -645,7 +645,7 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
                            MachinePointerInfo(), Nd.isVolatile(),
                            Nd.isNonTemporal(), Nd.isInvariant(),
-                           Nd.getAlignment());
+                           std::min(Nd.getAlignment(), 4U));

   if (!Subtarget->isLittle())
     std::swap(Lo, Hi);
@@ -681,8 +681,8 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // i32 store to higher address.
   Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
   return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
-                      Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
-                      Nd.getTBAAInfo());
+                      Nd.isVolatile(), Nd.isNonTemporal(),
+                      std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
 }

 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
--
cgit v1.1
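The alignment clamp is easiest to see with concrete numbers (invented
example): an f64 access to an address A that is 8-byte aligned splits into an
i32 access at A and an i32 access at A+4. The second address is offset by
only 4 bytes, so it can be at most 4-byte aligned no matter how aligned A is:

    // Sketch: alignment of the two halves of a split 64-bit access.
    unsigned Align   = 8;                    // alignment of the original f64
    unsigned LoAlign = Align;                // half at A: keeps full alignment
    unsigned HiAlign = std::min(Align, 4U);  // half at A + 4: 4, never 8

Claiming 8-byte alignment for the A+4 half would let later passes reason
incorrectly about that memory operand.
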
From db3a9e64f856e3a233a427da1f3969fd3a65a438 Mon Sep 17 00:00:00 2001
From: Bob Wilson
Date: Mon, 9 Sep 2013 19:14:35 +0000
Subject: Revert patches to add case-range support for PR1255.

The work on this project was left in an unfinished and inconsistent state.
Hopefully someone will eventually get a chance to implement this feature, but
in the meantime, it is better to put things back the way the were. I have
left support in the bitcode reader to handle the case-range bitcode format,
so that we do not lose bitcode compatibility with the llvm 3.3 release.

This reverts the following commits: 155464, 156374, 156377, 156613, 156704,
156757, 156804 156808, 156985, 157046, 157112, 157183, 157315, 157384,
157575, 157576, 157586, 157612, 157810, 157814, 157815, 157880, 157881,
157882, 157884, 157887, 157901, 158979, 157987, 157989, 158986, 158997,
159076, 159101, 159100, 159200, 159201, 159207, 159527, 159532, 159540,
159583, 159618, 159658, 159659, 159660, 159661, 159703, 159704, 160076,
167356, 172025, 186736

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190328 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/CppBackend/CPPBackend.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib/Target')

diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index ace4b74..6f27af4 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1140,7 +1140,7 @@ void CppWriter::printInstruction(const Instruction *I,
     nl(Out);
     for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
          i != e; ++i) {
-      const IntegersSubset CaseVal = i.getCaseValueEx();
+      const ConstantInt* CaseVal = i.getCaseValue();
       const BasicBlock *BB = i.getCaseSuccessor();
       Out << iName << "->addCase("
           << getOpName(CaseVal) << ", "
--
cgit v1.1

From 5ebe6ccf658d829f9e3b7bb2ee2a5c4c392c2a7d Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Mon, 9 Sep 2013 23:28:15 +0000
Subject: Use a default value for the prologue's debug location.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190366 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FrameLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index adcd4f7..dd02c4e 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -408,7 +408,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   unsigned StackPtr = RegInfo->getStackRegister();
   unsigned BasePtr = RegInfo->getBaseRegister();
-  DebugLoc DL;
+  DebugLoc DL = MBB.findDebugLoc(MBBI);

   // If we're forcing a stack realignment we can't rely on just the frame
   // info, we need to know the ABI stack alignment as well in case we
--
cgit v1.1

From 9c7448c8d337fde57a75f58baab8fed6afad0a10 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Tue, 10 Sep 2013 00:20:27 +0000
Subject: Revert r190366. It was breaking build bots.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190373 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FrameLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index dd02c4e..adcd4f7 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -408,7 +408,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   unsigned StackPtr = RegInfo->getStackRegister();
   unsigned BasePtr = RegInfo->getBaseRegister();
-  DebugLoc DL = MBB.findDebugLoc(MBBI);
+  DebugLoc DL;

   // If we're forcing a stack realignment we can't rely on just the frame
   // info, we need to know the ABI stack alignment as well in case we
--
cgit v1.1

From 798cdc6af1bf2877a941bba4587e6bf72f5d140d Mon Sep 17 00:00:00 2001
From: Vladimir Medic
Date: Tue, 10 Sep 2013 09:39:55 +0000
Subject: Remove obsolete code from MipsAsmParser.cpp.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190396 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 22 ----------------------
 1 file changed, 22 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 7214cb5..2aa90c7 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -52,14 +52,6 @@ private:
 namespace {
 class MipsAsmParser : public MCTargetAsmParser {

-  enum FpFormatTy {
-    FP_FORMAT_NONE = -1,
-    FP_FORMAT_S,
-    FP_FORMAT_D,
-    FP_FORMAT_L,
-    FP_FORMAT_W
-  } FpFormat;
-
   MCSubtargetInfo &STI;
   MCAsmParser &Parser;
   MipsAssemblerOptions Options;
@@ -202,8 +194,6 @@ class MipsAsmParser : public MCTargetAsmParser {

   int regKindToRegClass(int RegKind);

-  FpFormatTy getFpFormat() {return FpFormat;}
-
   unsigned getReg(int RC, int RegNo);

   int getATReg();
@@ -1437,18 +1427,6 @@ MipsAsmParser::parseRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
       Operands.push_back(Op);
       hasConsumedDollar = false;
       Parser.Lex(); // Eat the register name.
-      if ((RegKind == MipsOperand::Kind_GPR32)
-          && (getLexer().is(AsmToken::LParen))) {
-        // Check if it is indexed addressing operand.
-        Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc()));
-        Parser.Lex(); // Eat the parenthesis.
-        if (parseRegs(Operands,RegKind) != MatchOperand_Success)
-          return MatchOperand_NoMatch;
-        if (getLexer().isNot(AsmToken::RParen))
-          return MatchOperand_NoMatch;
-        Operands.push_back(MipsOperand::CreateToken(")", getLexer().getLoc()));
-        Parser.Lex();
-      }
       return MatchOperand_Success;
     } else if (getLexer().getKind() == AsmToken::Integer) {
       unsigned RegNum = Parser.getTok().getIntVal();
--
cgit v1.1

From b15da6dc09fdf2699146cd4317f3a43e70397553 Mon Sep 17 00:00:00 2001
From: Vladimir Medic
Date: Tue, 10 Sep 2013 09:50:01 +0000
Subject: Add test cases for Mips mthc1/mfhc1 instructions. Add check for odd
 value of register when FPU is 32 bit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190397 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'lib/Target')

diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 2aa90c7..e547de5 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -1404,6 +1404,10 @@ MipsAsmParser::parseRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
         RegNum = matchFPURegisterName(RegName);
       if (RegKind == MipsOperand::Kind_AFGR64Regs)
         RegNum /= 2;
+      else if (RegKind == MipsOperand::Kind_FGRH32Regs
+               && !isFP64())
+        if (RegNum != -1 && RegNum %2 != 0)
+          Warning(S, "Float register should be even.");
       break;
     case MipsOperand::Kind_FCCRegs:
       RegNum = matchFCCRegisterName(RegName);
--
cgit v1.1
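Context for the new warning: with a 32-bit FPU, a 64-bit floating-point value
occupies an even/odd register pair addressed by its even member, so an odd
register in a high-half (Kind_FGRH32Regs) operand position is almost
certainly a mistake. An illustrative assembly fragment (operands invented,
and assuming mthc1's register operand parses with that operand kind):

    mthc1 $5, $f1    # odd register: now draws "Float register should be even."
    mthc1 $5, $f0    # even register: accepted; $f0/$f1 form the pair
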
From 436f64567ceb0b45e6b5b680fa09485002094830 Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Tue, 10 Sep 2013 09:51:43 +0000
Subject: [mips][msa] Removed unsupported dot product instructions (dotp_[su].b)

The dotp_[su].b instructions never existed in any revision of the MSA spec.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190398 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsMSAInstrInfo.td | 8 --------
 1 file changed, 8 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index abac272..a7d4790 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -250,12 +250,10 @@ class DIV_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010010>;
 class DIV_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010010>;
 class DIV_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010010>;

-class DOTP_S_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010011>;
 class DOTP_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010011>;
 class DOTP_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010011>;
 class DOTP_S_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010011>;

-class DOTP_U_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010011>;
 class DOTP_U_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010011>;
 class DOTP_U_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010011>;
 class DOTP_U_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010011>;
@@ -1240,8 +1238,6 @@ class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, MSA128H>;
 class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, MSA128W>;
 class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, MSA128D>;

-class DOTP_S_B_DESC : MSA_3R_DESC_BASE<"dotp_s.b", int_mips_dotp_s_b, MSA128B>,
-                      IsCommutable;
 class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, MSA128H>,
                       IsCommutable;
 class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, MSA128W>,
                       IsCommutable;
 class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, MSA128D>,
                       IsCommutable;

-class DOTP_U_B_DESC : MSA_3R_DESC_BASE<"dotp_u.b", int_mips_dotp_u_b, MSA128B>,
-                      IsCommutable;
 class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, MSA128H>,
                       IsCommutable;
 class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, MSA128W>,
                       IsCommutable;
@@ -2133,12 +2127,10 @@ def DIV_U_H : DIV_U_H_ENC, DIV_U_H_DESC;
 def DIV_U_W : DIV_U_W_ENC, DIV_U_W_DESC;
 def DIV_U_D : DIV_U_D_ENC, DIV_U_D_DESC;

-def DOTP_S_B : DOTP_S_B_ENC, DOTP_S_B_DESC;
 def DOTP_S_H : DOTP_S_H_ENC, DOTP_S_H_DESC;
 def DOTP_S_W : DOTP_S_W_ENC, DOTP_S_W_DESC;
 def DOTP_S_D : DOTP_S_D_ENC, DOTP_S_D_DESC;

-def DOTP_U_B : DOTP_U_B_ENC, DOTP_U_B_DESC;
 def DOTP_U_H : DOTP_U_H_ENC, DOTP_U_H_DESC;
 def DOTP_U_W : DOTP_U_W_ENC, DOTP_U_W_DESC;
 def DOTP_U_D : DOTP_U_D_ENC, DOTP_U_D_DESC;
--
cgit v1.1

From 299fdd814f4c2850d44387d24c440980c5377d3e Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Tue, 10 Sep 2013 10:20:32 +0000
Subject: [SystemZ] Add TM and TMY

The main complication here is that TM and TMY (the memory forms) set
CC differently from the register forms. When the tested bits contain
some 0s and some 1s, the register forms set CC to 1 or 2 based on the
value of the uppermost bit. The memory forms instead set CC to 1
regardless of the uppermost bit.

Until now, I've tried to make it so that a branch never tests for an
impossible CC value. E.g. NR only sets CC to 0 or 1, so branches on the
result will only test for 0 or 1. Originally I'd tried to do the same
thing for TM and TMY by using custom matching code in ISelDAGToDAG.
That ended up being very ugly though, and would have meant duplicating
some of the chain checks that the common isel code does.

I've therefore gone for the simpler alternative of adding an extra
operand to the TM DAG opcode to say whether a memory form would be OK.
This means that the inverse of a "TM;JE" is "TM;JNE" rather than the
more precise "TM;JNLE", just like the inverse of "TMLL;JE" is
"TMLL;JNE". I suppose that's arguably less confusing though...

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190400 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/SystemZ/SystemZISelLowering.cpp | 14 ++++++++------
 lib/Target/SystemZ/SystemZISelLowering.h   |  5 ++++-
 lib/Target/SystemZ/SystemZInstrInfo.td     | 14 ++++++++------
 lib/Target/SystemZ/SystemZOperators.td     |  6 +++++-
 4 files changed, 25 insertions(+), 14 deletions(-)
(limited to 'lib/Target')

diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 9fc56fe..5528404 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1226,13 +1226,13 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
   return 0;
 }

-// See whether the comparison (Opcode CmpOp0, CmpOp1) can be implemented
-// as a TEST UNDER MASK instruction when the condition being tested is
-// as described by CCValid and CCMask. Update the arguments with the
-// TM version if so.
+// See whether the comparison (Opcode CmpOp0, CmpOp1, ICmpType) can be
+// implemented as a TEST UNDER MASK instruction when the condition being
+// tested is as described by CCValid and CCMask. Update the arguments
+// with the TM version if so.
 static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0,
                                    SDValue &CmpOp1, unsigned &CCValid,
-                                   unsigned &CCMask, unsigned ICmpType) {
+                                   unsigned &CCMask, unsigned &ICmpType) {
   // Check that we have a comparison with a constant.
   ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1);
   if (!ConstCmpOp1)
     return;
@@ -1266,6 +1266,8 @@ static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0,
   Opcode = SystemZISD::TM;
   CmpOp0 = AndOp0;
   CmpOp1 = AndOp1;
+  ICmpType = (bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
+              bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
   CCValid = SystemZ::CCMASK_TM;
   CCMask = NewCCMask;
 }
@@ -1315,7 +1317,7 @@ static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG,
   }

   adjustForTestUnderMask(Opcode, CmpOp0, CmpOp1, CCValid, CCMask, ICmpType);
-  if (Opcode == SystemZISD::ICMP)
+  if (Opcode == SystemZISD::ICMP || Opcode == SystemZISD::TM)
     return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1,
                        DAG.getConstant(ICmpType, MVT::i32));
   return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 500abce..9822463 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -46,7 +46,10 @@ namespace SystemZISD {
   FCMP,

   // Test under mask. The first operand is ANDed with the second operand
-  // and the condition codes are set on the result.
+  // and the condition codes are set on the result. The third operand is
+  // a boolean that is true if the condition codes need to distinguish
+  // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
+  // register forms do but the memory forms don't).
   TM,

   // Branches if a condition is true. Operand 0 is the chain operand;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 5bfa06d..32b70b9 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1037,14 +1037,16 @@ let mayLoad = 1, Defs = [CC], Uses = [R0W] in

 // Test under mask.
 let Defs = [CC] in {
   let isCodeGenOnly = 1 in {
-    def TMLL32 : CompareRI<"tmll", 0xA71, z_tm, GR32, imm32ll16>;
-    def TMLH32 : CompareRI<"tmlh", 0xA70, z_tm, GR32, imm32lh16>;
+    def TMLL32 : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>;
+    def TMLH32 : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>;
   }

-  def TMLL : CompareRI<"tmll", 0xA71, z_tm, GR64, imm64ll16>;
-  def TMLH : CompareRI<"tmlh", 0xA70, z_tm, GR64, imm64lh16>;
-  def TMHL : CompareRI<"tmhl", 0xA73, z_tm, GR64, imm64hl16>;
-  def TMHH : CompareRI<"tmhh", 0xA72, z_tm, GR64, imm64hh16>;
+  def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR64, imm64ll16>;
+  def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR64, imm64lh16>;
+  def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GR64, imm64hl16>;
+  def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GR64, imm64hh16>;
+
+  defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
 }

 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index c135b3b..4a6b665 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -99,7 +99,7 @@ def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
 def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
 def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>;
 def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>;
-def z_tm : SDNode<"SystemZISD::TM", SDT_ZCmp, [SDNPOutGlue]>;
+def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>;
 def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
                          [SDNPHasChain, SDNPInGlue]>;
 def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
@@ -176,6 +176,10 @@ def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
   return Type != SystemZICMP::SignedOnly;
 }]>;

+// Register- and memory-based TEST UNDER MASK.
+def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
+def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
+
 // Register sign-extend operations. Sub-32-bit values are represented as i32s.
 def sext8  : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
 def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
--
cgit v1.1
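A concrete (invented) source-level example of what the new z_tm_mem pattern
buys. For something like

    int f(unsigned char *p) { return (*p & 0x81) == 0 ? 1 : 2; }

the test can now become a single TM against the byte in memory instead of a
load followed by TMLL. The mask 0x81 has both its top bit and a lower bit
set, so "some bits set" could mean CC 1 or CC 2 in the register form but only
CC 1 in the memory form; the extra boolean operand on SystemZISD::TM records
whether the selected branch actually needs that MSB distinction. Here the
branch only cares about "all zero" versus "not all zero", so the operand is 0
and z_tm_mem is allowed to match.
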
let Defs = [CC] in { let isCodeGenOnly = 1 in { - def TMLL32 : CompareRI<"tmll", 0xA71, z_tm, GR32, imm32ll16>; - def TMLH32 : CompareRI<"tmlh", 0xA70, z_tm, GR32, imm32lh16>; + def TMLL32 : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>; + def TMLH32 : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>; } - def TMLL : CompareRI<"tmll", 0xA71, z_tm, GR64, imm64ll16>; - def TMLH : CompareRI<"tmlh", 0xA70, z_tm, GR64, imm64lh16>; - def TMHL : CompareRI<"tmhl", 0xA73, z_tm, GR64, imm64hl16>; - def TMHH : CompareRI<"tmhh", 0xA72, z_tm, GR64, imm64hh16>; + def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR64, imm64ll16>; + def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR64, imm64lh16>; + def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GR64, imm64hl16>; + def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GR64, imm64hh16>; + + defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index c135b3b..4a6b665 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -99,7 +99,7 @@ def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>; def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>; -def z_tm : SDNode<"SystemZISD::TM", SDT_ZCmp, [SDNPOutGlue]>; +def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>; def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain, SDNPInGlue]>; def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, @@ -176,6 +176,10 @@ def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ return Type != SystemZICMP::SignedOnly; }]>; +// Register- and memory-based TEST UNDER MASK. +def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>; +def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>; + // Register sign-extend operations. Sub-32-bit values are represented as i32s. def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>; def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; -- cgit v1.1 From 4c708e586eecb039811fdd54a8846002324936b2 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 10 Sep 2013 12:22:45 +0000 Subject: [SystemZ] Update README. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190404 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/README.txt | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index d5361fb..afa6cf0 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -58,14 +58,11 @@ condition codes. For example, we could use LCDFR instead of LCDBR. -- -We don't optimize block memory operations, except using single MVCs -for memcpy and single CLCs for memcmp. +We only use MVC, XC and CLC for constant-length block operations. +We could extend them to variable-length operations too, +using EXECUTE RELATIVE LONG. -It's definitely worth using things like NC, XC and OC with -constant lengths. MVCIN may be worthwhile too. - -We should probably implement general memcpy using MVC with EXECUTE. -Likewise memcmp and CLC. MVCLE and CLCLE could be useful too. 
+MVCIN, MVCLE and CLCLE may be worthwhile too. -- @@ -93,8 +90,7 @@ We don't use the halfword forms of LOAD REVERSED and STORE REVERSED -- -We could take advantage of the various ... UNDER MASK instructions, -such as ICM and STCM. +We don't use ICM or STCM. -- -- cgit v1.1 From 28823ec4088e216f455278ba42f99f79d0d71969 Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Tue, 10 Sep 2013 15:10:02 +0000 Subject: Remove unused private member in ARMAsmPrinter.cpp. This commit removes the unused "AttributeItem" from ObjectAttributeEmitter. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190412 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 485b1c5..0e94045 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -111,7 +111,7 @@ namespace { unsigned Tag; unsigned IntValue; StringRef StringValue; - } AttributeItem; + }; MCObjectStreamer &Streamer; StringRef CurrentVendor; -- cgit v1.1 From 1039e106d095979b3707bb27cfa765cd2c0f3b54 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 10 Sep 2013 17:21:39 +0000 Subject: ARM: Use the PICADD opcode calculated. We were figuring out whether to use tPICADD or PICADD, then just using tPICADD unconditionally anyway. Oops. A testcase from someone familiar enough with ELF to produce one would be appreciated. The existing PIC testcase correctly verifies the .s generated, but that doesn't catch this bug, which only showed up in direct-to-object mode. http://llvm.org/bugs/show_bug.cgi?id=17180 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190417 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 3508795..df867b4 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -136,8 +136,12 @@ namespace { unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD : ARM::PICADD; - BuildMI(FirstMBB, MBBI, DL, TII.get(ARM::tPICADD), GlobalBaseReg) - .addReg(TempReg).addImm(ARMPCLabelIndex); + MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg) + .addReg(TempReg) + .addImm(ARMPCLabelIndex); + if (Opc == ARM::PICADD) + AddDefaultPred(MIB); + return true; } -- cgit v1.1 From 63a9660a414299bbb93a1edf914c5200413adf73 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 10 Sep 2013 22:42:31 +0000 Subject: Remove unused functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreFrameLowering.cpp | 4 ---- lib/Target/XCore/XCoreISelLowering.cpp | 5 ----- 2 files changed, 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index b57cf9d..c60259d 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -30,10 +30,6 @@ using namespace llvm; // helper functions. FIXME: Eliminate.
-static inline bool isImmUs(unsigned val) { - return val <= 11; -} - static inline bool isImmU6(unsigned val) { return val < (1 << 6); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 9917ca3..e209c29 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -259,11 +259,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const return GA; } -static inline SDValue BuildGetId(SelectionDAG &DAG, SDLoc dl) { - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, - DAG.getConstant(Intrinsic::xcore_getid, MVT::i32)); -} - SDValue XCoreTargetLowering:: LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { -- cgit v1.1 From ae43dac30037395cce2b54af0a02500985813183 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 10 Sep 2013 23:18:14 +0000 Subject: Fix unused variables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190448 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 2 -- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 2 -- lib/Target/Mips/MipsISelLowering.cpp | 2 -- 3 files changed, 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 567faca..a9c276b 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -39,8 +39,6 @@ using namespace llvm; -const unsigned Hexagon_MAX_RET_SIZE = 64; - static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, cl::desc("Control jump table emission on Hexagon target")); diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 22c485f..da44d10 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -457,9 +457,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) { // Constants used to denote relative importance of // heuristic components for cost computation. static const unsigned PriorityOne = 200; -static const unsigned PriorityTwo = 100; static const unsigned PriorityThree = 50; -static const unsigned PriorityFour = 20; static const unsigned ScaleTwo = 10; static const unsigned FactorOne = 2; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index a1706ab..220955e 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2224,8 +2224,6 @@ static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, // Call Calling Convention Implementation //===----------------------------------------------------------------------===// -static const unsigned O32IntRegsSize = 4; - // Return next O32 integer argument register. static unsigned getNextIntArgReg(unsigned Reg) { assert((Reg == Mips::A0) || (Reg == Mips::A2)); -- cgit v1.1 From 3b389cb74e13a5631c9650115aafd2af4580ce83 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 11 Sep 2013 00:41:02 +0000 Subject: Rename variables for consistency. No functional change.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190466 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index da44d10..c94f081 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -457,7 +457,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) { // Constants used to denote relative importance of // heuristic components for cost computation. static const unsigned PriorityOne = 200; -static const unsigned PriorityThree = 50; +static const unsigned PriorityTwo = 50; static const unsigned ScaleTwo = 10; static const unsigned FactorOne = 2; @@ -515,8 +515,8 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, ResCount += (NumNodesBlocking * ScaleTwo); // Factor in reg pressure as a heuristic. - ResCount -= (Delta.Excess.getUnitInc()*PriorityThree); - ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityThree); + ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo); + ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo); DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); -- cgit v1.1 From 8857294192bdc1992d60a14a6ff6c519ddee63e3 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 11 Sep 2013 09:59:17 +0000 Subject: [mips][msa] Corrected the definition of the dotp_[su].[hwd] intrinsics The elements of the operands should be half the width of the elements of the result. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190505 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index a7d4790..66d7f37 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1238,19 +1238,19 @@ class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, MSA128H>; class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, MSA128W>; class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, MSA128D>; -class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, MSA128H>, - IsCommutable; -class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, MSA128W>, - IsCommutable; -class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, MSA128D>, - IsCommutable; - -class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, MSA128H>, - IsCommutable; -class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, MSA128W>, - IsCommutable; -class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, MSA128D>, - IsCommutable; +class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, MSA128H, + MSA128B, MSA128B>, IsCommutable; +class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, MSA128W, + MSA128H, MSA128H>, IsCommutable; +class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, MSA128D, + MSA128W, MSA128W>, IsCommutable; + +class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, MSA128H, + MSA128B, MSA128B>, IsCommutable; +class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, MSA128W, + MSA128H, MSA128H>, IsCommutable; +class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", 
int_mips_dotp_u_d, MSA128D, + MSA128W, MSA128W>, IsCommutable; class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h, MSA128H, MSA128B, MSA128B>, -- cgit v1.1 From ddfbd5805478cf108156bb0159b7495d2b236f7e Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 11 Sep 2013 10:15:48 +0000 Subject: [mips][msa] Separate the configuration of int/float vector types since they will diverge soon No functional change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190506 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEISelLowering.cpp | 30 ++++++++++++++++++++++-------- lib/Target/Mips/MipsSEISelLowering.h | 4 +++- 2 files changed, 25 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 2436e54..5999e19 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -83,13 +83,13 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MUL, MVT::v2i16, Legal); if (Subtarget->hasMSA()) { - addMSAType(MVT::v16i8, &Mips::MSA128BRegClass); - addMSAType(MVT::v8i16, &Mips::MSA128HRegClass); - addMSAType(MVT::v4i32, &Mips::MSA128WRegClass); - addMSAType(MVT::v2i64, &Mips::MSA128DRegClass); - addMSAType(MVT::v8f16, &Mips::MSA128HRegClass); - addMSAType(MVT::v4f32, &Mips::MSA128WRegClass); - addMSAType(MVT::v2f64, &Mips::MSA128DRegClass); + addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); + addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); + addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); + addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); + addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); + addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); + addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); } if (!Subtarget->mipsSEUsesSoftFloat()) { @@ -148,7 +148,21 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { } void MipsSETargetLowering:: -addMSAType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { +addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + +} + +void MipsSETargetLowering:: +addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { addRegisterClass(Ty, RC); // Expand all builtin opcodes. diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index dde0c23..016d4ad 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -22,7 +22,9 @@ namespace llvm { public: explicit MipsSETargetLowering(MipsTargetMachine &TM); - void addMSAType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + void addMSAFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC); virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; -- cgit v1.1 From 68831cbd417b7e4c47b565038a4fe9a1269d5d50 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 11 Sep 2013 10:28:16 +0000 Subject: [mips][msa] Added support for matching addv from normal IR (i.e. not intrinsics) The corresponding intrinsic is now lowered into equivalent IR (ISD::ADD) before instruction selection. 
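To make the effect concrete, here is a small hand-written illustration (my own, not part of the commit; the function name and the use of Clang/GCC vector extensions are assumptions). Once the ADDV patterns match the generic add node, ordinary vector code selects to addv.w directly, and the intrinsic reaches the same patterns after being rewritten to ISD::ADD:

    // Hypothetical example: with MSA enabled, this generic vector add should
    // now select to the MSA addv.w instruction, with no intrinsic involved.
    typedef int v4i32 __attribute__((vector_size(16)));
    v4i32 add_v4i32(v4i32 a, v4i32 b) {
      return a + b; // becomes a generic ISD::ADD node, then matches ADDV_W
    }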
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190507 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 13 +++++-------- lib/Target/Mips/MipsSEISelLowering.cpp | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 66d7f37..1294121 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -18,6 +18,7 @@ def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>; def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>; +// Immediates def immSExt5 : ImmLeaf<vAny, [{return isInt<5>(Imm);}]>; def immSExt10: ImmLeaf<vAny, [{return isInt<10>(Imm);}]>; @@ -980,14 +981,10 @@ class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w, MSA128W>, class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d, MSA128D>, IsCommutable; -class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", int_mips_addv_b, MSA128B>, - IsCommutable; -class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", int_mips_addv_h, MSA128H>, - IsCommutable; -class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", int_mips_addv_w, MSA128W>, - IsCommutable; -class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", int_mips_addv_d, MSA128D>, - IsCommutable; +class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", add, MSA128B>, IsCommutable; +class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", add, MSA128H>, IsCommutable; +class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", add, MSA128W>, IsCommutable; +class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", add, MSA128D>, IsCommutable; class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, MSA128B>; class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 5999e19..0a39dda 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -159,6 +159,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::LOAD, Ty, Legal); setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::ADD, Ty, Legal); } void MipsSETargetLowering:: @@ -799,6 +800,17 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return DAG.getMergeValues(Vals, 2, DL); } +static SDValue lowerMSABinaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + SDLoc DL(Op); + SDValue LHS = Op->getOperand(1); + SDValue RHS = Op->getOperand(2); + EVT ResTy = Op->getValueType(0); + + SDValue Result = DAG.getNode(Opc, DL, ResTy, LHS, RHS); + + return Result; +} + static SDValue lowerMSABranchIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { SDLoc DL(Op); SDValue Value = Op->getOperand(1); @@ -846,6 +858,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerDSPIntr(Op, DAG, MipsISD::MSub); case Intrinsic::mips_msubu: return lowerDSPIntr(Op, DAG, MipsISD::MSubu); + case Intrinsic::mips_addv_b: + case Intrinsic::mips_addv_h: + case Intrinsic::mips_addv_w: + case Intrinsic::mips_addv_d: + return lowerMSABinaryIntr(Op, DAG, ISD::ADD); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: -- cgit v1.1 From ece929d6234b73ea248b7a5e89f915613ad748ea Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 11 Sep 2013 10:38:58 +0000 Subject: [mips][msa] Added support for matching div_[su] from normal IR (i.e.
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190509 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 18 +++++++++--------- lib/Target/Mips/MipsSEISelLowering.cpp | 12 ++++++++++++ 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 1294121..f590cf1 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1225,15 +1225,15 @@ class CTCMSA_DESC { bit hasSideEffects = 1; } -class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", int_mips_div_s_b, MSA128B>; -class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", int_mips_div_s_h, MSA128H>; -class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", int_mips_div_s_w, MSA128W>; -class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", int_mips_div_s_d, MSA128D>; - -class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", int_mips_div_u_b, MSA128B>; -class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", int_mips_div_u_h, MSA128H>; -class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", int_mips_div_u_w, MSA128W>; -class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", int_mips_div_u_d, MSA128D>; +class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", sdiv, MSA128B>; +class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", sdiv, MSA128H>; +class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", sdiv, MSA128W>; +class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", sdiv, MSA128D>; + +class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", udiv, MSA128B>; +class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", udiv, MSA128H>; +class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", udiv, MSA128W>; +class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", udiv, MSA128D>; class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, MSA128H, MSA128B, MSA128B>, IsCommutable; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 0a39dda..2de21ea 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -160,6 +160,8 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::STORE, Ty, Legal); setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::UDIV, Ty, Legal); } void MipsSETargetLowering:: @@ -877,6 +879,16 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_ZERO); case Intrinsic::mips_bz_v: return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_ZERO); + case Intrinsic::mips_div_s_b: + case Intrinsic::mips_div_s_h: + case Intrinsic::mips_div_s_w: + case Intrinsic::mips_div_s_d: + return lowerMSABinaryIntr(Op, DAG, ISD::SDIV); + case Intrinsic::mips_div_u_b: + case Intrinsic::mips_div_u_h: + case Intrinsic::mips_div_u_w: + case Intrinsic::mips_div_u_d: + return lowerMSABinaryIntr(Op, DAG, ISD::UDIV); } } -- cgit v1.1 From 2ac128292150c7ebb469d137877eaa3c6d26a8bb Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 11 Sep 2013 10:51:30 +0000 Subject: [mips][msa] Added support for matching fadd, fdiv, flog2, fmul, frint, fsqrt, and fsub from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190512 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 30 ++++++++++++------------- lib/Target/Mips/MipsSEISelLowering.cpp | 41 ++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index f590cf1..83f20ad 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1283,10 +1283,8 @@ class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w, class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, MSA128D, MSA128W, MSA128W>; -class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", int_mips_fadd_w, MSA128W>, - IsCommutable; -class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", int_mips_fadd_d, MSA128D>, - IsCommutable; +class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", fadd, MSA128W>, IsCommutable; +class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", fadd, MSA128D>, IsCommutable; class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, MSA128W>, IsCommutable; @@ -1344,8 +1342,8 @@ class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", int_mips_fcune_w, MSA128W>, class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", int_mips_fcune_d, MSA128D>, IsCommutable; -class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", int_mips_fdiv_w, MSA128W>; -class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", int_mips_fdiv_d, MSA128D>; +class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", fdiv, MSA128W>; +class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", fdiv, MSA128D>; class FEXDO_H_DESC : MSA_3RF_DESC_BASE<"fexdo.h", int_mips_fexdo_h, MSA128H, MSA128W, MSA128W>; @@ -1392,8 +1390,8 @@ class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", int_mips_fill_h, class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", int_mips_fill_w, MSA128W, GPR32>; -class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", int_mips_flog2_w, MSA128W>; -class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", int_mips_flog2_d, MSA128D>; +class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128W>; +class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128D>; class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, MSA128W>; @@ -1421,11 +1419,11 @@ class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", int_mips_fmsub_w, class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", int_mips_fmsub_d, MSA128D>; -class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", int_mips_fmul_w, MSA128W>; -class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", int_mips_fmul_d, MSA128D>; +class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", fmul, MSA128W>; +class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", fmul, MSA128D>; -class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", int_mips_frint_w, MSA128W>; -class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", int_mips_frint_d, MSA128D>; +class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", frint, MSA128W>; +class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", frint, MSA128D>; class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, MSA128W>; class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, MSA128D>; @@ -1453,11 +1451,11 @@ class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, MSA128D>; class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, MSA128W>; class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, MSA128D>; -class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", int_mips_fsqrt_w, MSA128W>; -class FSQRT_D_DESC : 
MSA_2RF_DESC_BASE<"fsqrt.d", int_mips_fsqrt_d, MSA128D>; +class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", fsqrt, MSA128W>; +class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", fsqrt, MSA128D>; -class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", int_mips_fsub_w, MSA128W>; -class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", int_mips_fsub_d, MSA128D>; +class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", fsub, MSA128W>; +class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", fsub, MSA128D>; class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w, MSA128W>; class FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d, MSA128D>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 2de21ea..c307aa7 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -175,6 +175,16 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::LOAD, Ty, Legal); setOperationAction(ISD::STORE, Ty, Legal); setOperationAction(ISD::BITCAST, Ty, Legal); + + if (Ty != MVT::v8f16) { + setOperationAction(ISD::FADD, Ty, Legal); + setOperationAction(ISD::FDIV, Ty, Legal); + setOperationAction(ISD::FLOG2, Ty, Legal); + setOperationAction(ISD::FMUL, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + setOperationAction(ISD::FSQRT, Ty, Legal); + setOperationAction(ISD::FSUB, Ty, Legal); + } } bool @@ -823,6 +833,16 @@ static SDValue lowerMSABranchIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } +static SDValue lowerMSAUnaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + SDLoc DL(Op); + SDValue Value = Op->getOperand(1); + EVT ResTy = Op->getValueType(0); + + SDValue Result = DAG.getNode(Opc, DL, ResTy, Value); + + return Result; +} + SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { switch (cast(Op->getOperand(0))->getZExtValue()) { @@ -889,6 +909,27 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_div_u_w: case Intrinsic::mips_div_u_d: return lowerMSABinaryIntr(Op, DAG, ISD::UDIV); + case Intrinsic::mips_fadd_w: + case Intrinsic::mips_fadd_d: + return lowerMSABinaryIntr(Op, DAG, ISD::FADD); + case Intrinsic::mips_fdiv_w: + case Intrinsic::mips_fdiv_d: + return lowerMSABinaryIntr(Op, DAG, ISD::FDIV); + case Intrinsic::mips_flog2_w: + case Intrinsic::mips_flog2_d: + return lowerMSAUnaryIntr(Op, DAG, ISD::FLOG2); + case Intrinsic::mips_fmul_w: + case Intrinsic::mips_fmul_d: + return lowerMSABinaryIntr(Op, DAG, ISD::FMUL); + case Intrinsic::mips_frint_w: + case Intrinsic::mips_frint_d: + return lowerMSAUnaryIntr(Op, DAG, ISD::FRINT); + case Intrinsic::mips_fsqrt_w: + case Intrinsic::mips_fsqrt_d: + return lowerMSAUnaryIntr(Op, DAG, ISD::FSQRT); + case Intrinsic::mips_fsub_w: + case Intrinsic::mips_fsub_d: + return lowerMSABinaryIntr(Op, DAG, ISD::FSUB); } } -- cgit v1.1 From f2eb1e4286bf397d60a37e6f288ac81e644a3258 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 11 Sep 2013 11:58:30 +0000 Subject: [mips][msa] Added support for matching mulv, nlzc, sll, sra, srl, and subv from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190518 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 48 +++++++++++++++++----------------- lib/Target/Mips/MipsSEISelLowering.cpp | 36 +++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 24 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 83f20ad..d4dcbd1 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1700,20 +1700,20 @@ class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h, class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w, MSA128W>; -class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", int_mips_mulv_b, MSA128B>; -class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", int_mips_mulv_h, MSA128H>; -class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", int_mips_mulv_w, MSA128W>; -class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", int_mips_mulv_d, MSA128D>; +class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", mul, MSA128B>; +class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", mul, MSA128H>; +class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", mul, MSA128W>; +class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", mul, MSA128D>; class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, MSA128B>; class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, MSA128H>; class NLOC_W_DESC : MSA_2R_DESC_BASE<"nloc.w", int_mips_nloc_w, MSA128W>; class NLOC_D_DESC : MSA_2R_DESC_BASE<"nloc.d", int_mips_nloc_d, MSA128D>; -class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", int_mips_nlzc_b, MSA128B>; -class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", int_mips_nlzc_h, MSA128H>; -class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", int_mips_nlzc_w, MSA128W>; -class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", int_mips_nlzc_d, MSA128D>; +class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", ctlz, MSA128B>; +class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", ctlz, MSA128H>; +class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", ctlz, MSA128W>; +class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", ctlz, MSA128D>; class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, MSA128B>; @@ -1762,10 +1762,10 @@ class SLDI_H_DESC : MSA_BIT_H_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128H>; class SLDI_W_DESC : MSA_BIT_W_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128W>; class SLDI_D_DESC : MSA_BIT_D_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128D>; -class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", int_mips_sll_b, MSA128B>; -class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", int_mips_sll_h, MSA128H>; -class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", int_mips_sll_w, MSA128W>; -class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", int_mips_sll_d, MSA128D>; +class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128B>; +class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128H>; +class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128W>; +class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128D>; class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, MSA128B>; class SLLI_H_DESC : MSA_BIT_H_DESC_BASE<"slli.h", int_mips_slli_h, MSA128H>; @@ -1790,10 +1790,10 @@ class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w, class SPLATI_D_DESC : MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d, MSA128D>; -class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", int_mips_sra_b, MSA128B>; -class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", int_mips_sra_h, MSA128H>; -class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", int_mips_sra_w, MSA128W>; -class SRA_D_DESC : 
MSA_3R_DESC_BASE<"sra.d", int_mips_sra_d, MSA128D>; +class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", sra, MSA128B>; +class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128H>; +class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128W>; +class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128D>; class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, MSA128B>; class SRAI_H_DESC : MSA_BIT_H_DESC_BASE<"srai.h", int_mips_srai_h, MSA128H>; @@ -1810,10 +1810,10 @@ class SRARI_H_DESC : MSA_BIT_H_DESC_BASE<"srari.h", int_mips_srari_h, MSA128H>; class SRARI_W_DESC : MSA_BIT_W_DESC_BASE<"srari.w", int_mips_srari_w, MSA128W>; class SRARI_D_DESC : MSA_BIT_D_DESC_BASE<"srari.d", int_mips_srari_d, MSA128D>; -class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", int_mips_srl_b, MSA128B>; -class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", int_mips_srl_h, MSA128H>; -class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", int_mips_srl_w, MSA128W>; -class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", int_mips_srl_d, MSA128D>; +class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128B>; +class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128H>; +class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128W>; +class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128D>; class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, MSA128B>; class SRLI_H_DESC : MSA_BIT_H_DESC_BASE<"srli.h", int_mips_srli_h, MSA128H>; @@ -1890,10 +1890,10 @@ class SUBSUU_S_W_DESC : MSA_3R_DESC_BASE<"subsuu_s.w", int_mips_subsuu_s_w, class SUBSUU_S_D_DESC : MSA_3R_DESC_BASE<"subsuu_s.d", int_mips_subsuu_s_d, MSA128D>; -class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", int_mips_subv_b, MSA128B>; -class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", int_mips_subv_h, MSA128H>; -class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", int_mips_subv_w, MSA128W>; -class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", int_mips_subv_d, MSA128D>; +class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", sub, MSA128B>; +class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", sub, MSA128H>; +class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", sub, MSA128W>; +class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", sub, MSA128D>; class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, MSA128B>; class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", int_mips_subvi_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index c307aa7..879df6d 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -160,7 +160,13 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::STORE, Ty, Legal); setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::CTLZ, Ty, Legal); + setOperationAction(ISD::MUL, Ty, Legal); setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::SHL, Ty, Legal); + setOperationAction(ISD::SRA, Ty, Legal); + setOperationAction(ISD::SRL, Ty, Legal); + setOperationAction(ISD::SUB, Ty, Legal); setOperationAction(ISD::UDIV, Ty, Legal); } @@ -930,6 +936,36 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fsub_w: case Intrinsic::mips_fsub_d: return lowerMSABinaryIntr(Op, DAG, ISD::FSUB); + case Intrinsic::mips_mulv_b: + case Intrinsic::mips_mulv_h: + case Intrinsic::mips_mulv_w: + case Intrinsic::mips_mulv_d: + return lowerMSABinaryIntr(Op, DAG, ISD::MUL); + case Intrinsic::mips_nlzc_b: + case Intrinsic::mips_nlzc_h: + case Intrinsic::mips_nlzc_w: + case Intrinsic::mips_nlzc_d: + return lowerMSAUnaryIntr(Op, DAG, 
ISD::CTLZ); + case Intrinsic::mips_sll_b: + case Intrinsic::mips_sll_h: + case Intrinsic::mips_sll_w: + case Intrinsic::mips_sll_d: + return lowerMSABinaryIntr(Op, DAG, ISD::SHL); + case Intrinsic::mips_sra_b: + case Intrinsic::mips_sra_h: + case Intrinsic::mips_sra_w: + case Intrinsic::mips_sra_d: + return lowerMSABinaryIntr(Op, DAG, ISD::SRA); + case Intrinsic::mips_srl_b: + case Intrinsic::mips_srl_h: + case Intrinsic::mips_srl_w: + case Intrinsic::mips_srl_d: + return lowerMSABinaryIntr(Op, DAG, ISD::SRL); + case Intrinsic::mips_subv_b: + case Intrinsic::mips_subv_h: + case Intrinsic::mips_subv_w: + case Intrinsic::mips_subv_d: + return lowerMSABinaryIntr(Op, DAG, ISD::SUB); } } -- cgit v1.1 From 3373f3bb3f4d37a79a33351d1aa2b879b1fdb794 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 11 Sep 2013 20:38:09 +0000 Subject: Move into an anonymous namespace and closer to where it's used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190547 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 53 ++++++++++++++------------- 1 file changed, 27 insertions(+), 26 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index e4e8776..e5d89e3 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -27,32 +27,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace CU { - - /// Compact unwind encoding values. - enum CompactUnwindEncodings { - /// [RE]BP based frame where [RE]BP is pused on the stack immediately after - /// the return address, then [RE]SP is moved to [RE]BP. - UNWIND_MODE_BP_FRAME = 0x01000000, - - /// A frameless function with a small constant stack size. - UNWIND_MODE_STACK_IMMD = 0x02000000, - - /// A frameless function with a large constant stack size. - UNWIND_MODE_STACK_IND = 0x03000000, - - /// No compact unwind encoding is available. - UNWIND_MODE_DWARF = 0x04000000, - - /// Mask for encoding the frame registers. - UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, - - /// Mask for encoding the frameless registers. - UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF - }; - -} // end CU namespace - // Option to allow disabling arithmetic relaxation to workaround PR9807, which // is useful when running bitwise comparison experiments on Darwin. We should be // able to remove this once PR9807 is resolved. @@ -360,6 +334,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { /* *** */ namespace { + class ELFX86AsmBackend : public X86AsmBackend { public: uint8_t OSABI; @@ -408,6 +383,32 @@ public: } }; +namespace CU { + + /// Compact unwind encoding values. + enum CompactUnwindEncodings { + /// [RE]BP based frame where [RE]BP is pused on the stack immediately after + /// the return address, then [RE]SP is moved to [RE]BP. + UNWIND_MODE_BP_FRAME = 0x01000000, + + /// A frameless function with a small constant stack size. + UNWIND_MODE_STACK_IMMD = 0x02000000, + + /// A frameless function with a large constant stack size. + UNWIND_MODE_STACK_IND = 0x03000000, + + /// No compact unwind encoding is available. + UNWIND_MODE_DWARF = 0x04000000, + + /// Mask for encoding the frame registers. + UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, + + /// Mask for encoding the frameless registers. 
+ UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF + }; + +} // end CU namespace + class DarwinX86AsmBackend : public X86AsmBackend { const MCRegisterInfo &MRI; -- cgit v1.1 From a5d756ca39c8566a0995a3f748812befe9021ef0 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 11 Sep 2013 21:20:40 +0000 Subject: Implement TTI getUnrollingPreferences for PowerPC The PowerPC A2 core greatly benefits from aggressive concatenation unrolling; use the new getUnrollingPreferences to enable this by default when targeting the PPC A2 core. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190549 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2504ba7..8879630 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -77,6 +77,7 @@ public: /// \name Scalar TTI Implementations /// @{ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; + virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; /// @} @@ -129,6 +130,14 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { return PSK_Software; } +void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { + if (ST->getDarwinDirective() == PPC::DIR_A2) { + // The A2 is in-order with a deep pipeline, and concatenation unrolling + // helps expose latency-hiding opportunities to the instruction scheduler. + UP.Partial = UP.Runtime = true; + } +} + unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { if (Vector && !ST->hasAltivec()) return 0; -- cgit v1.1 From 2c35f3b3b1168bd7a411f2ebe0131eeb37c0d6c2 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 11 Sep 2013 21:47:57 +0000 Subject: Use the appropriate return type for the compact unwind encoding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190551 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index e5d89e3..bac7481 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -722,7 +722,7 @@ public: } /// \brief Generate the compact unwind encoding for the CFI instructions. - virtual unsigned + virtual uint32_t generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const { return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0; } @@ -777,7 +777,7 @@ public: } /// \brief Generate the compact unwind encoding for the CFI instructions. - virtual unsigned + virtual uint32_t generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const { return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0; } -- cgit v1.1 From b7fbc5baad87eb5cc143193e66139824993883d3 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 11 Sep 2013 23:05:25 +0000 Subject: Enable MI scheduling (and CodeGen AA) by default for embedded PPC cores For embedded PPC cores (especially the A2 core), using the MI scheduler with AA is far superior to the other scheduling options.
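The wiring is a two-part pattern (a condensed sketch of the change below; the MySubtarget name is illustrative, not a real class): the subtarget opts in through the TargetSubtargetInfo hook, and the target lowering then keeps the SelectionDAG scheduler in source order, so instructions are aggressively scheduled once, by the MI scheduler, rather than twice:

    // Opting in here causes the MachineScheduler pass to run for this target.
    bool MySubtarget::enableMachineScheduler() const { return true; }

    // In the target lowering constructor: when the MI scheduler will run,
    // prefer source-order SelectionDAG scheduling instead of Sched::Hybrid.
    if (Subtarget->enableMachineScheduler())
      setSchedulingPreference(Sched::Source);
    else
      setSchedulingPreference(Sched::Hybrid);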
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190558 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 7 ++++-- lib/Target/PowerPC/PPCSubtarget.cpp | 39 ++++++++++++++++++++++++++++++++++ lib/Target/PowerPC/PPCSubtarget.h | 8 +++++++ 3 files changed, 52 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 244e00d..d341074 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -556,7 +556,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setInsertFencesForAtomic(true); - setSchedulingPreference(Sched::Hybrid); + if (Subtarget->enableMachineScheduler()) + setSchedulingPreference(Sched::Source); + else + setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(); @@ -7853,7 +7856,7 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { - if (DisableILPPref) + if (DisableILPPref || PPCSubTarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index f975f55..ace37c1 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -15,6 +15,7 @@ #include "PPC.h" #include "PPCRegisterInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Function.h" @@ -186,3 +187,41 @@ bool PPCSubtarget::enablePostRAScheduler( return OptLevel >= CodeGenOpt::Default; } +// Embedded cores need aggressive scheduling. +static bool needsAggressiveScheduling(unsigned Directive) { + switch (Directive) { + default: return false; + case PPC::DIR_440: + case PPC::DIR_A2: + case PPC::DIR_E500mc: + case PPC::DIR_E5500: + return true; + } +} + +bool PPCSubtarget::enableMachineScheduler() const { + // Enable MI scheduling for the embedded cores. + // FIXME: Enable this for all cores (some additional modeling + // may be necessary). + return needsAggressiveScheduling(DarwinDirective); +} + +void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const { + if (needsAggressiveScheduling(DarwinDirective)) { + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + } + + // Spilling is generally expensive on all PPC cores, so always enable + // register-pressure tracking. + Policy.ShouldTrackPressure = true; +} + +bool PPCSubtarget::useAA() const { + // Use AA during code generation for the embedded cores. + return needsAggressiveScheduling(DarwinDirective); +} + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index a933bf6..179ceb5 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -207,6 +207,14 @@ public: bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const; + + // Scheduling customization. 
+ bool enableMachineScheduler() const; + void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const; + bool useAA() const; }; } // End llvm namespace -- cgit v1.1 From d24ba9ff6e7ffc64c0597171b1980cc4e9556eb0 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 11 Sep 2013 23:25:21 +0000 Subject: Greatly simplify the PPC A2 scheduling itinerary As Andy pointed out to me a long time ago, there are no structural hazards in the later pipeline stages of the A2, and so modeling them is useless. Also, modeling the top pre-dispatch stages is deceiving because, when multiple hardware threads are active, those resources are shared among the threads. The bypass definitions were mostly wrong, and so those have been removed. The resulting itinerary is much simpler, and more accurate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190562 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCScheduleA2.td | 841 +++++--------------------------- lib/Target/PowerPC/PPCScheduleE500mc.td | 2 + lib/Target/PowerPC/PPCScheduleE5500.td | 1 + 3 files changed, 118 insertions(+), 726 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 8d5838e..2e41edf 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -14,39 +14,8 @@ //===----------------------------------------------------------------------===// // Functional units on the PowerPC A2 chip sets // -def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1 -def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2 -def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3 -def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4 -def IU4_0 : FuncUnit; // Instruction buffer slot 1 -def IU4_1 : FuncUnit; // Instruction buffer slot 2 -def IU4_2 : FuncUnit; // Instruction buffer slot 3 -def IU4_3 : FuncUnit; // Instruction buffer slot 4 -def IU4_4 : FuncUnit; // Instruction buffer slot 5 -def IU4_5 : FuncUnit; // Instruction buffer slot 6 -def IU4_6 : FuncUnit; // Instruction buffer slot 7 -def IU4_7 : FuncUnit; // Instruction buffer slot 8 -def IU5 : FuncUnit; // Dependency resolution -def IU6 : FuncUnit; // Instruction issue -def RF0 : FuncUnit; -def XRF1 : FuncUnit; -def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline -def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline -def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline -def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline -def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline -def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline -def FRF1 : FuncUnit; -def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline -def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline -def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline -def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline -def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline -def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline - -def CR_Bypass : Bypass; // The bypass for condition regs. -//def GPR_Bypass : Bypass; // The bypass for general-purpose regs. -//def FPR_Bypass : Bypass; // The bypass for floating-point regs. +def XU : FuncUnit; // XU pipeline +def FU : FuncUnit; // FI pipeline // // This file defines the itinerary class data for the PPC A2 processor. @@ -55,699 +24,119 @@ def CR_Bypass : Bypass; // The bypass for condition regs. 
def PPCA2Itineraries : ProcessorItineraries< - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3, - IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7, - IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6, - FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6], - [CR_Bypass, GPR_Bypass, FPR_Bypass], [ - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [CR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>], - [53, 7, 7], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, 
InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, 
[XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [CR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 11], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 11], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 11], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, 
IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [26, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - 
InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [26, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [26, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [GPR_Bypass, NoBypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - 
InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7], - [GPR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [GPR_Bypass, NoBypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], - [29, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], - [29, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], - [29, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], - [29, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [15, 7, 7], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [15, 7, 7], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - 
InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [13, 7, 7], - [CR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>, - InstrStage<71, [FEX1], 0>, - InstrStage<71, [FEX2], 0>, - InstrStage<71, [FEX3], 0>, - InstrStage<71, [FEX4], 0>, - InstrStage<71, [FEX5], 0>, - InstrStage<71, [FEX6]>], - [86, 7, 7], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>, - InstrStage<58, [FEX1], 0>, - InstrStage<58, [FEX2], 0>, - InstrStage<58, [FEX3], 0>, - InstrStage<58, [FEX4], 0>, - InstrStage<58, [FEX5], 0>, - InstrStage<58, [FEX6]>], - [73, 7, 7], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>, - InstrStage<68, [FEX1], 0>, - InstrStage<68, [FEX2], 0>, - InstrStage<68, [FEX3], 0>, - InstrStage<68, [FEX4], 0>, - InstrStage<68, [FEX5], 0>, - InstrStage<68, [FEX6]>], - [86, 7], // FIXME: should be [86, 7] for double - // and [82, 7] for single. Likewise, - // the FEX? cycle count should be 68 - // for double and 64 for single. - [NoBypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [15, 7, 7, 7], - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [15, 7], - [FPR_Bypass, FPR_Bypass]> + [XU, FU], [], [ + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [39, 1, 1]>, + InstrItinData], + [71, 1, 1]>, + InstrItinData], + [5, 1, 1]>, + InstrItinData], + [5, 1, 1]>, + InstrItinData], + [6, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [2, 1]>, + InstrItinData], + [2, 1]>, + InstrItinData], + [6, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [5, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [6, 1, 1]>, + InstrItinData], + [6, 2, 1, 1]>, + InstrItinData], + [6, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [2, 1, 1, 1]>, + InstrItinData], + [16, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [2, 1, 1, 1]>, + InstrItinData], + [7, 1, 1]>, + InstrItinData], + [7, 2, 
1, 1]>, + InstrItinData], + [6, 1, 1]>, + InstrItinData], + [6, 2, 1, 1]>, + InstrItinData], + [82, 1, 1]>, // L2 latency + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [2, 1, 1, 1]>, + InstrItinData], + [82, 1, 1]>, // L2 latency + InstrItinData], + [82, 1, 1]>, // L2 latency + InstrItinData], + [6]>, + InstrItinData], + [16]>, + InstrItinData], + [16, 1]>, + InstrItinData], + [6, 1]>, + InstrItinData], + [4, 1]>, + InstrItinData], + [6, 1]>, + InstrItinData], + [4, 1]>, + InstrItinData], + [6, 1]>, + InstrItinData], + [16]>, + InstrItinData], + [16]>, + InstrItinData], + [6, 1, 1]>, + InstrItinData], + [6, 1, 1]>, + InstrItinData], + [5, 1, 1]>, + InstrItinData], + [72, 1, 1]>, + InstrItinData], + [59, 1, 1]>, + InstrItinData], + [69, 1, 1]>, + InstrItinData], + [6, 1, 1, 1]>, + InstrItinData], + [6, 1]> ]>; // ===---------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td index 9bb779a..c189b9e 100644 --- a/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -36,6 +36,8 @@ def CFX_0 : FuncUnit; // CFX pipeline def LSU_0 : FuncUnit; // LSU pipeline def FPU_0 : FuncUnit; // FPU pipeline +def CR_Bypass : Bypass; + def PPCE500mcItineraries : ProcessorItineraries< [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], [CR_Bypass, GPR_Bypass, FPR_Bypass], [ diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td index d7e11ac..7a24d20 100644 --- a/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/lib/Target/PowerPC/PPCScheduleE5500.td @@ -39,6 +39,7 @@ def CFX_1 : FuncUnit; // CFX pipeline stage 1 // def LSU_0 : FuncUnit; // LSU pipeline // def FPU_0 : FuncUnit; // FPU pipeline +// def CR_Bypass : Bypass; def PPCE5500Itineraries : ProcessorItineraries< [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, -- cgit v1.1 From ac779b8494ad3d2f2ea40cb566552c0fb1b17363 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 12 Sep 2013 02:55:06 +0000 Subject: R600: Don't use trans slot for instructions that read LDS source registers This fixes some regressions in the piglit local memory store tests introduced by recent commits which made the scheduler aware of the trans slot. It's not possible to test this using lit, because there is no way to determine from the assembly dumps whether or not an instruction is in the trans slot. Even if this were possible, the test would be highly sensitive to changes in the scheduler and might generate confusing false negatives. 
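The constraint itself is simple to state: an ALU instruction must not be placed in the trans slot if any of its register uses names one of the LDS source registers. A rough sketch of the check this patch adds, using stand-in types rather than the real LLVM API (the actual implementation is R600InstrInfo::readsLDSSrcReg in the diff below):

    #include <vector>

    struct Operand { bool IsReg, IsUse, IsVirtual; unsigned PhysReg; };
    struct Instr   { bool IsALU; std::vector<Operand> Ops; };

    // Stand-in for AMDGPU::R600_LDS_SRC_REGRegClass.contains(); 219-224 are
    // the OQA/OQB/OQAP/OQBP/LDS_DIRECT_A/LDS_DIRECT_B encodings from the
    // R600RegisterInfo.td hunk below.
    static bool isLDSSrcReg(unsigned Reg) { return Reg >= 219 && Reg <= 224; }

    static bool readsLDSSrcReg(const Instr &MI) {
      if (!MI.IsALU)
        return false;
      for (const Operand &Op : MI.Ops) {
        // Only physical-register uses can name an LDS queue register.
        if (!Op.IsReg || !Op.IsUse || Op.IsVirtual)
          continue;
        if (isLDSSrcReg(Op.PhysReg))
          return true;
      }
      return false;
    }

Both the scheduler (which forces such instructions into a non-trans vector slot) and the packetizer consult this predicate, as the hunks below show.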
Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190574 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrInfo.cpp | 16 ++++++++++++++++ lib/Target/R600/R600InstrInfo.h | 1 + lib/Target/R600/R600MachineScheduler.cpp | 4 ++++ lib/Target/R600/R600Packetizer.cpp | 4 ++++ lib/Target/R600/R600RegisterInfo.td | 10 +++++++++- 5 files changed, 34 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 0e7cfb4..93931e4 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -204,6 +204,22 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { } } +bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { + if (!isALUInstr(MI->getOpcode())) { + return false; + } + for (MachineInstr::const_mop_iterator I = MI->operands_begin(), + E = MI->operands_end(); I != E; ++I) { + if (!I->isReg() || !I->isUse() || + TargetRegisterInfo::isVirtualRegister(I->getReg())) + continue; + + if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) + return true; + } + return false; +} + int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const { static const unsigned OpTable[] = { AMDGPU::OpName::src0, diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 24cc43d..0d1ffc8 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -78,6 +78,7 @@ namespace llvm { bool usesTextureCache(const MachineInstr *MI) const; bool mustBeLastInClause(unsigned Opcode) const; + bool readsLDSSrcReg(const MachineInstr *MI) const; /// \returns The operand index for the given source number. Legal values /// for SrcNum are 0, 1, and 2. diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 0499dd5..6c26d9e 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -314,6 +314,10 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) return AluT_XYZW; + // LDS src registers cannot be used in the Trans slot. + if (TII->readsLDSSrcReg(MI)) + return AluT_XYZW; + return AluAny; } diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 6c70052..bed9115 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -272,6 +272,10 @@ public: return false; } + // We cannot read LDS source registers from the Trans slot.
+ if (isTransSlot && TII->readsLDSSrcReg(MI)) + return false; + CurrentPacketMIs.pop_back(); return true; } diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index fa987cf..514427e 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -95,6 +95,12 @@ foreach Index = 448-480 in { // Special Registers +def OQA : R600Reg<"OQA", 219>; +def OQB : R600Reg<"OQB", 220>; +def OQAP : R600Reg<"OQAP", 221>; +def OQBP : R600Reg<"OQBP", 222>; +def LDS_DIRECT_A : R600Reg<"LDS_DIRECT_A", 223>; +def LDS_DIRECT_B : R600Reg<"LDS_DIRECT_B", 224>; def ZERO : R600Reg<"0.0", 248>; def ONE : R600Reg<"1.0", 249>; def NEG_ONE : R600Reg<"-1.0", 249>; @@ -115,7 +121,6 @@ def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; def AR_X : R600Reg<"AR.x", 0>; -def OQAP : R600Reg<"OQAP", 221>; def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "ArrayBase%u", 448, 480))>; @@ -130,6 +135,9 @@ let isAllocatable = 0 in { // XXX: Only use the X channel, until we support wider stack widths def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>; +def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32, + (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>; + } // End isAllocatable = 0 def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32, -- cgit v1.1 From a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 12 Sep 2013 02:55:14 +0000 Subject: R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take a resource descriptor might be nicer. The maximum number of input SGPRs is bumped to 17.
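Since the intrinsic takes thirteen operands, it helps to see how the num_channels immediate selects the opcode suffix and constrains the VDATA type. A hedged sketch (illustrative names, not LLVM code; the mapping follows the commit message and the MTBUF_Store_Helper definitions in the diff below):

    #include <cassert>
    #include <utility>

    enum class VData { I32, V2I32, V4I32 };

    // num_channels -> (opcode suffix, VDATA type). Note the quirk described
    // above: three channels still use v4i32 because v3i32 does not exist.
    static std::pair<const char *, VData> tbufferStoreFormat(int NumChannels) {
      switch (NumChannels) {
      case 1: return {"TBUFFER_STORE_FORMAT_X",    VData::I32};
      case 2: return {"TBUFFER_STORE_FORMAT_XY",   VData::V2I32};
      case 3: return {"TBUFFER_STORE_FORMAT_XYZ",  VData::V4I32};
      case 4: return {"TBUFFER_STORE_FORMAT_XYZW", VData::V4I32};
      default: assert(0 && "num_channels must be 1-4"); return {"", VData::I32};
      }
    }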
Signed-off-by: Marek Olšák Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190575 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUCallingConv.td | 3 ++- lib/Target/R600/AMDGPUISelLowering.cpp | 1 + lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/SIISelLowering.cpp | 39 ++++++++++++++++++++++++++++++++++ lib/Target/R600/SIInstrInfo.td | 27 +++++++++++++++++++++++ lib/Target/R600/SIInstructions.td | 29 +++++++++++++++++++++---- lib/Target/R600/SIIntrinsics.td | 18 ++++++++++++++++ 7 files changed, 113 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 84d3118..d26be32 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[ CCIfInReg>>, CCIfInReg(Op.getOperand(1))->getZExtValue(); + + switch (IntrinsicID) { + case AMDGPUIntrinsic::SI_tbuffer_store: { + SDLoc DL(Op); + SDValue Ops [] = { + Chain, + ResourceDescriptorToi128(Op.getOperand(2), DAG), + Op.getOperand(3), + Op.getOperand(4), + Op.getOperand(5), + Op.getOperand(6), + Op.getOperand(7), + Op.getOperand(8), + Op.getOperand(9), + Op.getOperand(10), + Op.getOperand(11), + Op.getOperand(12), + Op.getOperand(13), + Op.getOperand(14) + }; + EVT VT = Op.getOperand(3).getValueType(); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOStore, + VT.getSizeInBits() / 8, 4); + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, + Op->getVTList(), Ops, + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); + } + default: + break; + } } return SDValue(); } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 09d5f01..e7d70f4 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", [SDNPMayLoad, SDNPMemOperand] >; +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", + SDTypeProfile<0, 13, + [SDTCisVT<0, i128>, // rsrc(SGPR) + SDTCisVT<1, iAny>, // vdata(VGPR) + SDTCisVT<2, i32>, // num_channels(imm) + SDTCisVT<3, i32>, // vaddr(VGPR) + SDTCisVT<4, i32>, // soffset(SGPR) + SDTCisVT<5, i32>, // inst_offset(imm) + SDTCisVT<6, i32>, // dfmt(imm) + SDTCisVT<7, i32>, // nfmt(imm) + SDTCisVT<8, i32>, // offen(imm) + SDTCisVT<9, i32>, // idxen(imm) + SDTCisVT<10, i32>, // glc(imm) + SDTCisVT<11, i32>, // slc(imm) + SDTCisVT<12, i32> // tfe(imm) + ]>, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain] +>; + def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>, SDTCisVT<3, i32>]> @@ -65,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf < }]> >; +def as_i1imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1); +}]>; + +def as_i8imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8); +}]>; + def as_i16imm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16); }]>; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 14a189a..99fedcb 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -477,10 +477,10 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>; //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>; def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003,
"TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; -//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>; -//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>; -//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; -//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; +def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>; +def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>; +def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>; +def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>; let mayLoad = 1 in { @@ -1881,6 +1881,27 @@ defm : MUBUFStore_Pattern ; defm : MUBUFStore_Pattern ; defm : MUBUFStore_Pattern ; +//===----------------------------------------------------------------------===// +// MTBUF Patterns +//===----------------------------------------------------------------------===// + +// TBUFFER_STORE_FORMAT_*, addr64=0 +class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat< + (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr, + i32:$soffset, imm:$inst_offset, imm:$dfmt, + imm:$nfmt, imm:$offen, imm:$idxen, + imm:$glc, imm:$slc, imm:$tfe), + (opcode + $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), + (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, + (as_i1imm $slc), (as_i1imm $tfe), $soffset) +>; + +def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>; +def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>; +def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>; +def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>; + /********** ====================== **********/ /********** Indirect addressing **********/ /********** ====================== **********/ diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index d6e26ad..7fcc964 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -20,6 +20,24 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; + // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed + def int_SI_tbuffer_store : Intrinsic < + [], + [llvm_anyint_ty, // rsrc(SGPR) + llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32 + llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW + llvm_i32_ty, // vaddr(VGPR) + llvm_i32_ty, // soffset(SGPR) + llvm_i32_ty, // inst_offset(imm) + llvm_i32_ty, // dfmt(imm) + llvm_i32_ty, // nfmt(imm) + llvm_i32_ty, // offen(imm) + llvm_i32_ty, // idxen(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty], // tfe(imm) + []>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; -- cgit v1.1 From 411dea0e7206ccc8018261831225d898d069ff1d Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 12 Sep 2013 05:24:49 +0000 Subject: PPC: Enable aggressive anti-dependency breaking Aggressive anti-dependency breaking is enabled by default for all PPC cores. This provides a general speedup on the P7 and other platforms (among other factors, the instruction group formation for the non-embedded PPC cores is done during post-RA scheduling).
In order to do this safely, the incompatibility between uses of the MFOCRF instruction and anti-dependency breaking is resolved by marking MFOCRF with hasExtraSrcRegAllocReq. As noted in the removed FIXME, the problem was that MFOCRF's output is sensitive to the identity of the source register, and the instruction is always paired with a shift to undo this effect. Because anti-dependency breaking is unaware of this hidden dependency of the shift amount on the source register of the MFOCRF instruction, changing that register must be inhibited. Two test cases were adjusted: the SjLj test was made less sensitive to register choices and scheduling, and the saveCR test now disables anti-dependency breaking because part of what it is testing is proper register reuse. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190587 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstr64Bit.td | 1 + lib/Target/PowerPC/PPCInstrInfo.td | 1 + lib/Target/PowerPC/PPCSubtarget.cpp | 12 +----------- 3 files changed, 3 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 4a8f59b..c9e70c8 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -270,6 +270,7 @@ def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 35e9935..e19be00 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1939,6 +1939,7 @@ def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index ace37c1..fd3bc2f 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -165,14 +165,7 @@ bool PPCSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here, - // but we can't because we can't reassign the cr registers. There is a - // dependence between the cr register and the RLWINM instruction used - // to extract its value which the anti-dependency breaker can't currently - // see. Maybe we should make a late-expanded pseudo to encode this dependency.
- // (the relevant code is in PPCDAGToDAGISel::SelectSETCC) - - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + Mode = TargetSubtargetInfo::ANTIDEP_ALL; CriticalPathRCs.clear(); @@ -181,9 +174,6 @@ bool PPCSubtarget::enablePostRAScheduler( else CriticalPathRCs.push_back(&PPC::GPRCRegClass); - CriticalPathRCs.push_back(&PPC::F8RCRegClass); - CriticalPathRCs.push_back(&PPC::VRRCRegClass); - return OptLevel >= CodeGenOpt::Default; } -- cgit v1.1 From f9d2d2dc89f0c2d39f597038ee723fb9c9af91da Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 12 Sep 2013 08:55:00 +0000 Subject: AVX-512: implemented extractelement with variable index. Added parsing of mask register and "zeroing" semantic, like {%k1} {z}. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190595 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 41 + lib/Target/X86/X86ISelLowering.cpp | 33 +- lib/Target/X86/X86ISelLowering.h | 1 + lib/Target/X86/X86InstrAVX512.td | 6128 +++++++++++++++-------------- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 + lib/Target/X86/X86InstrSSE.td | 12 + 6 files changed, 3154 insertions(+), 3063 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 7eb42c4..3d56acf 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1978,6 +1978,47 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, } } + if (STI.getFeatureBits() & X86::FeatureAVX512) { + // Parse mask register {%k1} + if (getLexer().is(AsmToken::LCurly)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(X86Operand::CreateToken("{", Loc)); + Parser.Lex(); // Eat the { + if (X86Operand *Op = ParseOperand()) { + Operands.push_back(Op); + if (!getLexer().is(AsmToken::RCurly)) { + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, "Expected } at this point"); + } + Loc = Parser.getTok().getLoc(); + Operands.push_back(X86Operand::CreateToken("}", Loc)); + Parser.Lex(); // Eat the } + } else { + Parser.eatToEndOfStatement(); + return true; + } + } + // Parse "zeroing non-masked" semantic {z} + if (getLexer().is(AsmToken::LCurly)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(X86Operand::CreateToken("{z}", Loc)); + Parser.Lex(); // Eat the { + if (!getLexer().is(AsmToken::Identifier) || getLexer().getTok().getIdentifier() != "z") { + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, "Expected z at this point"); + } + Parser.Lex(); // Eat the z + if (!getLexer().is(AsmToken::RCurly)) { + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, "Expected } at this point"); + } + Parser.Lex(); // Eat the } + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); Parser.eatToEndOfStatement(); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 68c81ab..b3cac05 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7601,18 +7601,40 @@ SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - if (!isa(Op.getOperand(1))) - return SDValue(); - SDValue Vec = Op.getOperand(0); MVT VecVT = Vec.getSimpleValueType(); + SDValue Idx = Op.getOperand(1); + if (!isa(Idx)) { + if (VecVT.is512BitVector() || + (VecVT.is256BitVector() && Subtarget->hasInt256() && + 
VecVT.getVectorElementType().getSizeInBits() == 32)) { + + MVT MaskEltVT = + MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits()); + MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() / + MaskEltVT.getSizeInBits()); + + if (Idx.getSimpleValueType() != MaskEltVT) + if (Idx.getOpcode() == ISD::ZERO_EXTEND || + Idx.getOpcode() == ISD::SIGN_EXTEND) + Idx = Idx.getOperand(0); + assert(Idx.getSimpleValueType() == MaskEltVT && + "Unexpected index in insertelement"); + SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT, + getZeroVector(MaskVT, Subtarget, DAG, dl), + Idx, DAG.getConstant(0, getPointerTy())); + SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), + Perm, DAG.getConstant(0, getPointerTy())); + } + return SDValue(); + } // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. if (VecVT.is256BitVector() || VecVT.is512BitVector()) { - SDValue Idx = Op.getOperand(1); - unsigned IdxVal = cast(Idx)->getZExtValue(); + unsigned IdxVal = cast(Idx)->getZExtValue(); // Get the 128-bit vector. Vec = Extract128BitVector(Vec, IdxVal, DAG, dl); MVT EltVT = VecVT.getVectorElementType(); @@ -13663,6 +13685,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VSEXT: return "X86ISD::VSEXT"; case X86ISD::VTRUNC: return "X86ISD::VTRUNC"; case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM"; + case X86ISD::VINSERT: return "X86ISD::VINSERT"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 642df94..08dc115 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -342,6 +342,7 @@ namespace llvm { VBROADCAST, // masked broadcast VBROADCASTM, + VINSERT, // PMULUDQ - Vector multiply packed unsigned doubleword integers PMULUDQ, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index ea3a4e1..ce91687 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1,3058 +1,3070 @@ -// Bitcasts between 512-bit vector types. 
Return the original type since -// no instruction is needed for the conversion -let Predicates = [HasAVX512] in { - def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>; - def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>; - def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>; - def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>; - def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>; - def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; - - def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>; - -// Bitcasts between 256-bit vector types. 
Return the original type since -// no instruction is needed for the conversion - def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>; -} - -// -// AVX-512: VPXOR instruction writes zero to its upper part, it's safe build zeros. 
-// - -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, Predicates = [HasAVX512] in { -def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", - [(set VR512:$dst, (v16f32 immAllZerosV))]>; -} - -def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; -def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>; -def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; -def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; - -//===----------------------------------------------------------------------===// -// AVX-512 - VECTOR INSERT -// -// -- 32x8 form -- -let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { -def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR128X:$src2, i8imm:$src3), - "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512; -let mayLoad = 1 in -def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, f128mem:$src2, i8imm:$src3), - "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; -} - -// -- 64x4 fp form -- -let neverHasSideEffects = 1, ExeDomain = SSEPackedDouble in { -def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR256X:$src2, i8imm:$src3), - "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W; -let mayLoad = 1 in -def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i256mem:$src2, i8imm:$src3), - "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; -} -// -- 32x4 integer form -- -let neverHasSideEffects = 1 in { -def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR128X:$src2, i8imm:$src3), - "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512; -let mayLoad = 1 in -def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i128mem:$src2, i8imm:$src3), - "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; - -} - -let neverHasSideEffects = 1 in { -// -- 64x4 form -- -def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR256X:$src2, i8imm:$src3), - "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W; -let mayLoad = 1 in -def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i256mem:$src2, i8imm:$src3), - "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; -} - -def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2), - (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2), - (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2), - (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2), - (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; - -def : 
Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2), - (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), - (bc_v4i32 (loadv2i64 addr:$src2)), - (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2), - (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2), - (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; - -def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2), - (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2), - (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2), - (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2), - (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; - -def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2), - (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2), - (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2), - (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), - (bc_v8i32 (loadv4i64 addr:$src2)), - (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; - -// vinsertps - insert f32 to XMM -def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3), - "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>, - EVEX_4V; -def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), - (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3), - "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insrtps VR128X:$src1, - (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>; - -//===----------------------------------------------------------------------===// -// AVX-512 VECTOR EXTRACT -//--- -let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { -// -- 32x4 form -- -def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst), - (ins VR512:$src1, i8imm:$src2), - "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512; -def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs), - (ins f128mem:$dst, VR512:$src1, i8imm:$src2), - "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>; - -// -- 64x4 form -- -def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst), - (ins 
VR512:$src1, i8imm:$src2), - "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512, VEX_W; -let mayStore = 1 in -def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs), - (ins f256mem:$dst, VR512:$src1, i8imm:$src2), - "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; -} - -let neverHasSideEffects = 1 in { -// -- 32x4 form -- -def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst), - (ins VR512:$src1, i8imm:$src2), - "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512; -def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs), - (ins i128mem:$dst, VR512:$src1, i8imm:$src2), - "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>; - -// -- 64x4 form -- -def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst), - (ins VR512:$src1, i8imm:$src2), - "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512, VEX_W; -let mayStore = 1 in -def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs), - (ins i256mem:$dst, VR512:$src1, i8imm:$src2), - "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; -} - -def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)), - (v4f32 (VEXTRACTF32x4rr VR512:$src1, - (EXTRACT_get_vextract128_imm VR128X:$ext)))>; - -def : Pat<(vextract128_extract:$ext VR512:$src1, (iPTR imm)), - (v4i32 (VEXTRACTF32x4rr VR512:$src1, - (EXTRACT_get_vextract128_imm VR128X:$ext)))>; - -def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)), - (v2f64 (VEXTRACTF32x4rr VR512:$src1, - (EXTRACT_get_vextract128_imm VR128X:$ext)))>; - -def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)), - (v2i64 (VEXTRACTI32x4rr VR512:$src1, - (EXTRACT_get_vextract128_imm VR128X:$ext)))>; - - -def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)), - (v8f32 (VEXTRACTF64x4rr VR512:$src1, - (EXTRACT_get_vextract256_imm VR256X:$ext)))>; - -def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)), - (v8i32 (VEXTRACTI64x4rr VR512:$src1, - (EXTRACT_get_vextract256_imm VR256X:$ext)))>; - -def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)), - (v4f64 (VEXTRACTF64x4rr VR512:$src1, - (EXTRACT_get_vextract256_imm VR256X:$ext)))>; - -def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)), - (v4i64 (VEXTRACTI64x4rr VR512:$src1, - (EXTRACT_get_vextract256_imm VR256X:$ext)))>; - -// A 256-bit subvector extract from the first 512-bit vector position -// is a subregister copy that needs no instruction. 
-// A 256-bit subvector extract from the first 512-bit vector position
-// is a subregister copy that needs no instruction.
-def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
-          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
-def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
-          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
-def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
-          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
-def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
-          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
-
-// zmm -> xmm
-def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
-          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
-def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
-          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
-def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
-          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
-def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
-          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
-
-
-// A 128-bit subvector insert to the first 512-bit vector position
-// is a subregister copy that needs no instruction.
-def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
-            (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-            sub_ymm)>;
-def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
-            (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-            sub_ymm)>;
-def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
-            (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-            sub_ymm)>;
-def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
-            (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-            sub_ymm)>;
-
-def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
-def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
-def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
-def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
-
-// vextractps - extract 32 bits from XMM
-def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
-      (ins VR128X:$src1, u32u8imm:$src2),
-      "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
-      EVEX;
-
-def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
-      (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
-      "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
-              addr:$dst)]>, EVEX;
-
-//===---------------------------------------------------------------------===//
-// AVX-512 BROADCAST
-//---
-multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
-                               RegisterClass DestRC,
-                               RegisterClass SrcRC, X86MemOperand x86memop> {
-  def rr : AVX5128I, EVEX;
-  def rm : AVX5128I, EVEX;
-}
-let ExeDomain = SSEPackedSingle in {
-  defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss{z}", VR512,
-                                           VR128X, f32mem>,
-                       EVEX_V512, EVEX_CD8<32, CD8VT1>;
-}
-
-let ExeDomain = SSEPackedDouble in {
-  defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd{z}", VR512,
-                                           VR128X, f64mem>,
-                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-}
-
-def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
-          (VBROADCASTSSZrm addr:$src)>;
-def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
-          (VBROADCASTSDZrm addr:$src)>;
-
-multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
-                                    RegisterClass SrcRC, RegisterClass KRC> {
-  def Zrr : AVX5128I, EVEX, EVEX_V512;
-  def Zkrr : AVX5128I, EVEX, EVEX_V512, EVEX_KZ;
-}
-
-defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
-defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
-                     VEX_W;
-
-def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
-           (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
-
-def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
-           (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
-
-def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
-          (VPBROADCASTDrZrr GR32:$src)>;
-def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
-          (VPBROADCASTQrZrr GR64:$src)>;
-
-multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
-                       X86MemOperand x86memop, PatFrag ld_frag,
-                       RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
-                       RegisterClass KRC> {
-  def rr : AVX5128I, EVEX;
-  def krr : AVX5128I,
-            EVEX, EVEX_KZ;
-  def rm : AVX5128I, EVEX;
-  def krm : AVX5128I, EVEX, EVEX_KZ;
-}
-
-defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
-                      loadi32, VR512, v16i32, v4i32, VK16WM>,
-                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
-                      loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
-                      EVEX_CD8<64, CD8VT1>;
-
-def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
-          (VBROADCASTSSZrr VR128X:$src)>;
-def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
-          (VBROADCASTSDZrr VR128X:$src)>;
-
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
-          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
-def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
-          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
-
-
-let Predicates = [HasAVX512] in {
-def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
-          (EXTRACT_SUBREG
-            (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-                      addr:$src)), sub_ymm)>;
-}
-//===----------------------------------------------------------------------===//
-// AVX-512 BROADCAST MASK TO VECTOR REGISTER
-//---
-
-multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
-                                 RegisterClass DstRC, RegisterClass KRC,
-                                 ValueType OpVT, ValueType SrcVT> {
-def rr : AVX512XS8I, EVEX;
-}
-
-defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
-                                             VK16, v16i32, v16i1>, EVEX_V512;
-defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
-                                             VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - VPERM
-//
-// -- immediate form --
-multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                           SDNode OpNode, PatFrag mem_frag,
-                           X86MemOperand x86memop, ValueType OpVT> {
-  def ri : AVX512AIi8,
-           EVEX;
-  def mi : AVX512AIi8, EVEX;
-}
-
-defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
-                      i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
-                      f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-// -- VPERM - register form --
-multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                       PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
-
-  def rr : AVX5128I, EVEX_4V;
-
-  def rm : AVX5128I,
-           EVEX_4V;
-}
-
-defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
-                           v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
-                           v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedSingle in
-defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
-                            v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
-                            v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-// -- VPERM2I - 3 source operands form --
-multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                            PatFrag mem_frag, X86MemOperand x86memop,
-                            ValueType OpVT> {
-let Constraints = "$src1 = $dst" in {
-  def rr : AVX5128I,
-           EVEX_4V;
-
-  def rm : AVX5128I, EVEX_4V;
-  }
-}
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
-                      v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
-                      v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem,
-                      v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
-                      v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - BLEND using mask
-//
-multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
-                            RegisterClass KRC, RegisterClass RC,
-                            X86MemOperand x86memop, PatFrag mem_frag,
-                            SDNode OpNode, ValueType vt> {
-  def rr : AVX5128I, EVEX_4V, EVEX_K;
-
-  def rm : AVX5128I,
-           EVEX_4V, EVEX_K;
-}
-
-let ExeDomain = SSEPackedSingle in
-defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem,
-                                   memopv16f32, vselect, v16f32>,
-                  EVEX_CD8<32, CD8VF>, EVEX_V512;
-let ExeDomain = SSEPackedDouble in
-defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem,
-                                   memopv8f64, vselect, v8f64>,
-                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
-
-defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512, f512mem,
-                                   memopv8i64, vselect, v16i32>,
-                  EVEX_CD8<32, CD8VF>, EVEX_V512;
-
-defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512, f512mem,
-                                   memopv8i64, vselect, v8i64>, VEX_W,
-                  EVEX_CD8<64, CD8VF>, EVEX_V512;
-
-
-let Predicates = [HasAVX512] in {
-def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
-                          (v8f32 VR256X:$src2))),
-          (EXTRACT_SUBREG
-            (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
-                          (v8i32 VR256X:$src2))),
-          (EXTRACT_SUBREG
-            (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-}
-
-multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
-              SDNode OpNode, ValueType vt> {
-  def rr : AVX512BI, EVEX_4V;
-  def rm : AVX512BI, EVEX_4V;
-}
-
-defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
-                        memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512;
-defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
-                        memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W;
-
-defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
-                        memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512;
-defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
-                        memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W;
-
-def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
-          (COPY_TO_REGCLASS (VPCMPGTDZrr
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
-
-def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
-          (COPY_TO_REGCLASS (VPCMPEQDZrr
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
-
-multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
-              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
-              SDNode OpNode, ValueType vt, Operand CC, string asm,
-              string asm_alt> {
-  def rri : AVX512AIi8, EVEX_4V;
-  def rmi : AVX512AIi8, EVEX_4V;
-  // Accept explicit immediate argument form instead of comparison code.
-  let neverHasSideEffects = 1 in {
-    def rri_alt : AVX512AIi8, EVEX_4V;
-    def rmi_alt : AVX512AIi8, EVEX_4V;
-  }
-}
-
-defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32,
-               X86cmpm, v16i32, AVXCC,
-               "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32,
-               X86cmpmu, v16i32, AVXCC,
-               "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
-               X86cmpm, v8i64, AVXCC,
-               "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
-               X86cmpmu, v8i64, AVXCC,
-               "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-// avx512_cmp_packed - sse 1 & 2 compare packed instructions
-multiclass avx512_cmp_packed {
-  def rri : AVX512PIi8<0xC2, MRMSrcReg,
-             (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
-             [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
-  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
-             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
-             [(set KRC:$dst,
-               (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
-
-  // Accept explicit immediate argument form instead of comparison code.
-  let neverHasSideEffects = 1 in {
-    def rri_alt : PIi8<0xC2, MRMSrcReg,
-               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
-               asm_alt, [], IIC_SSE_CMPP_RR, d>;
-    def rmi_alt : PIi8<0xC2, MRMSrcMem,
-               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
-               asm_alt, [], IIC_SSE_CMPP_RM, d>;
-  }
-}
-
-defm VCMPPSZ : avx512_cmp_packed, TB, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VCMPPDZ : avx512_cmp_packed, TB, OpSize, EVEX_4V, VEX_W, EVEX_V512,
-               EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
-          (COPY_TO_REGCLASS (VCMPPSZrri
-            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-            imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
-          (COPY_TO_REGCLASS (VPCMPDZrri
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-            imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
-          (COPY_TO_REGCLASS (VPCMPUDZrri
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-            imm:$cc), VK8)>;
-
-// Mask register copy, including
-// - copy between mask registers
-// - load/store mask registers
-// - copy from GPR to mask register and vice versa
-//
-multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
-                           string OpcodeStr, RegisterClass KRC,
-                           ValueType vt, X86MemOperand x86memop> {
-  let neverHasSideEffects = 1 in {
-    def kk : I;
-    let mayLoad = 1 in
-    def km : I;
-    let mayStore = 1 in
-    def mk : I;
-  }
-}
-
-multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
-                               string OpcodeStr,
-                               RegisterClass KRC, RegisterClass GRC> {
-  let neverHasSideEffects = 1 in {
-    def kr : I;
-    def rk : I;
-  }
-}
-
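kmovw moves a 16-bit mask between a k-register, a GPR, and memory; from C++, a mask is just a __mmask16 value. A minimal sketch (assuming AVX-512F intrinsics; not part of the patch):

    #include <immintrin.h>

    // Lower 8 lanes taken from b, upper 8 from a; the constant mask is
    // materialized in a GPR and moved into a k-register with kmovw.
    __m512 blend_low8(__m512 a, __m512 b) {
      __mmask16 m = 0x00FF;
      return _mm512_mask_blend_ps(m, a, b);
    }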
-let Predicates = [HasAVX512] in {
-  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
-               VEX, TB;
-  defm KMOVW : avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
-               VEX, TB;
-}
-
-let Predicates = [HasAVX512] in {
-  // GR16 from/to 16-bit mask
-  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
-            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
-  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
-            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
-
-  // Store kreg in memory
-  def : Pat<(store (v16i1 VK16:$src), addr:$dst),
-            (KMOVWmk addr:$dst, VK16:$src)>;
-
-  def : Pat<(store (v8i1 VK8:$src), addr:$dst),
-            (KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>;
-}
-// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
-let Predicates = [HasAVX512] in {
-  // GR from/to 8-bit mask without native support
-  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
-            (COPY_TO_REGCLASS
-              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
-              VK8)>;
-  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
-            (EXTRACT_SUBREG
-              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
-              sub_8bit)>;
-}
-
-// Mask unary operation
-// - KNOT
-multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
-                            RegisterClass KRC, SDPatternOperator OpNode> {
-  let Predicates = [HasAVX512] in
-  def rr : I;
-}
-
-multiclass avx512_mask_unop_w<bits<8> opc, string OpcodeStr,
-                              SDPatternOperator OpNode> {
-  defm W : avx512_mask_unop,
-           VEX, TB;
-}
-
-defm KNOT : avx512_mask_unop_w<0x44, "knot", not>;
-
-def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
-def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
-          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
-
-// With AVX-512, 8-bit mask is promoted to 16-bit mask.
-def : Pat<(not VK8:$src),
-          (COPY_TO_REGCLASS
-            (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
-
-// Mask binary operation
-// - KADD, KAND, KANDN, KOR, KXNOR, KXOR
-multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
-                             RegisterClass KRC, SDPatternOperator OpNode> {
-  let Predicates = [HasAVX512] in
-  def rr : I;
-}
-
-multiclass avx512_mask_binop_w<bits<8> opc, string OpcodeStr,
-                               SDPatternOperator OpNode> {
-  defm W : avx512_mask_binop,
-           VEX_4V, VEX_L, TB;
-}
-
-def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
-def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
-
-let isCommutable = 1 in {
-  defm KADD  : avx512_mask_binop_w<0x4a, "kadd", add>;
-  defm KAND  : avx512_mask_binop_w<0x41, "kand", and>;
-  let isCommutable = 0 in
-  defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>;
-  defm KOR   : avx512_mask_binop_w<0x45, "kor", or>;
-  defm KXNOR : avx512_mask_binop_w<0x46, "kxnor", xnor>;
-  defm KXOR  : avx512_mask_binop_w<0x47, "kxor", xor>;
-}
-
-multiclass avx512_mask_binop_int {
-  let Predicates = [HasAVX512] in
-  def : Pat<(!cast("int_x86_"##IntName##"_v16i1")
-              VK16:$src1, VK16:$src2),
-            (!cast(InstName##"Wrr") VK16:$src1, VK16:$src2)>;
-}
-
-defm : avx512_mask_binop_int<"kadd",  "KADD">;
-defm : avx512_mask_binop_int<"kand",  "KAND">;
-defm : avx512_mask_binop_int<"kandn", "KANDN">;
-defm : avx512_mask_binop_int<"kor",   "KOR">;
-defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
-defm : avx512_mask_binop_int<"kxor",  "KXOR">;
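The KNOT/KAND/KXOR family above has direct intrinsic equivalents; a minimal sketch (assuming AVX-512F; not part of the patch):

    #include <immintrin.h>

    __mmask16 combine(__mmask16 a, __mmask16 b) {
      __mmask16 t = _mm512_kand(a, b);        // kandw
      return _mm512_kxor(t, _mm512_knot(b));  // kxorw, knotw
    }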
-// With AVX-512, 8-bit mask is promoted to 16-bit mask.
-multiclass avx512_binop_pat {
-  let Predicates = [HasAVX512] in
-  def : Pat<(OpNode VK8:$src1, VK8:$src2),
-            (COPY_TO_REGCLASS
-              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
-                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
-}
-
-defm : avx512_binop_pat;
-defm : avx512_binop_pat;
-defm : avx512_binop_pat;
-defm : avx512_binop_pat;
-defm : avx512_binop_pat;
-
-// Mask unpacking
-multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
-                             RegisterClass KRC1, RegisterClass KRC2> {
-  let Predicates = [HasAVX512] in
-  def rr : I;
-}
-
-multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
-  defm BW : avx512_mask_unpck,
-            VEX_4V, VEX_L, OpSize, TB;
-}
-
-defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
-
-multiclass avx512_mask_unpck_int {
-  let Predicates = [HasAVX512] in
-  def : Pat<(!cast("int_x86_"##IntName##"_v16i1")
-              VK8:$src1, VK8:$src2),
-            (!cast(InstName##"BWrr") VK8:$src1, VK8:$src2)>;
-}
-
-defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
-// Mask bit testing
-multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-                              SDNode OpNode> {
-  let Predicates = [HasAVX512], Defs = [EFLAGS] in
-  def rr : I;
-}
-
-multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
-  defm W : avx512_mask_testop,
-           VEX, TB;
-}
-
-defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
-defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest>;
-
-// Mask shift
-multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-                               SDNode OpNode> {
-  let Predicates = [HasAVX512] in
-  def ri : Ii8;
-}
-
-multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
-                                 SDNode OpNode> {
-  defm W : avx512_mask_shiftop,
-           VEX, OpSize, TA, VEX_W;
-}
-
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>;
-
-// Mask setting all 0s or 1s
-multiclass avx512_mask_setop {
-  let Predicates = [HasAVX512] in
-  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
-  def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
-                 [(set KRC:$dst, (VT Val))]>;
-}
-
-multiclass avx512_mask_setop_w {
-  defm B : avx512_mask_setop;
-  defm W : avx512_mask_setop;
-}
-
-defm KSET0 : avx512_mask_setop_w;
-defm KSET1 : avx512_mask_setop_w;
-
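kortest sets EFLAGS from the OR of two masks, which is how an all-lanes comparison result gets branched on; a minimal sketch (assuming AVX-512F; not part of the patch):

    #include <immintrin.h>

    int all_lanes_equal(__m512i a, __m512i b) {
      __mmask16 m = _mm512_cmpeq_epi32_mask(a, b); // vpcmpeqd into a k-register
      return _mm512_kortestc(m, m);                // kortestw; CF=1 iff mask is all ones
    }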
-// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
-let Predicates = [HasAVX512] in {
-  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
-  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
-}
-def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
-          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
-
-def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
-          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
-
-def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
-          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - Aligned and unaligned load and store
-//
-
-multiclass avx512_mov_packed<bits<8> opc, RegisterClass RC, RegisterClass KRC,
-                             X86MemOperand x86memop, PatFrag ld_frag,
-                             string asm, Domain d> {
-let neverHasSideEffects = 1 in
-  def rr : AVX512PI,
-           EVEX;
-let canFoldAsLoad = 1 in
-  def rm : AVX512PI, EVEX;
-let Constraints = "$src1 = $dst" in {
-  def rrk : AVX512PI,
-            EVEX, EVEX_K;
-  def rmk : AVX512PI, EVEX, EVEX_K;
-}
-}
-
-defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
-                                  "vmovaps", SSEPackedSingle>,
-                EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
-                                  "vmovapd", SSEPackedDouble>,
-                OpSize, EVEX_V512, VEX_W,
-                EVEX_CD8<64, CD8VF>;
-defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32,
-                                  "vmovups", SSEPackedSingle>,
-                TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64,
-                                  "vmovupd", SSEPackedDouble>,
-                OpSize, EVEX_V512, VEX_W,
-                EVEX_CD8<64, CD8VF>;
-def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
-                   "vmovaps\t{$src, $dst|$dst, $src}",
-                   [(alignedstore512 (v16f32 VR512:$src), addr:$dst)],
-                   SSEPackedSingle>, EVEX, EVEX_V512, TB,
-                   EVEX_CD8<32, CD8VF>;
-def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
-                   "vmovapd\t{$src, $dst|$dst, $src}",
-                   [(alignedstore512 (v8f64 VR512:$src), addr:$dst)],
-                   SSEPackedDouble>, EVEX, EVEX_V512,
-                   OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>;
-def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
-                   "vmovups\t{$src, $dst|$dst, $src}",
-                   [(store (v16f32 VR512:$src), addr:$dst)],
-                   SSEPackedSingle>, EVEX, EVEX_V512, TB,
-                   EVEX_CD8<32, CD8VF>;
-def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
-                   "vmovupd\t{$src, $dst|$dst, $src}",
-                   [(store (v8f64 VR512:$src), addr:$dst)],
-                   SSEPackedDouble>, EVEX, EVEX_V512,
-                   OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>;
-
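vmovaps/vmovups (and the vmovdqa32/vmovdqu32 forms below) differ only in their alignment contract; a minimal sketch (assuming AVX-512F; not part of the patch):

    #include <immintrin.h>

    void copy16f(float* dst, const float* src) {
      __m512 v = _mm512_loadu_ps(src);  // vmovups: no alignment requirement
      _mm512_store_ps(dst, v);          // vmovaps: dst must be 64-byte aligned
    }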
-// Use vmovaps/vmovups for AVX-512 integer load/store.
-// 512-bit load/store
-def : Pat<(alignedloadv8i64 addr:$src),
-          (VMOVAPSZrm addr:$src)>;
-def : Pat<(loadv8i64 addr:$src),
-          (VMOVUPSZrm addr:$src)>;
-
-def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
-          (VMOVAPSZmr addr:$dst, VR512:$src)>;
-def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
-          (VMOVAPSZmr addr:$dst, VR512:$src)>;
-
-def : Pat<(store (v8i64 VR512:$src), addr:$dst),
-          (VMOVUPDZmr addr:$dst, VR512:$src)>;
-def : Pat<(store (v16i32 VR512:$src), addr:$dst),
-          (VMOVUPSZmr addr:$dst, VR512:$src)>;
-
-let neverHasSideEffects = 1 in {
-  def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
-                    (ins VR512:$src),
-                    "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
-                    EVEX, EVEX_V512;
-  def VMOVDQA64rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
-                    (ins VR512:$src),
-                    "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
-                    EVEX, EVEX_V512, VEX_W;
-let mayStore = 1 in {
-  def VMOVDQA32mr : AVX512BI<0x7F, MRMDestMem, (outs),
-                    (ins i512mem:$dst, VR512:$src),
-                    "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
-                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
-  def VMOVDQA64mr : AVX512BI<0x7F, MRMDestMem, (outs),
-                    (ins i512mem:$dst, VR512:$src),
-                    "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
-                    EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-}
-let mayLoad = 1 in {
-def VMOVDQA32rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
-                  (ins i512mem:$src),
-                  "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
-                  EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
-def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
-                  (ins i512mem:$src),
-                  "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
-                  EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-}
-}
-
-multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC,
-                          RegisterClass KRC,
-                          PatFrag ld_frag, X86MemOperand x86memop> {
-let neverHasSideEffects = 1 in
-  def rr : AVX512XSI,
-           EVEX;
-let canFoldAsLoad = 1 in
-  def rm : AVX512XSI,
-           EVEX;
-let Constraints = "$src1 = $dst" in {
-  def rrk : AVX512XSI,
-            EVEX, EVEX_K;
-  def rmk : AVX512XSI, EVEX, EVEX_K;
-}
-}
-
-defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>,
-                 EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>,
-                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-let AddedComplexity = 20 in {
-def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
-                           (v16f32 VR512:$src2))),
-          (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
-def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1),
-                          (v8f64 VR512:$src2))),
-          (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
-def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1),
-                           (v16i32 VR512:$src2))),
-          (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
-def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
-                          (v8i64 VR512:$src2))),
-          (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
-}
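The vselect patterns above fold a select into a masked register move; from C++ the same operation is a masked mov intrinsic. A minimal sketch (assuming AVX-512F; not part of the patch):

    #include <immintrin.h>

    __m512d merge(__m512d a, __mmask8 m, __m512d b) {
      return _mm512_mask_mov_pd(a, m, b); // lanes with m set come from b, others from a
    }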
-// Move Int Doubleword to Packed Double Int
-//
-def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
-                      "vmovd{z}\t{$src, $dst|$dst, $src}",
-                      [(set VR128X:$dst,
-                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
-                      EVEX, VEX_LIG;
-def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
-                      "vmovd{z}\t{$src, $dst|$dst, $src}",
-                      [(set VR128X:$dst,
-                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
-                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
-                      "vmovq{z}\t{$src, $dst|$dst, $src}",
-                      [(set VR128X:$dst,
-                        (v2i64 (scalar_to_vector GR64:$src)))],
-                      IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
-def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
-                      "vmovq{z}\t{$src, $dst|$dst, $src}",
-                      [(set FR64:$dst, (bitconvert GR64:$src))],
-                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
-def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
-                      "vmovq{z}\t{$src, $dst|$dst, $src}",
-                      [(set GR64:$dst, (bitconvert FR64:$src))],
-                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
-def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
-                      "vmovq{z}\t{$src, $dst|$dst, $src}",
-                      [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
-                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
-                      EVEX_CD8<64, CD8VT1>;
-
-// Move Int Doubleword to Single Scalar
-//
-def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
-                      "vmovd{z}\t{$src, $dst|$dst, $src}",
-                      [(set FR32X:$dst, (bitconvert GR32:$src))],
-                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
-
-def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
-                      "vmovd{z}\t{$src, $dst|$dst, $src}",
-                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
-                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-
-// Move Packed Doubleword Int to Packed Double Int
-//
-def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
-                      "vmovd{z}\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
-                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
-                      EVEX, VEX_LIG;
-def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
-                      (ins i32mem:$dst, VR128X:$src),
-                      "vmovd{z}\t{$src, $dst|$dst, $src}",
-                      [(store (i32 (vector_extract (v4i32 VR128X:$src),
-                               (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
-                      EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-
-// Move Packed Doubleword Int first element to Doubleword Int
-//
-def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
-                      "vmovq{z}\t{$src, $dst|$dst, $src}",
-                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
-                                        (iPTR 0)))],
-                      IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W,
-                      Requires<[HasAVX512, In64BitMode]>;
-
-def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs),
-                      (ins i64mem:$dst, VR128X:$src),
-                      "vmovq{z}\t{$src, $dst|$dst, $src}",
-                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
-                        addr:$dst)], IIC_SSE_MOVDQ>,
-                      EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
-                      Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
-
-// Move Scalar Single to Double Int
-//
-def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst),
-                      (ins FR32X:$src),
-                      "vmovd{z}\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (bitconvert FR32X:$src))],
-                      IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
-def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
-                      (ins i32mem:$dst, FR32X:$src),
-                      "vmovd{z}\t{$src, $dst|$dst, $src}",
-                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
-                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-
-// Move Quadword Int to Packed Quadword Int
-//
-def VMOVQI2PQIZrm : AVX512SI<0x7E, MRMSrcMem, (outs VR128X:$dst),
-                      (ins i64mem:$src),
-                      "vmovq{z}\t{$src, $dst|$dst, $src}",
-                      [(set VR128X:$dst,
-                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
-                      EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 MOVSS, MOVSD
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_move_scalar {
-  def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
-              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-              [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
-                                      (scalar_to_vector RC:$src2))))],
-              IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
-  def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-              [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
-              EVEX, VEX_LIG;
-  def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
-             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
-             EVEX, VEX_LIG;
-}
-
-let ExeDomain = SSEPackedSingle in
-defm VMOVSSZ : avx512_move_scalar<"movss{z}", FR32X, X86Movss, v4f32, f32mem,
-                                  loadf32>, XS, EVEX_CD8<32, CD8VT1>;
-
-let ExeDomain = SSEPackedDouble in
-defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem,
-                                  loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-
-
-// For the disassembler
-let isCodeGenOnly = 1 in {
-  def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
-                         (ins VR128X:$src1, FR32X:$src2),
-                         "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
-                         IIC_SSE_MOV_S_RR>,
-                         XS, EVEX_4V, VEX_LIG;
-  def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
-                         (ins VR128X:$src1, FR64X:$src2),
-                         "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
-                         IIC_SSE_MOV_S_RR>,
-                         XD, EVEX_4V, VEX_LIG, VEX_W;
-}
-
-let Predicates = [HasAVX512] in {
-  let AddedComplexity = 15 in {
-  // Move scalar to XMM zero-extended, zeroing a VR128X then do a
-  // MOVS{S,D} to the lower bits.
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
-            (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
-  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
-            (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
-            (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
-            (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
-
-  // Move low f32 and clear high bits.
-  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSSZrr (v4f32 (V_SET0)),
-                (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
-  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSSZrr (v4i32 (V_SET0)),
-                (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
-  }
-
-  let AddedComplexity = 20 in {
-  // MOVSSrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-
-  // MOVSDrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (X86vzload addr:$src)),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-
-  // Represent the same patterns above but in the form they appear for
-  // 256-bit types
-  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
-                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
-                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-  }
-  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
-                   (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
-                                            FR32X:$src)), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
-                   (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
-                                            FR64X:$src)), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-
-  // Move low f64 and clear high bits.
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSDZrr (v2f64 (V_SET0)),
-                (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
-
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
-              (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
-
-  // Extract and store.
-  def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
-                   addr:$dst),
-            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
-  def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
-                   addr:$dst),
-            (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
-
-  // Shuffle with VMOVSS
-  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
-            (VMOVSSZrr (v4i32 VR128X:$src1),
-                       (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
-  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
-            (VMOVSSZrr (v4f32 VR128X:$src1),
-                       (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
-
-  // 256-bit variants
-  def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
-                         (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
-              sub_xmm)>;
-  def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
-                         (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
-              sub_xmm)>;
-
-  // Shuffle with VMOVSD
-  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-
-  // 256-bit variants
-  def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
-                         (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
-              sub_xmm)>;
-  def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
-                         (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
-              sub_xmm)>;
-
-  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-}
-
-let AddedComplexity = 15 in
-def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
-                                  (ins VR128X:$src),
-                                  "vmovq{z}\t{$src, $dst|$dst, $src}",
-                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
-                                                     (v2i64 VR128X:$src))))],
-                                  IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
-
-let AddedComplexity = 20 in
-def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
-                                  (ins i128mem:$src),
-                                  "vmovq{z}\t{$src, $dst|$dst, $src}",
-                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
-                                                     (loadv2i64 addr:$src))))],
-                                  IIC_SSE_MOVDQ>, EVEX, VEX_W,
-                                  EVEX_CD8<8, CD8VT8>;
-
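vmovd/vmovq from a GPR zero the untouched upper vector bits, which is what the X86vzmovl patterns below rely on; a minimal sketch (assuming an SSE2 baseline and x86-64 for the 64-bit form; not part of the patch):

    #include <immintrin.h>

    __m128i from_u32(int x)       { return _mm_cvtsi32_si128(x); } // vmovd: bits 127:32 zeroed
    __m128i from_u64(long long x) { return _mm_cvtsi64_si128(x); } // vmovq: bits 127:64 zeroed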
-let Predicates = [HasAVX512] in {
-  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
-  let AddedComplexity = 20 in {
-  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-            (VMOVDI2PDIZrm addr:$src)>;
-
-  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
-            (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
-            (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
-            (VMOVZPQILo2PQIZrm addr:$src)>;
-  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
-            (VMOVZPQILo2PQIZrr VR128X:$src)>;
-  }
-  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
-  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-                   (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-                   (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
-}
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - Integer arithmetic
-//
-multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                   ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
-                   X86MemOperand x86memop, PatFrag scalar_mfrag,
-                   X86MemOperand x86scalar_mop, string BrdcstStr,
-                   OpndItins itins, bit IsCommutable = 0> {
-  let isCommutable = IsCommutable in
-  def rr : AVX512BI, EVEX_4V;
-  def rm : AVX512BI, EVEX_4V;
-  def rmb : AVX512BI, EVEX_4V, EVEX_B;
-}
-multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
-                   ValueType DstVT, ValueType SrcVT, RegisterClass RC,
-                   PatFrag memop_frag, X86MemOperand x86memop,
-                   OpndItins itins,
-                   bit IsCommutable = 0> {
-  let isCommutable = IsCommutable in
-  def rr : AVX512BI, EVEX_4V, VEX_W;
-  def rm : AVX512BI, EVEX_4V, VEX_W;
-}
-
-defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32,
-               i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32,
-               i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32,
-               i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
-               T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64,
-               i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>,
-               EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
-
-defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64,
-               i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
-               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32,
-                VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8,
-                EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
-                 VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512,
-                 EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
-          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - Unpack Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
-                            PatFrag mem_frag, RegisterClass RC,
-                            X86MemOperand x86memop, string asm,
-                            Domain d> {
-  def rr : AVX512PI, EVEX_4V, TB;
-  def rm : AVX512PI, EVEX_4V, TB;
-}
-
-defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
-      VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-      SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
-      VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-      SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
-      VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-      SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
-      VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-      SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                   ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
-                   X86MemOperand x86memop> {
-  def rr : AVX512BI, EVEX_4V;
-  def rm : AVX512BI, EVEX_4V;
-}
-defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
-                   VR512, memopv16i32, i512mem>, EVEX_V512,
-                   EVEX_CD8<32, CD8VF>;
-defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
-                    VR512, memopv8i64, i512mem>, EVEX_V512,
-                    VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
-                   VR512, memopv16i32, i512mem>, EVEX_V512,
-                   EVEX_CD8<32, CD8VF>;
-defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
-                    VR512, memopv8i64, i512mem>, EVEX_V512,
-                    VEX_W, EVEX_CD8<64, CD8VF>;
-//===----------------------------------------------------------------------===//
-// AVX-512 - PSHUFD
-//
-
-multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                            SDNode OpNode, PatFrag mem_frag,
-                            X86MemOperand x86memop, ValueType OpVT> {
-  def ri : AVX512Ii8,
-           EVEX;
-  def mi : AVX512Ii8, EVEX;
-}
-
-defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
-                i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-let ExeDomain = SSEPackedSingle in
-defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
-                  memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512,
-                  EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
-                  memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512,
-                  VEX_W, EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
-          (VPERMILPSZri VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
-          (VPERMILPDZri VR512:$src1, imm:$imm)>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 Logical Instructions
-//===----------------------------------------------------------------------===//
-
-defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32,
-               i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64,
-               i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
-               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32,
-              i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
-              EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64,
-              i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
-              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32,
-               i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64,
-               i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
-               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512,
-                memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
-                SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64,
-                i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>,
-                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 FP arithmetic
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                          SizeItins itins> {
-  defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG,
-             EVEX_CD8<32, CD8VT1>;
-  defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG,
-             EVEX_CD8<64, CD8VT1>;
-}
-
-let isCommutable = 1 in {
-defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
-defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
-defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
-defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
-}
-let isCommutable = 0 in {
-defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
-defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
-}
-
-multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            RegisterClass RC, ValueType vt,
-                            X86MemOperand x86memop, PatFrag mem_frag,
-                            X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
-                            string BrdcstStr,
-                            Domain d, OpndItins itins, bit commutable> {
-  let isCommutable = commutable in
-  def rr : PI,
-           EVEX_4V;
-  let mayLoad = 1 in {
-    def rm : PI, EVEX_4V;
-    def rmb : PI, EVEX_4V, EVEX_B;
-  }
-}
-
-defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem,
-               memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
-               SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem,
-               memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
-               SSE_ALU_ITINS_P.d, 1>,
-               EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem,
-               memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
-               SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem,
-               memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
-               SSE_ALU_ITINS_P.d, 1>,
-               EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem,
-               memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
-               SSE_ALU_ITINS_P.s, 1>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem,
-               memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
-               SSE_ALU_ITINS_P.s, 1>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem,
-               memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
-               SSE_ALU_ITINS_P.d, 1>,
-               EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem,
-               memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
-               SSE_ALU_ITINS_P.d, 1>,
-               EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem,
-               memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
-               SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem,
-               memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
-               SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem,
-               memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
-               SSE_ALU_ITINS_P.d, 0>,
-               EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem,
-               memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
-               SSE_ALU_ITINS_P.d, 0>,
-               EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 VPTESTM instructions
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
-              SDNode OpNode, ValueType vt> {
-  def rr : AVX5128I, EVEX_4V;
-  def rm : AVX5128I, EVEX_4V;
-}
-
-defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
-                 memopv16i32, X86testm, v16i32>, EVEX_V512,
-                 EVEX_CD8<32, CD8VF>;
-defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
-                 memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
-                 EVEX_CD8<64, CD8VF>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 Shift instructions
-//===----------------------------------------------------------------------===//
-multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
-                         string OpcodeStr, SDNode OpNode, RegisterClass RC,
-                         ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
-                         RegisterClass KRC> {
-  def ri : AVX512BIi8, EVEX_4V;
-  def rik : AVX512BIi8, EVEX_4V, EVEX_K;
-  def mi: AVX512BIi8, EVEX_4V;
-  def mik: AVX512BIi8, EVEX_4V, EVEX_K;
-}
-
-multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                          RegisterClass RC, ValueType vt, ValueType SrcVT,
-                          PatFrag bc_frag, RegisterClass KRC> {
-  // src2 is always 128-bit
-  def rr : AVX512BI, EVEX_4V;
-  def rrk : AVX512BI, EVEX_4V, EVEX_K;
-  def rm : AVX512BI, EVEX_4V;
-  def rmk : AVX512BI, EVEX_4V, EVEX_K;
-}
-
-defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
-               VR512, v16i32, i512mem, memopv16i32, VK16WM>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
-               VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
-               EVEX_CD8<32, CD8VQ>;
-
-defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
-               VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
-               EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
-               VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
-               EVEX_CD8<64, CD8VQ>, VEX_W;
-
-defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
-               VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
-               EVEX_CD8<32, CD8VF>;
-defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
-               VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
-               EVEX_CD8<32, CD8VQ>;
-
-defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
-               VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
-               EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
-               VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
-               EVEX_CD8<64, CD8VQ>, VEX_W;
-
-defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
-               VR512, v16i32, i512mem, memopv16i32, VK16WM>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
-               VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
-               EVEX_CD8<32, CD8VQ>;
-
-defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
-               VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
-               EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
-               VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
-               EVEX_CD8<64, CD8VQ>, VEX_W;
-
-//===-------------------------------------------------------------------===//
-// Variable Bit Shifts
-//===-------------------------------------------------------------------===//
-multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            RegisterClass RC, ValueType vt,
-                            X86MemOperand x86memop, PatFrag mem_frag> {
-  def rr : AVX5128I,
-           EVEX_4V;
-  def rm : AVX5128I,
-           EVEX_4V;
-}
-
-defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
-                i512mem, memopv16i32>, EVEX_V512,
-                EVEX_CD8<32, CD8VF>;
-defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
-                i512mem, memopv8i64>, EVEX_V512, VEX_W,
-                EVEX_CD8<64, CD8VF>;
-defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
-                i512mem, memopv16i32>, EVEX_V512,
-                EVEX_CD8<32, CD8VF>;
-defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
-                i512mem, memopv8i64>, EVEX_V512, VEX_W,
-                EVEX_CD8<64, CD8VF>;
-defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
-                i512mem, memopv16i32>, EVEX_V512,
-                EVEX_CD8<32, CD8VF>;
-defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
-                i512mem, memopv8i64>, EVEX_V512, VEX_W,
-                EVEX_CD8<64, CD8VF>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - MOVDDUP
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_movddup {
-def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
-def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst,
-                     (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
-}
-
-defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
-                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
-          (VMOVDDUPZrm addr:$src)>;
-
-def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
-                  (ins VR128X:$src1, VR128X:$src2),
-                  "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                  [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
-                  IIC_SSE_MOV_LH>, EVEX_4V;
-def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
-                  (ins VR128X:$src1, VR128X:$src2),
-                  "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                  [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
-                  IIC_SSE_MOV_LH>, EVEX_4V;
-
-// MOVLHPS patterns
-def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
-          (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
-def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
-          (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
-
-// MOVHLPS patterns
-def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
-          (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// FMA - Fused Multiply Operations
-//
-let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
-            RegisterClass RC, X86MemOperand x86memop,
-            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
-            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
-  def r: AVX512FMA3;
-
-  let mayLoad = 1 in
-  def m: AVX512FMA3;
-  def mb: AVX512FMA3, EVEX_B;
-}
-} // Constraints = "$src1 = $dst"
-
-let ExeDomain = SSEPackedSingle in {
-  defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
-                      memopv16f32, f32mem, loadf32, "{1to16}",
-                      X86Fmadd, v16f32>, EVEX_V512,
-                      EVEX_CD8<32, CD8VF>;
-  defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
-                      memopv16f32, f32mem, loadf32, "{1to16}",
-                      X86Fmsub, v16f32>, EVEX_V512,
-                      EVEX_CD8<32, CD8VF>;
-  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
-                         memopv16f32, f32mem, loadf32, "{1to16}",
-                         X86Fmaddsub, v16f32>,
-                         EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
-                         memopv16f32, f32mem, loadf32, "{1to16}",
-                         X86Fmsubadd, v16f32>,
-                         EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
-                       memopv16f32, f32mem, loadf32, "{1to16}",
-                       X86Fnmadd, v16f32>, EVEX_V512,
-                       EVEX_CD8<32, CD8VF>;
-  defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
-                       memopv16f32, f32mem, loadf32, "{1to16}",
-                       X86Fnmsub, v16f32>, EVEX_V512,
-                       EVEX_CD8<32, CD8VF>;
-}
-let ExeDomain = SSEPackedDouble in {
-  defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
-                      memopv8f64, f64mem, loadf64, "{1to8}",
-                      X86Fmadd, v8f64>, EVEX_V512,
-                      VEX_W, EVEX_CD8<64, CD8VF>;
-  defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
-                      memopv8f64, f64mem, loadf64, "{1to8}",
-                      X86Fmsub, v8f64>, EVEX_V512, VEX_W,
-                      EVEX_CD8<64, CD8VF>;
-  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
-                         memopv8f64, f64mem, loadf64, "{1to8}",
-                         X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
-                         EVEX_CD8<64, CD8VF>;
-  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
-                         memopv8f64, f64mem, loadf64, "{1to8}",
-                         X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
-                         EVEX_CD8<64, CD8VF>;
-  defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
-                       memopv8f64, f64mem, loadf64, "{1to8}",
-                       X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
-                       EVEX_CD8<64, CD8VF>;
-  defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
-                       memopv8f64, f64mem, loadf64, "{1to8}",
-                       X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
-                       EVEX_CD8<64, CD8VF>;
-}
-
-let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
-            RegisterClass RC, X86MemOperand x86memop,
-            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
-            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
-  let mayLoad = 1 in
-  def m: AVX512FMA3;
-  def mb: AVX512FMA3, EVEX_B;
-}
-} // Constraints = "$src1 = $dst"
-
-
-let ExeDomain = SSEPackedSingle in {
-  defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
-                      memopv16f32, f32mem, loadf32, "{1to16}",
-                      X86Fmadd, v16f32>, EVEX_V512,
-                      EVEX_CD8<32, CD8VF>;
-  defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
-                      memopv16f32, f32mem, loadf32, "{1to16}",
-                      X86Fmsub, v16f32>, EVEX_V512,
-                      EVEX_CD8<32, CD8VF>;
-  defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
-                         memopv16f32, f32mem, loadf32, "{1to16}",
-                         X86Fmaddsub, v16f32>,
-                         EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
-                         memopv16f32, f32mem, loadf32, "{1to16}",
-                         X86Fmsubadd, v16f32>,
-                         EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
-                       memopv16f32, f32mem, loadf32, "{1to16}",
-                       X86Fnmadd, v16f32>, EVEX_V512,
-                       EVEX_CD8<32, CD8VF>;
-  defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
-                       memopv16f32, f32mem, loadf32, "{1to16}",
-                       X86Fnmsub, v16f32>, EVEX_V512,
-                       EVEX_CD8<32, CD8VF>;
-}
-let ExeDomain = SSEPackedDouble in {
-  defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
-                      memopv8f64, f64mem, loadf64, "{1to8}",
-                      X86Fmadd, v8f64>, EVEX_V512,
-                      VEX_W, EVEX_CD8<64, CD8VF>;
-  defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
-                      memopv8f64, f64mem, loadf64, "{1to8}",
-                      X86Fmsub, v8f64>, EVEX_V512, VEX_W,
-                      EVEX_CD8<64, CD8VF>;
-  defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
-                         memopv8f64, f64mem, loadf64, "{1to8}",
-                         X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
-                         EVEX_CD8<64, CD8VF>;
-  defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
-                         memopv8f64, f64mem, loadf64, "{1to8}",
-                         X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
-                         EVEX_CD8<64, CD8VF>;
-  defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
-                       memopv8f64, f64mem, loadf64, "{1to8}",
-                       X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
-                       EVEX_CD8<64, CD8VF>;
-  defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
-                       memopv8f64, f64mem, loadf64, "{1to8}",
-                       X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
-                       EVEX_CD8<64, CD8VF>;
-}
-
-// Scalar FMA
-let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                 RegisterClass RC, ValueType OpVT,
-                 X86MemOperand x86memop, Operand memop,
-                 PatFrag mem_frag> {
-  let isCommutable = 1 in
-  def r : AVX512FMA3;
-  let mayLoad = 1 in
-  def m : AVX512FMA3;
-}
-
-} // Constraints = "$src1 = $dst"
-
-defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X,
-                 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X,
-                 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X,
-                 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X,
-                 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X,
-                  f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X,
-                  f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X,
-                  f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X,
-                  f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-
-//===----------------------------------------------------------------------===// - -multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { -let neverHasSideEffects = 1 in { - def rr : SI, EVEX_4V; - let mayLoad = 1 in - def rm : SI, EVEX_4V; -} // neverHasSideEffects = 1 -} - -defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; - -def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), - (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; -def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), - (VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>; -def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), - (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; -def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), - (VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>; - -def : Pat<(f32 (sint_to_fp GR32:$src)), - (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; -def : Pat<(f32 (sint_to_fp GR64:$src)), - (VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>; -def : Pat<(f64 (sint_to_fp GR32:$src)), - (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; -def : Pat<(f64 (sint_to_fp GR64:$src)), - (VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>; - - -//===----------------------------------------------------------------------===// -// AVX-512 Convert form float to double and back -//===----------------------------------------------------------------------===// -let neverHasSideEffects = 1 in { -def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst), - (ins FR32X:$src1, FR32X:$src2), - "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; -let mayLoad = 1 in -def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst), - (ins FR32X:$src1, f32mem:$src2), - "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, - EVEX_CD8<32, CD8VT1>; - -// Convert scalar double to scalar single -def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst), - (ins FR64X:$src1, FR64X:$src2), - "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>; -let mayLoad = 1 in -def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst), - (ins FR64X:$src1, f64mem:$src2), - "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, VEX_W, - Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>; -} - -def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>, - Requires<[HasAVX512]>; -def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>; - -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX512, OptForSize]>; - -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, - Requires<[HasAVX512, OptForSpeed]>; - -def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, - Requires<[HasAVX512]>; - -multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode 
OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { -let neverHasSideEffects = 1 in { - def rr : AVX512PI, EVEX; - let mayLoad = 1 in - def rm : AVX512PI, EVEX; -} // neverHasSideEffects = 1 -} - -defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround, - memopv8f64, f512mem, v8f32, v8f64, - SSEPackedSingle>, EVEX_V512, VEX_W, OpSize, - EVEX_CD8<64, CD8VF>; - -defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, - memopv4f64, f256mem, v8f64, v8f32, - SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>; -def : Pat<(v8f64 (extloadv8f32 addr:$src)), - (VCVTPS2PDZrm addr:$src)>; - -//===----------------------------------------------------------------------===// -// AVX-512 Vector convert from sign integer to float/double -//===----------------------------------------------------------------------===// - -defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, - memopv8i64, i512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, - memopv4i64, i256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; - -defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, - memopv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, XS, - EVEX_CD8<32, CD8VF>; - -defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - memopv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, OpSize, VEX_W, - EVEX_CD8<64, CD8VF>; - -defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, - memopv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - -defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, - memopv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - -defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, - memopv4i64, f256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; - -defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, - memopv16i32, f512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, XD, - EVEX_CD8<32, CD8VF>; - -def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), - (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr - (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; - - -def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src), - (VCVTDQ2PSZrr VR512:$src)>; -def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))), - (VCVTDQ2PSZrm addr:$src)>; - -def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), - "vcvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR512:$dst, - (int_x86_avx512_cvt_ps2dq_512 VR512:$src))], - IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512; -def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src), - "vcvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR512:$dst, - (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; - - -let Predicates = [HasAVX512] in { - def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), - (VCVTPD2PSZrm addr:$src)>; - def : Pat<(v8f64 (extloadv8f32 addr:$src)), - (VCVTPS2PDZrm addr:$src)>; -} - -let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, - "ucomiss{z}">, TB, EVEX, 
VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd{z}">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - let Pattern = [] in { - defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, - "comiss{z}">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, - "comisd{z}">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - } - defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, - load, "ucomiss">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - - defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, - load, "comiss">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, - load, "comisd">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; -} - -/// avx512_unop_p - AVX-512 unops in packed form. -multiclass avx512_fp_unop_p opc, string OpcodeStr, SDNode OpNode> { - def PSZr : AVX5128I, - EVEX, EVEX_V512; - def PSZm : AVX5128I, - EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr : AVX5128I, - EVEX, EVEX_V512, VEX_W; - def PDZm : AVX5128I, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} - -/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms. -multiclass avx512_fp_unop_p_int opc, string OpcodeStr, - Intrinsic V16F32Int, Intrinsic V8F64Int> { - def PSZr_Int : AVX5128I, - EVEX, EVEX_V512; - def PSZm_Int : AVX5128I, EVEX, - EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr_Int : AVX5128I, - EVEX, EVEX_V512, VEX_W; - def PDZm_Int : AVX5128I, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} - -/// avx512_fp_unop_s - AVX-512 unops in scalar form. 
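These approximation forms (vrcp14*, vrsqrt14*, defined below) are specified with a maximum relative error of 2^-14, so a single Newton-Raphson step normally recovers nearly full single precision. A minimal scalar sketch of that refinement in plain C++ (the inputs are illustrative stand-ins, not produced by the instruction):

#include <cstdio>

// One Newton-Raphson step refining r0 ~ 1/a:  r1 = r0 * (2 - a*r0).
// If |a*r0 - 1| <= 2^-14 (the vrcp14 guarantee), the refined error is
// on the order of 2^-28, i.e. below single-precision resolution.
static float refine_recip(float a, float r0) {
  return r0 * (2.0f - a * r0);
}

int main() {
  float a  = 3.0f;
  float r0 = 0.33331f;            // stand-in for a vrcp14ss result
  std::printf("%.9f\n", refine_recip(a, r0));
  return 0;
}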
-multiclass avx512_fp_unop_s opc, string OpcodeStr, - Intrinsic F32Int, Intrinsic F64Int> { - let hasSideEffects = 0 in { - def SSZr : AVX5128I, EVEX_4V; - let mayLoad = 1 in { - def SSZm : AVX5128I, EVEX_4V, EVEX_CD8<32, CD8VT1>; - def SSZm_Int : AVX5128I, - EVEX_4V, EVEX_CD8<32, CD8VT1>; - } - def SDZr : AVX5128I, - EVEX_4V, VEX_W; - let mayLoad = 1 in { - def SDZm : AVX5128I, - EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; - def SDZm_Int : AVX5128I, - EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; - } -} -} - -defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14", int_x86_avx512_rcp14_ss, - int_x86_avx512_rcp14_sd>, - avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>, - avx512_fp_unop_p_int<0x4C, "vrcp14", - int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>; - -defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14", int_x86_avx512_rsqrt14_ss, - int_x86_avx512_rsqrt14_sd>, - avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>, - avx512_fp_unop_p_int<0x4E, "vrsqrt14", - int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>; - -multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, - Intrinsic V16F32Int, Intrinsic V8F64Int, - OpndItins itins_s, OpndItins itins_d> { - def PSZrr :AVX512PSI, - EVEX, EVEX_V512; - - let mayLoad = 1 in - def PSZrm : AVX512PSI, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; - - def PDZrr : AVX512PDI, - EVEX, EVEX_V512; - - let mayLoad = 1 in - def PDZrm : AVX512PDI, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; - - def PSZr_Int : AVX512PSI, - EVEX, EVEX_V512; - def PSZm_Int : AVX512PSI, EVEX, - EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr_Int : AVX512PDI, - EVEX, EVEX_V512, VEX_W; - def PDZm_Int : AVX512PDI, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} - -multiclass avx512_sqrt_scalar opc, string OpcodeStr, - Intrinsic F32Int, Intrinsic F64Int, - OpndItins itins_s, OpndItins itins_d> { - def SSZr : SI, XS, EVEX_4V; - def SSZr_Int : SIi8, XS, EVEX_4V; - let mayLoad = 1 in { - def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - } - def SDZr : SI, - XD, EVEX_4V, VEX_W; - def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; - let mayLoad = 1 in { - def SDZm : SI, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - def SDZm_Int : SIi8, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - } -} - - -defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", - int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, - SSE_SQRTSS, SSE_SQRTSD>, - avx512_sqrt_packed<0x51, "vsqrt", fsqrt, - int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512, - SSE_SQRTPS, SSE_SQRTPD>; - -def : Pat<(f32 (fsqrt FR32X:$src)), - (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; -def : Pat<(f32 (fsqrt (load addr:$src))), - (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; -def : Pat<(f64 (fsqrt FR64X:$src)), - (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>; -def : Pat<(f64 (fsqrt (load addr:$src))), - (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - -def : Pat<(f32 (X86frsqrt FR32X:$src)), - (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; -def : Pat<(f32 (X86frsqrt (load addr:$src))), - (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - -def : Pat<(f32 (X86frcp FR32X:$src)), - (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; -def : Pat<(f32 (X86frcp (load addr:$src))), - (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - -multiclass avx512_fp_unop_rm opcps, bits<8> opcpd, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - PatFrag mem_frag32, PatFrag mem_frag64, - Intrinsic V4F32Int, Intrinsic V2F64Int, - 
CD8VForm VForm> { -let ExeDomain = SSEPackedSingle in { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr : AVX512AIi8; - - // Vector intrinsic operation, mem - def PSm : AVX512AIi8, - EVEX_CD8<32, VForm>; -} // ExeDomain = SSEPackedSingle - -let ExeDomain = SSEPackedDouble in { - // Vector intrinsic operation, reg - def PDr : AVX512AIi8; - - // Vector intrinsic operation, mem - def PDm : AVX512AIi8, - EVEX_CD8<64, VForm>; -} // ExeDomain = SSEPackedDouble -} - -multiclass avx512_fp_binop_rm opcss, bits<8> opcsd, - string OpcodeStr, - Intrinsic F32Int, - Intrinsic F64Int> { -let ExeDomain = GenericDomain in { - // Operation, reg. - let hasSideEffects = 0 in - def SSr : AVX512AIi8; - - // Intrinsic operation, reg. - def SSr_Int : AVX512AIi8; - - // Intrinsic operation, mem. - def SSm : AVX512AIi8, - EVEX_CD8<32, CD8VT1>; - - // Operation, reg. - let hasSideEffects = 0 in - def SDr : AVX512AIi8, VEX_W; - - // Intrinsic operation, reg. - def SDr_Int : AVX512AIi8, - VEX_W; - - // Intrinsic operation, mem. - def SDm : AVX512AIi8, - VEX_W, EVEX_CD8<64, CD8VT1>; -} // ExeDomain = GenericDomain -} - -let Predicates = [HasAVX512] in { - defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale", - int_x86_avx512_rndscale_ss, - int_x86_avx512_rndscale_sd>, EVEX_4V; - - defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f256mem, VR512, - memopv16f32, memopv8f64, - int_x86_avx512_rndscale_ps_512, - int_x86_avx512_rndscale_pd_512, CD8VF>, - EVEX, EVEX_V512; -} - -def : Pat<(ffloor FR32X:$src), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>; -def : Pat<(f64 (ffloor FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>; -def : Pat<(f32 (fnearbyint FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>; -def : Pat<(f64 (fnearbyint FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>; -def : Pat<(f32 (fceil FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>; -def : Pat<(f64 (fceil FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>; -def : Pat<(f32 (frint FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>; -def : Pat<(f64 (frint FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>; -def : Pat<(f32 (ftrunc FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>; -def : Pat<(f64 (ftrunc FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>; - -def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x1))>; -def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0xC))>; -def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x2))>; -def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x4))>; -def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x3))>; - -def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x1))>; -def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0xC))>; -def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x2))>; -def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x4))>; -def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x3))>; - -//------------------------------------------------- -// Integer truncate and extend operations -//------------------------------------------------- - -multiclass avx512_trunc_sat opc, string 
OpcodeStr, - RegisterClass dstRC, RegisterClass srcRC, - RegisterClass KRC, X86MemOperand x86memop> { - def rr : AVX512XS8I, EVEX; - - def krr : AVX512XS8I, EVEX, EVEX_KZ; - - def mr : AVX512XS8I, EVEX; -} -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; - -def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; -def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; -def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; -def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; -def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; - -def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDBkrr VK16WM:$mask, VR512:$src)>; -def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDWkrr VK16WM:$mask, VR512:$src)>; -def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQWkrr VK8WM:$mask, VR512:$src)>; -def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQDkrr VK8WM:$mask, VR512:$src)>; - - -multiclass avx512_extend opc, string OpcodeStr, RegisterClass DstRC, - RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT> { - - def rr : AVX5128I, EVEX; - def rm : AVX5128I, - EVEX; -} - -defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext, - memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext, - memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext, - memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, 
VR128X, X86vzext, - memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext, - memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; - -defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext, - memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext, - memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext, - memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext, - memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext, - memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; - -//===----------------------------------------------------------------------===// -// GATHER - SCATTER Operations - -multiclass avx512_gather opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand memop> { -let mayLoad = 1, - Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in - def rm : AVX5128I, EVEX, EVEX_K; -} -defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -multiclass avx512_scatter opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand memop> { -let mayStore = 1, Constraints = "$mask = $mask_wb" in - def mr : AVX5128I, EVEX, EVEX_K; -} - -defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -//===----------------------------------------------------------------------===// -// VSHUFPS 
- VSHUFPD Operations - -multiclass avx512_shufp { - def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, - EVEX_4V, TB, Sched<[WriteShuffleLd, ReadAfterLd]>; - def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, - EVEX_4V, TB, Sched<[WriteShuffle]>; -} - -defm VSHUFPSZ : avx512_shufp, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VSHUFPDZ : avx512_shufp, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - - -multiclass avx512_alignr { - def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, EVEX_4V; - def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, EVEX_4V; -} -defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>; -def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; -def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>; -def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; - -multiclass avx512_vpabs opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop> { - def rr : AVX5128I, - EVEX; - def rm : AVX5128I, - EVEX; -} - -defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - +// Bitcasts between 512-bit vector types. 
Return the original type since +// no instruction is needed for the conversion +let Predicates = [HasAVX512] in { + def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; + + def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>; + def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>; + def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>; + def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>; + def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>; + +// Bitcasts between 256-bit vector types. 
Return the original type since
+// no instruction is needed for the conversion
+  def : Pat<(v4f64  (bitconvert (v8f32 VR256X:$src))),  (v4f64 VR256X:$src)>;
+  def : Pat<(v4f64  (bitconvert (v8i32 VR256X:$src))),  (v4f64 VR256X:$src)>;
+  def : Pat<(v4f64  (bitconvert (v4i64 VR256X:$src))),  (v4f64 VR256X:$src)>;
+  def : Pat<(v4f64  (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
+  def : Pat<(v4f64  (bitconvert (v32i8 VR256X:$src))),  (v4f64 VR256X:$src)>;
+  def : Pat<(v8f32  (bitconvert (v8i32 VR256X:$src))),  (v8f32 VR256X:$src)>;
+  def : Pat<(v8f32  (bitconvert (v4i64 VR256X:$src))),  (v8f32 VR256X:$src)>;
+  def : Pat<(v8f32  (bitconvert (v4f64 VR256X:$src))),  (v8f32 VR256X:$src)>;
+  def : Pat<(v8f32  (bitconvert (v32i8 VR256X:$src))),  (v8f32 VR256X:$src)>;
+  def : Pat<(v8f32  (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
+  def : Pat<(v4i64  (bitconvert (v8f32 VR256X:$src))),  (v4i64 VR256X:$src)>;
+  def : Pat<(v4i64  (bitconvert (v8i32 VR256X:$src))),  (v4i64 VR256X:$src)>;
+  def : Pat<(v4i64  (bitconvert (v4f64 VR256X:$src))),  (v4i64 VR256X:$src)>;
+  def : Pat<(v4i64  (bitconvert (v32i8 VR256X:$src))),  (v4i64 VR256X:$src)>;
+  def : Pat<(v4i64  (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
+  def : Pat<(v32i8  (bitconvert (v4f64 VR256X:$src))),  (v32i8 VR256X:$src)>;
+  def : Pat<(v32i8  (bitconvert (v4i64 VR256X:$src))),  (v32i8 VR256X:$src)>;
+  def : Pat<(v32i8  (bitconvert (v8f32 VR256X:$src))),  (v32i8 VR256X:$src)>;
+  def : Pat<(v32i8  (bitconvert (v8i32 VR256X:$src))),  (v32i8 VR256X:$src)>;
+  def : Pat<(v32i8  (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
+  def : Pat<(v8i32  (bitconvert (v32i8 VR256X:$src))),  (v8i32 VR256X:$src)>;
+  def : Pat<(v8i32  (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
+  def : Pat<(v8i32  (bitconvert (v8f32 VR256X:$src))),  (v8i32 VR256X:$src)>;
+  def : Pat<(v8i32  (bitconvert (v4i64 VR256X:$src))),  (v8i32 VR256X:$src)>;
+  def : Pat<(v8i32  (bitconvert (v4f64 VR256X:$src))),  (v8i32 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))),  (v16i16 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))),  (v16i16 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))),  (v16i16 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))),  (v16i16 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))),  (v16i16 VR256X:$src)>;
+}
+
+//
+// AVX-512: the VPXOR instruction writes zero to the upper part of its
+// destination register, so it is safe to use it to build zeros.
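In intrinsic terms, this zero-extending behavior is why a full 512-bit zero is cheap to materialize; a minimal sketch, assuming the AVX-512F intrinsic names from a current <immintrin.h>:

#include <immintrin.h>

// VEX/EVEX-encoded writes to a narrower alias of a register zero the
// remaining upper bits of the ZMM register, so a 128-bit vpxor is
// enough to produce a full 512-bit zero.
__m512 zero512(void) {
  return _mm512_setzero_ps();   // typically a single vpxor
}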
+// + +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isPseudo = 1, Predicates = [HasAVX512] in { +def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", + [(set VR512:$dst, (v16f32 immAllZerosV))]>; +} + +def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; + +//===----------------------------------------------------------------------===// +// AVX-512 - VECTOR INSERT +// +// -- 32x8 form -- +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { +def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst), + (ins VR512:$src1, VR128X:$src2, i8imm:$src3), + "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, EVEX_4V, EVEX_V512; +let mayLoad = 1 in +def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst), + (ins VR512:$src1, f128mem:$src2, i8imm:$src3), + "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; +} + +// -- 64x4 fp form -- +let neverHasSideEffects = 1, ExeDomain = SSEPackedDouble in { +def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst), + (ins VR512:$src1, VR256X:$src2, i8imm:$src3), + "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, EVEX_4V, EVEX_V512, VEX_W; +let mayLoad = 1 in +def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst), + (ins VR512:$src1, i256mem:$src2, i8imm:$src3), + "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; +} +// -- 32x4 integer form -- +let neverHasSideEffects = 1 in { +def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst), + (ins VR512:$src1, VR128X:$src2, i8imm:$src3), + "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, EVEX_4V, EVEX_V512; +let mayLoad = 1 in +def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst), + (ins VR512:$src1, i128mem:$src2, i8imm:$src3), + "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; + +} + +let neverHasSideEffects = 1 in { +// -- 64x4 form -- +def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst), + (ins VR512:$src1, VR256X:$src2, i8imm:$src3), + "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, EVEX_4V, EVEX_V512, VEX_W; +let mayLoad = 1 in +def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst), + (ins VR512:$src1, i256mem:$src2, i8imm:$src3), + "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; +} + +def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2), + (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2), + (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2), + (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2), + (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; + +def : 
Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2), + (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), + (bc_v4i32 (loadv2i64 addr:$src2)), + (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2), + (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2), + (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; + +def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2), + (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2), + (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2), + (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2), + (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; + +def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2), + (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2), + (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2), + (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), + (bc_v8i32 (loadv4i64 addr:$src2)), + (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; + +// vinsertps - insert f32 to XMM +def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3), + "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>, + EVEX_4V; +def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), + (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3), + "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR128X:$dst, (X86insrtps VR128X:$src1, + (v4f32 (scalar_to_vector (loadf32 addr:$src2))), + imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +//===----------------------------------------------------------------------===// +// AVX-512 VECTOR EXTRACT +//--- +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { +// -- 32x4 form -- +def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst), + (ins VR512:$src1, i8imm:$src2), + "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512; +def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs), + (ins f128mem:$dst, VR512:$src1, i8imm:$src2), + "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>; + +// -- 64x4 form -- +def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst), + (ins 
VR512:$src1, i8imm:$src2), + "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, VEX_W; +let mayStore = 1 in +def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs), + (ins f256mem:$dst, VR512:$src1, i8imm:$src2), + "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; +} + +let neverHasSideEffects = 1 in { +// -- 32x4 form -- +def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst), + (ins VR512:$src1, i8imm:$src2), + "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512; +def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs), + (ins i128mem:$dst, VR512:$src1, i8imm:$src2), + "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>; + +// -- 64x4 form -- +def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst), + (ins VR512:$src1, i8imm:$src2), + "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, VEX_W; +let mayStore = 1 in +def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs), + (ins i256mem:$dst, VR512:$src1, i8imm:$src2), + "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; +} + +def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)), + (v4f32 (VEXTRACTF32x4rr VR512:$src1, + (EXTRACT_get_vextract128_imm VR128X:$ext)))>; + +def : Pat<(vextract128_extract:$ext VR512:$src1, (iPTR imm)), + (v4i32 (VEXTRACTF32x4rr VR512:$src1, + (EXTRACT_get_vextract128_imm VR128X:$ext)))>; + +def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)), + (v2f64 (VEXTRACTF32x4rr VR512:$src1, + (EXTRACT_get_vextract128_imm VR128X:$ext)))>; + +def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)), + (v2i64 (VEXTRACTI32x4rr VR512:$src1, + (EXTRACT_get_vextract128_imm VR128X:$ext)))>; + + +def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)), + (v8f32 (VEXTRACTF64x4rr VR512:$src1, + (EXTRACT_get_vextract256_imm VR256X:$ext)))>; + +def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)), + (v8i32 (VEXTRACTI64x4rr VR512:$src1, + (EXTRACT_get_vextract256_imm VR256X:$ext)))>; + +def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)), + (v4f64 (VEXTRACTF64x4rr VR512:$src1, + (EXTRACT_get_vextract256_imm VR256X:$ext)))>; + +def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)), + (v4i64 (VEXTRACTI64x4rr VR512:$src1, + (EXTRACT_get_vextract256_imm VR256X:$ext)))>; + +// A 256-bit subvector extract from the first 512-bit vector position +// is a subregister copy that needs no instruction. 
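The same zero-cost property shows up at the intrinsics level (names assumed from a current <immintrin.h>): the downward cast intrinsics are pure reinterpretations, matching the plain subregister copies the patterns below encode.

#include <immintrin.h>

// Reading the low 256 bits of a 512-bit value is a register
// reinterpretation (ymmN is the low half of zmmN); the cast intrinsic
// is expected to compile to no instruction.
__m256 low256(__m512 v) {
  return _mm512_castps512_ps256(v);
}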
+def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))), + (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>; +def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))), + (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>; +def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))), + (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>; +def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))), + (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>; + +// zmm -> xmm +def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))), + (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; +def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))), + (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; +def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))), + (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; +def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))), + (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; + + +// A 128-bit subvector insert to the first 512-bit vector position +// is a subregister copy that needs no instruction. +def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), + sub_ymm)>; +def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)), + (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), + sub_ymm)>; +def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)), + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), + sub_ymm)>; +def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)), + (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), + (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), + sub_ymm)>; + +def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; + +// vextractps - extract 32 bits from XMM +def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), + (ins VR128X:$src1, u32u8imm:$src2), + "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, + EVEX; + +def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), + (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2), + "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), + addr:$dst)]>, EVEX; + +//===---------------------------------------------------------------------===// +// AVX-512 BROADCAST +//--- +multiclass avx512_fp_broadcast opc, string OpcodeStr, + RegisterClass DestRC, + RegisterClass SrcRC, X86MemOperand x86memop> { + def rr : AVX5128I, EVEX; + def rm : AVX5128I, EVEX; +} +let ExeDomain = SSEPackedSingle in { + defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss{z}", VR512, + VR128X, f32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; +} + +let ExeDomain = SSEPackedDouble in { + defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, 
"vbroadcastsd{z}", VR512, + VR128X, f64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +} + +def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSSZrm addr:$src)>; +def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))), + (VBROADCASTSDZrm addr:$src)>; + +multiclass avx512_int_broadcast_reg opc, string OpcodeStr, + RegisterClass SrcRC, RegisterClass KRC> { + def Zrr : AVX5128I, EVEX, EVEX_V512; + def Zkrr : AVX5128I, EVEX, EVEX_V512, EVEX_KZ; +} + +defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>; +defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>, + VEX_W; + +def : Pat <(v16i32 (X86vzext VK16WM:$mask)), + (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>; + +def : Pat <(v8i64 (X86vzext VK8WM:$mask)), + (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>; + +def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))), + (VPBROADCASTDrZrr GR32:$src)>; +def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), + (VPBROADCASTQrZrr GR64:$src)>; + +multiclass avx512_int_broadcast_rm opc, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, + RegisterClass KRC> { + def rr : AVX5128I, EVEX; + def krr : AVX5128I, + EVEX, EVEX_KZ; + def rm : AVX5128I, EVEX; + def krm : AVX5128I, EVEX, EVEX_KZ; +} + +defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, + loadi32, VR512, v16i32, v4i32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; +defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, + loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VT1>; + +def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))), + (VBROADCASTSSZrr VR128X:$src)>; +def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))), + (VBROADCASTSDZrr VR128X:$src)>; + +// Provide fallback in case the load node that is used in the patterns above +// is used by additional users, which prevents the pattern selection. 
+def : Pat<(v16f32 (X86VBroadcast FR32X:$src)), + (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>; +def : Pat<(v8f64 (X86VBroadcast FR64X:$src)), + (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; + + +let Predicates = [HasAVX512] in { +def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))), + (EXTRACT_SUBREG + (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + addr:$src)), sub_ymm)>; +} +//===----------------------------------------------------------------------===// +// AVX-512 BROADCAST MASK TO VECTOR REGISTER +//--- + +multiclass avx512_mask_broadcast opc, string OpcodeStr, + RegisterClass DstRC, RegisterClass KRC, + ValueType OpVT, ValueType SrcVT> { +def rr : AVX512XS8I, EVEX; +} + +defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512, + VK16, v16i32, v16i1>, EVEX_V512; +defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512, + VK8, v8i64, v8i1>, EVEX_V512, VEX_W; + +//===----------------------------------------------------------------------===// +// AVX-512 - VPERM +// +// -- immediate form -- +multiclass avx512_perm_imm opc, string OpcodeStr, RegisterClass RC, + SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT> { + def ri : AVX512AIi8, + EVEX; + def mi : AVX512AIi8, EVEX; +} + +defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64, + i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +let ExeDomain = SSEPackedDouble in +defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64, + f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +// -- VPERM - register form -- +multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, + PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { + + def rr : AVX5128I, EVEX_4V; + + def rm : AVX5128I, + EVEX_4V; +} + +defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem, + v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, + v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +let ExeDomain = SSEPackedSingle in +defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem, + v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +let ExeDomain = SSEPackedDouble in +defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, + v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +// -- VPERM2I - 3 source operands form -- +multiclass avx512_perm_3src opc, string OpcodeStr, RegisterClass RC, + PatFrag mem_frag, X86MemOperand x86memop, + ValueType OpVT> { +let Constraints = "$src1 = $dst" in { + def rr : AVX5128I, + EVEX_4V; + + def rm : AVX5128I, EVEX_4V; + } +} +defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem, + v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem, + v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem, + v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem, + v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 - BLEND using mask +// +multiclass avx512_blendmask opc, string OpcodeStr, + RegisterClass KRC, RegisterClass RC, + X86MemOperand x86memop, PatFrag mem_frag, + SDNode OpNode, ValueType vt> { + def rr : AVX5128I, EVEX_4V, EVEX_K; + + def rm 
: AVX5128I, + EVEX_4V, EVEX_K; +} + +let ExeDomain = SSEPackedSingle in +defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem, + memopv16f32, vselect, v16f32>, + EVEX_CD8<32, CD8VF>, EVEX_V512; +let ExeDomain = SSEPackedDouble in +defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem, + memopv8f64, vselect, v8f64>, + VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; + +defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512, f512mem, + memopv8i64, vselect, v16i32>, + EVEX_CD8<32, CD8VF>, EVEX_V512; + +defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512, f512mem, + memopv8i64, vselect, v8i64>, VEX_W, + EVEX_CD8<64, CD8VF>, EVEX_V512; + + +let Predicates = [HasAVX512] in { +def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1), + (v8f32 VR256X:$src2))), + (EXTRACT_SUBREG + (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + +def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), + (v8i32 VR256X:$src2))), + (EXTRACT_SUBREG + (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; +} + +multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, + SDNode OpNode, ValueType vt> { + def rr : AVX512BI, EVEX_4V; + def rm : AVX512BI, EVEX_4V; +} + +defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem, + memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512; +defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem, + memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W; + +defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem, + memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512; +defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem, + memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W; + +def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (VPCMPGTDZrr + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; + +def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (VPCMPEQDZrr + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; + +multiclass avx512_icmp_cc opc, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, + SDNode OpNode, ValueType vt, Operand CC, string asm, + string asm_alt> { + def rri : AVX512AIi8, EVEX_4V; + def rmi : AVX512AIi8, EVEX_4V; + // Accept explicit immediate argument form instead of comparison code. 
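Both assembler spellings name the same operation, e.g. "vpcmpltd" is "vpcmpd" with immediate 1. From C or C++ the comparison is always written with the explicit immediate; a minimal sketch, assuming the predicate names from a current <immintrin.h>:

#include <immintrin.h>

// Signed 32-bit less-than across 16 lanes; _MM_CMPINT_LT carries the
// same immediate (1) that the "$cc" operand encodes in the patterns.
__mmask16 lanes_lt(__m512i a, __m512i b) {
  return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
}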
+ let neverHasSideEffects = 1 in { + def rri_alt : AVX512AIi8, EVEX_4V; + def rmi_alt : AVX512AIi8, EVEX_4V; + } +} + +defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32, + X86cmpm, v16i32, AVXCC, + "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32, + X86cmpmu, v16i32, AVXCC, + "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64, + X86cmpm, v8i64, AVXCC, + "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64, + X86cmpmu, v8i64, AVXCC, + "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +// avx512_cmp_packed - sse 1 & 2 compare packed instructions +multiclass avx512_cmp_packed { + def rri : AVX512PIi8<0xC2, MRMSrcReg, + (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, + [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>; + def rmi : AVX512PIi8<0xC2, MRMSrcMem, + (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, + [(set KRC:$dst, + (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>; + + // Accept explicit immediate argument form instead of comparison code. + let neverHasSideEffects = 1 in { + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_CMPP_RR, d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_CMPP_RM, d>; + } +} + +defm VCMPPSZ : avx512_cmp_packed, TB, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VCMPPDZ : avx512_cmp_packed, TB, OpSize, EVEX_4V, VEX_W, EVEX_V512, + EVEX_CD8<64, CD8VF>; + +def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VCMPPSZrri + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; +def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VPCMPDZrri + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; +def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (VPCMPUDZrri + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; + +// Mask register copy, including +// - copy between mask registers +// - load/store mask registers +// - copy from GPR to mask register and vice versa +// +multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, + string OpcodeStr, RegisterClass KRC, + ValueType vt, X86MemOperand x86memop> { + let neverHasSideEffects = 1 in { + def kk : I; + let mayLoad = 1 in + def km : I; + let mayStore = 1 in + def mk : I; + } +} + +multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, + string OpcodeStr, + RegisterClass KRC, RegisterClass GRC> { + let neverHasSideEffects = 1 in { + def kr : I; + def rk : I; + } +} + +let 
Predicates = [HasAVX512] in { + defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, + VEX, TB; + defm KMOVW : avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, + VEX, TB; +} + +let Predicates = [HasAVX512] in { + // GR16 from/to 16-bit mask + def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), + (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>; + def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), + (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>; + + // Store kreg in memory + def : Pat<(store (v16i1 VK16:$src), addr:$dst), + (KMOVWmk addr:$dst, VK16:$src)>; + + def : Pat<(store (v8i1 VK8:$src), addr:$dst), + (KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>; +} +// With AVX-512 only, 8-bit mask is promoted to 16-bit mask. +let Predicates = [HasAVX512] in { + // GR from/to 8-bit mask without native support + def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), + (COPY_TO_REGCLASS + (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), + VK8)>; + def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), + (EXTRACT_SUBREG + (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)), + sub_8bit)>; +} + +// Mask unary operation +// - KNOT +multiclass avx512_mask_unop opc, string OpcodeStr, + RegisterClass KRC, SDPatternOperator OpNode> { + let Predicates = [HasAVX512] in + def rr : I; +} + +multiclass avx512_mask_unop_w opc, string OpcodeStr, + SDPatternOperator OpNode> { + defm W : avx512_mask_unop, + VEX, TB; +} + +defm KNOT : avx512_mask_unop_w<0x44, "knot", not>; + +def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>; +def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), + (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>; + +// With AVX-512, 8-bit mask is promoted to 16-bit mask. +def : Pat<(not VK8:$src), + (COPY_TO_REGCLASS + (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; + +// Mask binary operation +// - KADD, KAND, KANDN, KOR, KXNOR, KXOR +multiclass avx512_mask_binop opc, string OpcodeStr, + RegisterClass KRC, SDPatternOperator OpNode> { + let Predicates = [HasAVX512] in + def rr : I; +} + +multiclass avx512_mask_binop_w opc, string OpcodeStr, + SDPatternOperator OpNode> { + defm W : avx512_mask_binop, + VEX_4V, VEX_L, TB; +} + +def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; +def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; + +let isCommutable = 1 in { + defm KADD : avx512_mask_binop_w<0x4a, "kadd", add>; + defm KAND : avx512_mask_binop_w<0x41, "kand", and>; + let isCommutable = 0 in + defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>; + defm KOR : avx512_mask_binop_w<0x45, "kor", or>; + defm KXNOR : avx512_mask_binop_w<0x46, "kxnor", xnor>; + defm KXOR : avx512_mask_binop_w<0x47, "kxor", xor>; +} + +multiclass avx512_mask_binop_int { + let Predicates = [HasAVX512] in + def : Pat<(!cast("int_x86_"##IntName##"_v16i1") + VK16:$src1, VK16:$src2), + (!cast(InstName##"Wrr") VK16:$src1, VK16:$src2)>; +} + +defm : avx512_mask_binop_int<"kadd", "KADD">; +defm : avx512_mask_binop_int<"kand", "KAND">; +defm : avx512_mask_binop_int<"kandn", "KANDN">; +defm : avx512_mask_binop_int<"kor", "KOR">; +defm : avx512_mask_binop_int<"kxnor", "KXNOR">; +defm : avx512_mask_binop_int<"kxor", "KXOR">; +// With AVX-512, 8-bit mask is promoted to 16-bit mask. 
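+// No 8-bit forms of KAND/KOR/KXOR/... exist here, so a v8i1 operation is
+// lowered by copying both operands into VK16, running the 16-bit
+// instruction, and copying back, as avx512_binop_pat below does. A rough
+// C++ model of why this is safe (illustrative sketch, not part of this
+// patch):
+//
+//   #include <cstdint>
+//   // Only the low 8 bits are observable through VK8, so whatever the
+//   // widened operands carry in bits 15:8 is harmless.
+//   uint8_t kand8_via_kand16(uint8_t a, uint8_t b) {
+//     uint16_t wa = a, wb = b;   // COPY_TO_REGCLASS VK8 -> VK16
+//     uint16_t wr = wa & wb;     // KANDWrr
+//     return uint8_t(wr);       // COPY_TO_REGCLASS VK16 -> VK8
+//   }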
+multiclass avx512_binop_pat { + let Predicates = [HasAVX512] in + def : Pat<(OpNode VK8:$src1, VK8:$src2), + (COPY_TO_REGCLASS + (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), + (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; +} + +defm : avx512_binop_pat; +defm : avx512_binop_pat; +defm : avx512_binop_pat; +defm : avx512_binop_pat; +defm : avx512_binop_pat; + +// Mask unpacking +multiclass avx512_mask_unpck opc, string OpcodeStr, + RegisterClass KRC1, RegisterClass KRC2> { + let Predicates = [HasAVX512] in + def rr : I; +} + +multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { + defm BW : avx512_mask_unpck, + VEX_4V, VEX_L, OpSize, TB; +} + +defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">; + +multiclass avx512_mask_unpck_int { + let Predicates = [HasAVX512] in + def : Pat<(!cast("int_x86_"##IntName##"_v16i1") + VK8:$src1, VK8:$src2), + (!cast(InstName##"BWrr") VK8:$src1, VK8:$src2)>; +} + +defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">; +// Mask bit testing +multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, + SDNode OpNode> { + let Predicates = [HasAVX512], Defs = [EFLAGS] in + def rr : I; +} + +multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode> { + defm W : avx512_mask_testop, + VEX, TB; +} + +defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>; +defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest>; + +// Mask shift +multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, + SDNode OpNode> { + let Predicates = [HasAVX512] in + def ri : Ii8; +} + +multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, + SDNode OpNode> { + defm W : avx512_mask_shiftop, + VEX, OpSize, TA, VEX_W; +} + +defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>; +defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>; + +// Mask setting all 0s or 1s +multiclass avx512_mask_setop { + let Predicates = [HasAVX512] in + let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in + def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", + [(set KRC:$dst, (VT Val))]>; +} + +multiclass avx512_mask_setop_w { + defm B : avx512_mask_setop; + defm W : avx512_mask_setop; +} + +defm KSET0 : avx512_mask_setop_w; +defm KSET1 : avx512_mask_setop_w; + +// With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 
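+// The v8i1 constants below are therefore materialized with the 16-bit
+// KSET0W/KSET1W pseudos and read back through VK8. Mask "subvectors" are
+// just bit ranges of the wider mask: the low v8i1 half of a v16i1 is a
+// plain register-class copy, and the high half needs a kshiftr first. A
+// C++ bit-level model (illustrative sketch, not part of this patch):
+//
+//   #include <cstdint>
+//   uint8_t extract_lo8(uint16_t k16) { return uint8_t(k16);       }
+//   uint8_t extract_hi8(uint16_t k16) { return uint8_t(k16 >> 8); }  // KSHIFTRWri 8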
+let Predicates = [HasAVX512] in { + def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; + def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; +} +def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))), + (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>; + +def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), + (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>; + +def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), + (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Aligned and unaligned load and store +// + +multiclass avx512_mov_packed opc, RegisterClass RC, RegisterClass KRC, + X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { +let neverHasSideEffects = 1 in + def rr : AVX512PI, + EVEX; +let canFoldAsLoad = 1 in + def rm : AVX512PI, EVEX; +let Constraints = "$src1 = $dst" in { + def rrk : AVX512PI, + EVEX, EVEX_K; + def rmk : AVX512PI, EVEX, EVEX_K; +} +} + +defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32, + "vmovaps", SSEPackedSingle>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64, + "vmovapd", SSEPackedDouble>, + OpSize, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32, + "vmovups", SSEPackedSingle>, + TB, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64, + "vmovupd", SSEPackedDouble>, + OpSize, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), + "vmovaps\t{$src, $dst|$dst, $src}", + [(alignedstore512 (v16f32 VR512:$src), addr:$dst)], + SSEPackedSingle>, EVEX, EVEX_V512, TB, + EVEX_CD8<32, CD8VF>; +def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), + "vmovapd\t{$src, $dst|$dst, $src}", + [(alignedstore512 (v8f64 VR512:$src), addr:$dst)], + SSEPackedDouble>, EVEX, EVEX_V512, + OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>; +def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), + "vmovups\t{$src, $dst|$dst, $src}", + [(store (v16f32 VR512:$src), addr:$dst)], + SSEPackedSingle>, EVEX, EVEX_V512, TB, + EVEX_CD8<32, CD8VF>; +def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), + "vmovupd\t{$src, $dst|$dst, $src}", + [(store (v8f64 VR512:$src), addr:$dst)], + SSEPackedDouble>, EVEX, EVEX_V512, + OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>; + +// Use vmovaps/vmovups for AVX-512 integer load/store. 
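+// A full-width 512-bit move preserves every bit regardless of element
+// type, so the integer patterns below simply reuse the FP move
+// instructions. Rough C++ equivalent (illustrative sketch, not part of
+// this patch):
+//
+//   #include <cstring>
+//   // Loading v8i64 or v16i32 through vmovups is just a 64-byte copy.
+//   void load512(void *dst_reg, const void *src_mem) {
+//     std::memcpy(dst_reg, src_mem, 64);
+//   }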
+// 512-bit load/store +def : Pat<(alignedloadv8i64 addr:$src), + (VMOVAPSZrm addr:$src)>; +def : Pat<(loadv8i64 addr:$src), + (VMOVUPSZrm addr:$src)>; + +def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst), + (VMOVAPSZmr addr:$dst, VR512:$src)>; +def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst), + (VMOVAPSZmr addr:$dst, VR512:$src)>; + +def : Pat<(store (v8i64 VR512:$src), addr:$dst), + (VMOVUPDZmr addr:$dst, VR512:$src)>; +def : Pat<(store (v16i32 VR512:$src), addr:$dst), + (VMOVUPSZmr addr:$dst, VR512:$src)>; + +let neverHasSideEffects = 1 in { + def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), + (ins VR512:$src), + "vmovdqa32\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512; + def VMOVDQA64rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), + (ins VR512:$src), + "vmovdqa64\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, VEX_W; +let mayStore = 1 in { + def VMOVDQA32mr : AVX512BI<0x7F, MRMDestMem, (outs), + (ins i512mem:$dst, VR512:$src), + "vmovdqa32\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + def VMOVDQA64mr : AVX512BI<0x7F, MRMDestMem, (outs), + (ins i512mem:$dst, VR512:$src), + "vmovdqa64\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} +let mayLoad = 1 in { +def VMOVDQA32rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), + (ins i512mem:$src), + "vmovdqa32\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; +def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), + (ins i512mem:$src), + "vmovdqa64\t{$src, $dst|$dst, $src}", []>, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} +} + +multiclass avx512_mov_int opc, string asm, RegisterClass RC, + RegisterClass KRC, + PatFrag ld_frag, X86MemOperand x86memop> { +let neverHasSideEffects = 1 in + def rr : AVX512XSI, + EVEX; +let canFoldAsLoad = 1 in + def rm : AVX512XSI, + EVEX; +let Constraints = "$src1 = $dst" in { + def rrk : AVX512XSI, + EVEX, EVEX_K; + def rmk : AVX512XSI, EVEX, EVEX_K; +} +} + +defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +let AddedComplexity = 20 in { +def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1), + (v16f32 VR512:$src2))), + (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>; +def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1), + (v8f64 VR512:$src2))), + (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>; +def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1), + (v16i32 VR512:$src2))), + (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>; +def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1), + (v8i64 VR512:$src2))), + (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>; +} +// Move Int Doubleword to Packed Double Int +// +def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + EVEX, VEX_LIG; +def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, + (v4i32 (scalar_to_vector (loadi32 addr:$src))))], + IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; +def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, + (v2i64 
(scalar_to_vector GR64:$src)))], + IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG; +def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert GR64:$src))], + IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; +def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (bitconvert FR64:$src))], + IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; +def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(store (i64 (bitconvert FR64:$src)), addr:$dst)], + IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>, + EVEX_CD8<64, CD8VT1>; + +// Move Int Doubleword to Single Scalar +// +def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set FR32X:$dst, (bitconvert GR32:$src))], + IIC_SSE_MOVDQ>, EVEX, VEX_LIG; + +def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))], + IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + +// Move Packed Doubleword Int to Packed Double Int +// +def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src), + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + EVEX, VEX_LIG; +def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, VR128X:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(store (i32 (vector_extract (v4i32 VR128X:$src), + (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, + EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + +// Move Packed Doubleword Int first element to Doubleword Int +// +def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), + (iPTR 0)))], + IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W, + Requires<[HasAVX512, In64BitMode]>; + +def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), + (ins i64mem:$dst, VR128X:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), + addr:$dst)], IIC_SSE_MOVDQ>, + EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>, + Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; + +// Move Scalar Single to Double Int +// +def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), + (ins FR32X:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (bitconvert FR32X:$src))], + IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; +def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, FR32X:$src), + "vmovd{z}\t{$src, $dst|$dst, $src}", + [(store (i32 (bitconvert FR32X:$src)), addr:$dst)], + IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + +// Move Quadword Int to Packed Quadword Int +// +def VMOVQI2PQIZrm : AVX512SI<0x7E, MRMSrcMem, (outs VR128X:$dst), + (ins i64mem:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, + EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + +//===----------------------------------------------------------------------===// +// AVX-512 MOVSS, MOVSD +//===----------------------------------------------------------------------===// + +multiclass avx512_move_scalar { + def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), + 
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128X:$dst, (vt (OpNode VR128X:$src1, + (scalar_to_vector RC:$src2))))], + IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; + def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, + EVEX, VEX_LIG; + def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + EVEX, VEX_LIG; +} + +let ExeDomain = SSEPackedSingle in +defm VMOVSSZ : avx512_move_scalar<"movss{z}", FR32X, X86Movss, v4f32, f32mem, + loadf32>, XS, EVEX_CD8<32, CD8VT1>; + +let ExeDomain = SSEPackedDouble in +defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem, + loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; + + +// For the disassembler +let isCodeGenOnly = 1 in { + def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR32X:$src2), + "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + IIC_SSE_MOV_S_RR>, + XS, EVEX_4V, VEX_LIG; + def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR64X:$src2), + "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + IIC_SSE_MOV_S_RR>, + XD, EVEX_4V, VEX_LIG, VEX_W; +} + +let Predicates = [HasAVX512] in { + let AddedComplexity = 15 in { + // Move scalar to XMM zero-extended, zeroing a VR128X then do a + // MOVS{S,D} to the lower bits. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))), + (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>; + def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))), + (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))), + (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))), + (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>; + + // Move low f32 and clear high bits. + def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSSZrr (v4f32 (V_SET0)), + (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>; + def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSSZrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>; + } + + let AddedComplexity = 20 in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; + def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>; + + // MOVSDrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; + def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; + def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; + def : Pat<(v2f64 (X86vzload addr:$src)), + (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>; + + // Represent the same patterns above but in the form they appear for + // 256-bit types + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; + def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; + def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; + } + def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)), + FR32X:$src)), sub_xmm)>; + def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))), + (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)), + FR64X:$src)), sub_xmm)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>; + + // Move low f64 and clear high bits. + def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSDZrr (v2f64 (V_SET0)), + (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>; + + def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))), + (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)), + (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>; + + // Extract and store. 
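+// Storing element 0 of an XMM register is exactly a scalar vmovss/vmovsd
+// store, so no separate extract instruction is needed. C++ model
+// (illustrative sketch, not part of this patch):
+//
+//   void store_lane0(float *p, const float v[4])   { *p = v[0]; }  // vmovss
+//   void store_lane0d(double *p, const double v[2]) { *p = v[0]; } // vmovsd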
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))), + addr:$dst), + (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>; + def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))), + addr:$dst), + (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>; + + // Shuffle with VMOVSS + def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)), + (VMOVSSZrr (v4i32 VR128X:$src1), + (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>; + def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)), + (VMOVSSZrr (v4f32 VR128X:$src1), + (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>; + + // 256-bit variants + def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm), + (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)), + sub_xmm)>; + def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm), + (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)), + sub_xmm)>; + + // Shuffle with VMOVSD + def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + + // 256-bit variants + def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm), + (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)), + sub_xmm)>; + def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm), + (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)), + sub_xmm)>; + + def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; +} + +let AddedComplexity = 15 in +def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v2i64 (X86vzmovl + (v2i64 VR128X:$src))))], + IIC_SSE_MOVQ_RR>, EVEX, VEX_W; + +let AddedComplexity = 20 in +def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), + (ins i128mem:$src), + "vmovq{z}\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v2i64 (X86vzmovl + (loadv2i64 addr:$src))))], + IIC_SSE_MOVDQ>, EVEX, VEX_W, + EVEX_CD8<8, CD8VT8>; + +let Predicates = [HasAVX512] in { + // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 
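+// C++ model of that zero-extending behavior (illustrative sketch, not
+// part of this patch):
+//
+//   #include <cstdint>
+//   // vmovd gpr32 -> xmm: element 0 gets the GPR value and every other
+//   // lane is cleared, which is why the scalar_to_vector + zero patterns
+//   // below fold to a bare VMOVDI2PDIZrr.
+//   void movd(int32_t dst[4], int32_t src) {
+//     dst[0] = src;
+//     dst[1] = dst[2] = dst[3] = 0;
+//   }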
+ let AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + (VMOVDI2PDIZrm addr:$src)>; + + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + (VMOVDI2PDIZrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + (VMOVDI2PDIZrm addr:$src)>; + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + (VMOVZPQILo2PQIZrm addr:$src)>; + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), + (VMOVZPQILo2PQIZrr VR128X:$src)>; + } + // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>; +} + +def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>; + +def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))), + (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>; + +def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>; + +def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))), + (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Integer arithmetic +// +multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, PatFrag scalar_mfrag, + X86MemOperand x86scalar_mop, string BrdcstStr, + OpndItins itins, bit IsCommutable = 0> { + let isCommutable = IsCommutable in + def rr : AVX512BI, EVEX_4V; + def rm : AVX512BI, EVEX_4V; + def rmb : AVX512BI, EVEX_4V, EVEX_B; +} +multiclass avx512_binop_rm2 opc, string OpcodeStr, + ValueType DstVT, ValueType SrcVT, RegisterClass RC, + PatFrag memop_frag, X86MemOperand x86memop, + OpndItins itins, + bit IsCommutable = 0> { + let isCommutable = IsCommutable in + def rr : AVX512BI, EVEX_4V, VEX_W; + def rm : AVX512BI, EVEX_4V, VEX_W; +} + +defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, + T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>, + EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W; + +defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, + VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8, + EVEX_V512, EVEX_CD8<64, CD8VF>; + +defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, + VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512, + EVEX_CD8<64, 
CD8VF>; + +def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), + (VPMULUDQZrr VR512:$src1, VR512:$src2)>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Unpack Instructions +//===----------------------------------------------------------------------===// + +multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, + PatFrag mem_frag, RegisterClass RC, + X86MemOperand x86memop, string asm, + Domain d> { + def rr : AVX512PI, EVEX_4V, TB; + def rm : AVX512PI, EVEX_4V, TB; +} + +defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, + VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, + VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, + VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, + VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop> { + def rr : AVX512BI, EVEX_4V; + def rm : AVX512BI, EVEX_4V; +} +defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, + VR512, memopv16i32, i512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, + VR512, memopv8i64, i512mem>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; +defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, + VR512, memopv16i32, i512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, + VR512, memopv8i64, i512mem>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; +//===----------------------------------------------------------------------===// +// AVX-512 - PSHUFD +// + +multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, + SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT> { + def ri : AVX512Ii8, + EVEX; + def mi : AVX512Ii8, EVEX; +} + +defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32, + i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>; + +let ExeDomain = SSEPackedSingle in +defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp, + memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512, + EVEX_CD8<32, CD8VF>; +let ExeDomain = SSEPackedDouble in +defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp, + memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512, + VEX_W, EVEX_CD8<32, CD8VF>; + +def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))), + (VPERMILPSZri VR512:$src1, imm:$imm)>; +def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))), + (VPERMILPDZri VR512:$src1, imm:$imm)>; + +//===----------------------------------------------------------------------===// +// AVX-512 Logical Instructions +//===----------------------------------------------------------------------===// + +defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", 
SSE_BIT_ITINS_P, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32, + i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512, + memopv16i32, i512mem, loadi32, i32mem, "{1to16}", + SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64, + i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 FP arithmetic +//===----------------------------------------------------------------------===// + +multiclass avx512_binop_s opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; +} + +let isCommutable = 1 in { +defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>; +defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>; +defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>; +defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>; +} +let isCommutable = 0 in { +defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>; +defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>; +} + +multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag, + X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, + string BrdcstStr, + Domain d, OpndItins itins, bit commutable> { + let isCommutable = commutable in + def rr : PI, + EVEX_4V; + let mayLoad = 1 in { + def rm : PI, EVEX_4V; + def rmb : PI, EVEX_4V, EVEX_B; + } +} + +defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 1>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 1>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, 
"{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 1>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 1>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; +defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 1>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, + SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 0>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; +defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, + SSE_ALU_ITINS_P.d, 0>, + EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 VPTESTM instructions +//===----------------------------------------------------------------------===// + +multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, + SDNode OpNode, ValueType vt> { + def rr : AVX5128I, EVEX_4V; + def rm : AVX5128I, EVEX_4V; +} + +defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem, + memopv16i32, X86testm, v16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, + memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 Shift instructions +//===----------------------------------------------------------------------===// +multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, RegisterClass RC, + ValueType vt, X86MemOperand x86memop, PatFrag mem_frag, + RegisterClass KRC> { + def ri : AVX512BIi8, EVEX_4V; + def rik : AVX512BIi8, EVEX_4V, EVEX_K; + def mi: AVX512BIi8, EVEX_4V; + def mik: AVX512BIi8, EVEX_4V, EVEX_K; +} + +multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, ValueType SrcVT, + PatFrag bc_frag, RegisterClass KRC> { + // src2 is always 128-bit + def rr : AVX512BI, EVEX_4V; + def rrk : AVX512BI, EVEX_4V, EVEX_K; + def rm : AVX512BI, EVEX_4V; + def rmk : AVX512BI, EVEX_4V, EVEX_K; +} + +defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli, + VR512, v16i32, i512mem, memopv16i32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, + VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli, + VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, + EVEX_CD8<64, 
CD8VF>, VEX_W; +defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl, + VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + +defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli, + VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl, + VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli, + VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, + EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl, + VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + +defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai, + VR512, v16i32, i512mem, memopv16i32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra, + VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, + EVEX_CD8<32, CD8VQ>; + +defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai, + VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, + EVEX_CD8<64, CD8VF>, VEX_W; +defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra, + VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512, + EVEX_CD8<64, CD8VQ>, VEX_W; + +//===-------------------------------------------------------------------===// +// Variable Bit Shifts +//===-------------------------------------------------------------------===// +multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag> { + def rr : AVX5128I, + EVEX_4V; + def rm : AVX5128I, + EVEX_4V; +} + +defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32, + i512mem, memopv16i32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, + i512mem, memopv8i64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + +//===----------------------------------------------------------------------===// +// AVX-512 - MOVDDUP +//===----------------------------------------------------------------------===// + +multiclass avx512_movddup { +def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX; +def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, + (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX; +} + +defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))), + (VMOVDDUPZrm addr:$src)>; + +def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2), + "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128X:$dst, (v4f32 
(X86Movlhps VR128X:$src1, VR128X:$src2)))], + IIC_SSE_MOV_LH>, EVEX_4V; +def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2), + "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))], + IIC_SSE_MOV_LH>, EVEX_4V; + +// MOVLHPS patterns +def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)), + (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>; +def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)), + (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>; + +// MOVHLPS patterns +def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)), + (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>; + +//===----------------------------------------------------------------------===// +// FMA - Fused Multiply Operations +// +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_rm opc, string OpcodeStr, + RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, + string BrdcstStr, SDNode OpNode, ValueType OpVT> { + def r: AVX512FMA3; + + let mayLoad = 1 in + def m: AVX512FMA3; + def mb: AVX512FMA3, EVEX_B; +} +} // Constraints = "$src1 = $dst" + +let ExeDomain = SSEPackedSingle in { + defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmadd, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmsub, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmaddsub, v16f32>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmsubadd, v16f32>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fnmadd, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fnmsub, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +} +let ExeDomain = SSEPackedDouble in { + defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmadd, v8f64>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmaddsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmsubadd, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fnmadd, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fnmsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +} + +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3p_m132 opc, string OpcodeStr, + 
RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, + string BrdcstStr, SDNode OpNode, ValueType OpVT> { + let mayLoad = 1 in + def m: AVX512FMA3; + def mb: AVX512FMA3, EVEX_B; +} +} // Constraints = "$src1 = $dst" + + +let ExeDomain = SSEPackedSingle in { + defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmadd, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmsub, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmaddsub, v16f32>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fmsubadd, v16f32>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fnmadd, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem, + memopv16f32, f32mem, loadf32, "{1to16}", + X86Fnmsub, v16f32>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +} +let ExeDomain = SSEPackedDouble in { + defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmadd, v8f64>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmaddsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fmsubadd, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fnmadd, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem, + memopv8f64, f64mem, loadf64, "{1to8}", + X86Fnmsub, v8f64>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; +} + +// Scalar FMA +let Constraints = "$src1 = $dst" in { +multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType OpVT, + X86MemOperand x86memop, Operand memop, + PatFrag mem_frag> { + let isCommutable = 1 in + def r : AVX512FMA3; + let mayLoad = 1 in + def m : AVX512FMA3; +} + +} // Constraints = "$src1 = $dst" + +defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X, + f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; +defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X, + f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X, + f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; +defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X, + f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X, + f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; 
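+// For the 213 forms the addend is $src3: dst = +/-($src2 * $src1) +/- $src3
+// depending on the fma/fms/fnma/fnms variant, with $src1 tied to $dst.
+// C++ model of vfmadd213ss (illustrative sketch, not part of this patch;
+// hardware rounds once, so std::fma is the honest equivalent):
+//
+//   #include <cmath>
+//   float fmadd213ss(float src1, float src2, float src3) {
+//     return std::fma(src2, src1, src3);
+//   }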
+defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X, + f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X, + f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; +defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X, + f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; + +//===----------------------------------------------------------------------===// +// AVX-512 Scalar convert from sign integer to float/double +//===----------------------------------------------------------------------===// + +multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, + X86MemOperand x86memop, string asm> { +let neverHasSideEffects = 1 in { + def rr : SI, EVEX_4V; + let mayLoad = 1 in + def rm : SI, EVEX_4V; +} // neverHasSideEffects = 1 +} + +defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">, + XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, + XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">, + XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, + XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; + +def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; +def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>; + +def : Pat<(f32 (sint_to_fp GR32:$src)), + (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(f32 (sint_to_fp GR64:$src)), + (VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>; +def : Pat<(f64 (sint_to_fp GR32:$src)), + (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; +def : Pat<(f64 (sint_to_fp GR64:$src)), + (VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>; + + +//===----------------------------------------------------------------------===// +// AVX-512 Convert form float to double and back +//===----------------------------------------------------------------------===// +let neverHasSideEffects = 1 in { +def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst), + (ins FR32X:$src1, FR32X:$src2), + "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; +let mayLoad = 1 in +def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst), + (ins FR32X:$src1, f32mem:$src2), + "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, + EVEX_CD8<32, CD8VT1>; + +// Convert scalar double to scalar single +def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst), + (ins FR64X:$src1, FR64X:$src2), + "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>; +let mayLoad = 1 in +def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst), + (ins FR64X:$src1, f64mem:$src2), + "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX_4V, VEX_LIG, VEX_W, + Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>; +} + +def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>, + Requires<[HasAVX512]>; +def : Pat<(fextend (loadf32 
addr:$src)), + (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>; + +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX512, OptForSize]>; + +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, + Requires<[HasAVX512, OptForSpeed]>; + +def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, + Requires<[HasAVX512]>; + +multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, + RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT, ValueType InVT, + Domain d> { +let neverHasSideEffects = 1 in { + def rr : AVX512PI, EVEX; + let mayLoad = 1 in + def rm : AVX512PI, EVEX; +} // neverHasSideEffects = 1 +} + +defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround, + memopv8f64, f512mem, v8f32, v8f64, + SSEPackedSingle>, EVEX_V512, VEX_W, OpSize, + EVEX_CD8<64, CD8VF>; + +defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, + memopv4f64, f256mem, v8f64, v8f32, + SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>; +def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; + +//===----------------------------------------------------------------------===// +// AVX-512 Vector convert from sign integer to float/double +//===----------------------------------------------------------------------===// + +defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, + memopv8i64, i512mem, v16f32, v16i32, + SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, + memopv4i64, i256mem, v8f64, v8i32, + SSEPackedDouble>, EVEX_V512, XS, + EVEX_CD8<32, CD8VH>; + +defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, + memopv16f32, f512mem, v16i32, v16f32, + SSEPackedSingle>, EVEX_V512, XS, + EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, + memopv8f64, f512mem, v8i32, v8f64, + SSEPackedDouble>, EVEX_V512, OpSize, VEX_W, + EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, + memopv16f32, f512mem, v16i32, v16f32, + SSEPackedSingle>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, + memopv8f64, f512mem, v8i32, v8f64, + SSEPackedDouble>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + +defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, + memopv4i64, f256mem, v8f64, v8i32, + SSEPackedDouble>, EVEX_V512, XS, + EVEX_CD8<32, CD8VH>; + +defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, + memopv16i32, f512mem, v16f32, v16i32, + SSEPackedSingle>, EVEX_V512, XD, + EVEX_CD8<32, CD8VF>; + +def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), + (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr + (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + + +def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src), + (VCVTDQ2PSZrr VR512:$src)>; +def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))), + (VCVTDQ2PSZrm addr:$src)>; + +def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), + "vcvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_cvt_ps2dq_512 VR512:$src))], + IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512; +def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins 
f512mem:$src), + "vcvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + + +let Predicates = [HasAVX512] in { + def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), + (VCVTPD2PSZrm addr:$src)>; + def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; +} + +let Defs = [EFLAGS], Predicates = [HasAVX512] in { + defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, + "ucomiss{z}">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, + "ucomisd{z}">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + let Pattern = [] in { + defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, + "comiss{z}">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, + "comisd{z}">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + } + defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, + load, "ucomiss">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, + load, "ucomisd">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + + defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, + load, "comiss">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, + load, "comisd">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; +} + +/// avx512_unop_p - AVX-512 unops in packed form. +multiclass avx512_fp_unop_p opc, string OpcodeStr, SDNode OpNode> { + def PSZr : AVX5128I, + EVEX, EVEX_V512; + def PSZm : AVX5128I, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr : AVX5128I, + EVEX, EVEX_V512, VEX_W; + def PDZm : AVX5128I, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms. +multiclass avx512_fp_unop_p_int opc, string OpcodeStr, + Intrinsic V16F32Int, Intrinsic V8F64Int> { + def PSZr_Int : AVX5128I, + EVEX, EVEX_V512; + def PSZm_Int : AVX5128I, EVEX, + EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr_Int : AVX5128I, + EVEX, EVEX_V512, VEX_W; + def PDZm_Int : AVX5128I, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +/// avx512_fp_unop_s - AVX-512 unops in scalar form. 
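+/// These back the vrcp14/vrsqrt14 scalar forms below, which return
+/// approximations with relative error bounded by 2^-14. C++ model of the
+/// accuracy contract for vrcp14ss (illustrative sketch, not part of this
+/// patch):
+///
+///   #include <cmath>
+///   // Checks |approx - 1/x| <= 2^-14 * |1/x|.
+///   bool rcp14_ok(float x, float approx) {
+///     float exact = 1.0f / x;
+///     return std::fabs(approx - exact) <= std::ldexp(std::fabs(exact), -14);
+///   }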
+multiclass avx512_fp_unop_s opc, string OpcodeStr, + Intrinsic F32Int, Intrinsic F64Int> { + let hasSideEffects = 0 in { + def SSZr : AVX5128I, EVEX_4V; + let mayLoad = 1 in { + def SSZm : AVX5128I, EVEX_4V, EVEX_CD8<32, CD8VT1>; + def SSZm_Int : AVX5128I, + EVEX_4V, EVEX_CD8<32, CD8VT1>; + } + def SDZr : AVX5128I, + EVEX_4V, VEX_W; + let mayLoad = 1 in { + def SDZm : AVX5128I, + EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; + def SDZm_Int : AVX5128I, + EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; + } +} +} + +defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14", int_x86_avx512_rcp14_ss, + int_x86_avx512_rcp14_sd>, + avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>, + avx512_fp_unop_p_int<0x4C, "vrcp14", + int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>; + +defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14", int_x86_avx512_rsqrt14_ss, + int_x86_avx512_rsqrt14_sd>, + avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>, + avx512_fp_unop_p_int<0x4E, "vrsqrt14", + int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>; + +multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, + Intrinsic V16F32Int, Intrinsic V8F64Int, + OpndItins itins_s, OpndItins itins_d> { + def PSZrr :AVX512PSI, + EVEX, EVEX_V512; + + let mayLoad = 1 in + def PSZrm : AVX512PSI, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + + def PDZrr : AVX512PDI, + EVEX, EVEX_V512; + + let mayLoad = 1 in + def PDZrm : AVX512PDI, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; + + def PSZr_Int : AVX512PSI, + EVEX, EVEX_V512; + def PSZm_Int : AVX512PSI, EVEX, + EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr_Int : AVX512PDI, + EVEX, EVEX_V512, VEX_W; + def PDZm_Int : AVX512PDI, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +multiclass avx512_sqrt_scalar opc, string OpcodeStr, + Intrinsic F32Int, Intrinsic F64Int, + OpndItins itins_s, OpndItins itins_d> { + def SSZr : SI, XS, EVEX_4V; + def SSZr_Int : SIi8, XS, EVEX_4V; + let mayLoad = 1 in { + def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; + def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; + } + def SDZr : SI, + XD, EVEX_4V, VEX_W; + def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; + let mayLoad = 1 in { + def SDZm : SI, + XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + def SDZm_Int : SIi8, + XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + } +} + + +defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", + int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, + SSE_SQRTSS, SSE_SQRTSD>, + avx512_sqrt_packed<0x51, "vsqrt", fsqrt, + int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512, + SSE_SQRTPS, SSE_SQRTPD>; + +def : Pat<(f32 (fsqrt FR32X:$src)), + (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (fsqrt (load addr:$src))), + (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; +def : Pat<(f64 (fsqrt FR64X:$src)), + (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>; +def : Pat<(f64 (fsqrt (load addr:$src))), + (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +def : Pat<(f32 (X86frsqrt FR32X:$src)), + (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (X86frsqrt (load addr:$src))), + (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +def : Pat<(f32 (X86frcp FR32X:$src)), + (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (X86frcp (load addr:$src))), + (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +multiclass avx512_fp_unop_rm opcps, bits<8> opcpd, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + PatFrag mem_frag32, PatFrag mem_frag64, + Intrinsic V4F32Int, Intrinsic V2F64Int, + 
CD8VForm VForm> { +let ExeDomain = SSEPackedSingle in { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr : AVX512AIi8; + + // Vector intrinsic operation, mem + def PSm : AVX512AIi8, + EVEX_CD8<32, VForm>; +} // ExeDomain = SSEPackedSingle + +let ExeDomain = SSEPackedDouble in { + // Vector intrinsic operation, reg + def PDr : AVX512AIi8; + + // Vector intrinsic operation, mem + def PDm : AVX512AIi8, + EVEX_CD8<64, VForm>; +} // ExeDomain = SSEPackedDouble +} + +multiclass avx512_fp_binop_rm opcss, bits<8> opcsd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic F64Int> { +let ExeDomain = GenericDomain in { + // Operation, reg. + let hasSideEffects = 0 in + def SSr : AVX512AIi8; + + // Intrinsic operation, reg. + def SSr_Int : AVX512AIi8; + + // Intrinsic operation, mem. + def SSm : AVX512AIi8, + EVEX_CD8<32, CD8VT1>; + + // Operation, reg. + let hasSideEffects = 0 in + def SDr : AVX512AIi8, VEX_W; + + // Intrinsic operation, reg. + def SDr_Int : AVX512AIi8, + VEX_W; + + // Intrinsic operation, mem. + def SDm : AVX512AIi8, + VEX_W, EVEX_CD8<64, CD8VT1>; +} // ExeDomain = GenericDomain +} + +let Predicates = [HasAVX512] in { + defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale", + int_x86_avx512_rndscale_ss, + int_x86_avx512_rndscale_sd>, EVEX_4V; + + defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f256mem, VR512, + memopv16f32, memopv8f64, + int_x86_avx512_rndscale_ps_512, + int_x86_avx512_rndscale_pd_512, CD8VF>, + EVEX, EVEX_V512; +} + +def : Pat<(ffloor FR32X:$src), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>; +def : Pat<(f64 (ffloor FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>; +def : Pat<(f32 (fnearbyint FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>; +def : Pat<(f64 (fnearbyint FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>; +def : Pat<(f32 (fceil FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>; +def : Pat<(f64 (fceil FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>; +def : Pat<(f32 (frint FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>; +def : Pat<(f64 (frint FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>; +def : Pat<(f32 (ftrunc FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>; +def : Pat<(f64 (ftrunc FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>; + +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x3))>; + +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x3))>; + +//------------------------------------------------- +// Integer truncate and extend operations +//------------------------------------------------- + +multiclass avx512_trunc_sat opc, string 
OpcodeStr, + RegisterClass dstRC, RegisterClass srcRC, + RegisterClass KRC, X86MemOperand x86memop> { + def rr : AVX512XS8I, EVEX; + + def krr : AVX512XS8I, EVEX, EVEX_KZ; + + def mr : AVX512XS8I, EVEX; +} +defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; +defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; +defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; + +def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; +def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; +def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; +def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; +def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; + +def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), + (VPMOVDBkrr VK16WM:$mask, VR512:$src)>; +def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), + (VPMOVDWkrr VK16WM:$mask, VR512:$src)>; +def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), + (VPMOVQWkrr VK8WM:$mask, VR512:$src)>; +def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), + (VPMOVQDkrr VK8WM:$mask, VR512:$src)>; + + +multiclass avx512_extend opc, string OpcodeStr, RegisterClass DstRC, + RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT, ValueType InVT> { + + def rr : AVX5128I, EVEX; + def rm : AVX5128I, + EVEX; +} + +defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext, + memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, + EVEX_CD8<8, CD8VQ>; +defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext, + memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, + EVEX_CD8<8, CD8VO>; +defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext, + memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, + EVEX_CD8<16, CD8VH>; +defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, 
VR128X, X86vzext, + memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, + EVEX_CD8<16, CD8VQ>; +defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext, + memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, + EVEX_CD8<32, CD8VH>; + +defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext, + memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, + EVEX_CD8<8, CD8VQ>; +defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext, + memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, + EVEX_CD8<8, CD8VO>; +defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext, + memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, + EVEX_CD8<16, CD8VH>; +defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext, + memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, + EVEX_CD8<16, CD8VQ>; +defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext, + memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, + EVEX_CD8<32, CD8VH>; + +//===----------------------------------------------------------------------===// +// GATHER - SCATTER Operations + +multiclass avx512_gather opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayLoad = 1, + Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in + def rm : AVX5128I, EVEX, EVEX_K; +} +defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +multiclass avx512_scatter opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayStore = 1, Constraints = "$mask = $mask_wb" in + def mr : AVX5128I, EVEX, EVEX_K; +} + +defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +//===----------------------------------------------------------------------===// +// VSHUFPS 
- VSHUFPD Operations + +multiclass avx512_shufp { + def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), + (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, + EVEX_4V, TB, Sched<[WriteShuffleLd, ReadAfterLd]>; + def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, + (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, + EVEX_4V, TB, Sched<[WriteShuffle]>; +} + +defm VSHUFPSZ : avx512_shufp, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VSHUFPDZ : avx512_shufp, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + + +multiclass avx512_alignr { + def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, EVEX_4V; + def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, EVEX_4V; +} +defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; + +multiclass avx512_vpabs opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop> { + def rr : AVX5128I, + EVEX; + def rm : AVX5128I, + EVEX; +} + +defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index f504480..c86b512 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -221,6 +221,8 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; +def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index fd525f6..f36c042 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4444,6 +4444,18 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; +def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>; + +def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 
0))),
+          (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
+
+def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
+          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
+          (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
+
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int first element to Doubleword Int
 //
--
cgit v1.1


From 715d98d657491b3fb8ea0e14643e9801b2f9628c Mon Sep 17 00:00:00 2001
From: Joey Gouly
Date: Thu, 12 Sep 2013 10:28:05 +0000
Subject: Add an instruction deprecation feature to TableGen.

The 'Deprecated' class allows you to specify a SubtargetFeature that the
instruction is deprecated on.

The 'ComplexDeprecationPredicate' class allows you to define a custom
predicate that is called to check for deprecation.
For example:
  ComplexDeprecationPredicate<"MCR">
would mean you would have to define the following function:
  bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                             std::string &Info)
which returns 'false' for not deprecated, and 'true' for deprecated,
storing the warning message in 'Info'.

The MCTargetAsmParser constructor was changed to take an extra argument of
the MCInstrInfo class, so out-of-tree targets will need to be changed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190598 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp |  5 ++--
 lib/Target/ARM/ARMInstrInfo.td                    |  5 ++--
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp         | 35 +++++------------------
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp   | 18 +++++++++++-
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp       | 10 +++++--
 lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp     |  5 ++--
 lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp |  5 ++--
 lib/Target/X86/AsmParser/X86AsmParser.cpp         |  5 ++--
 8 files changed, 47 insertions(+), 41 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 68d4be4..f7e9c6f 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -54,8 +54,9 @@ public:
 #include "AArch64GenAsmMatcher.inc"
   };
 
-  AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
-    : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+  AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+                   const MCInstrInfo &MII)
+    : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
     MCAsmParserExtension::Initialize(_Parser);
 
     // Initialize the set of available features.
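The commit message above fully specifies the predicate's signature; the
following is a minimal sketch of what such a hook can look like for a
hypothetical target "Foo" (the feature mask, operand indices, and message
are illustrative placeholders, not part of this patch; the real ARM
predicate appears in ARMMCTargetDesc.cpp in the diff below):

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include <string>

    using namespace llvm;

    // Placeholder feature mask for the hypothetical target; a real target
    // would use its TableGen-generated feature constant instead.
    enum { Foo_HasV9Ops = 1ULL << 0 };

    static bool getFooDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
                                      std::string &Info) {
      // Deprecated only when the subtarget has the placeholder feature and
      // operand 1 is the immediate 0, mirroring the MCR check added below.
      if ((STI.getFeatureBits() & Foo_HasV9Ops) &&
          MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
        Info = "deprecated on foo-v9"; // text surfaced in the warning
        return true;
      }
      return false;
    }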
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c9bad1c..b488f26 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1840,7 +1840,7 @@ defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>; defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>; def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, - "setend\t$end", []>, Requires<[IsARM]> { + "setend\t$end", []>, Requires<[IsARM]>, Deprecated { bits<1> end; let Inst{31-10} = 0b1111000100000001000000; let Inst{9} = end; @@ -4772,7 +4772,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>; + imm:$CRm, imm:$opc2)]>, + ComplexDeprecationPredicate<"MCR">; def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cfa24f9..032e744 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -47,6 +48,7 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + const MCInstrInfo &MII; const MCRegisterInfo *MRI; // Unwind directives state @@ -232,8 +234,6 @@ class ARMAsmParser : public MCTargetAsmParser { SmallVectorImpl &Operands); bool shouldOmitPredicateOperand(StringRef Mnemonic, SmallVectorImpl &Operands); - bool isDeprecated(MCInst &Inst, StringRef &Info); - public: enum ARMMatchResultTy { Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY, @@ -245,8 +245,9 @@ public: }; - ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser), FPReg(-1) { + ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, + const MCInstrInfo &MII) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), FPReg(-1) { MCAsmParserExtension::Initialize(_Parser); // Cache the MCRegisterInfo. @@ -4972,14 +4973,6 @@ bool ARMAsmParser::shouldOmitPredicateOperand( return false; } -bool ARMAsmParser::isDeprecated(MCInst &Inst, StringRef &Info) { - if (hasV8Ops() && Inst.getOpcode() == ARM::SETEND) { - Info = "armv8"; - return true; - } - return false; -} - static bool isDataTypeToken(StringRef Tok) { return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" || Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" || @@ -5296,16 +5289,6 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) { return false; } -// FIXME: We would really prefer to have MCInstrInfo (the wrapper around -// the ARMInsts array) instead. Getting that here requires awkward -// API changes, though. Better way? -namespace llvm { -extern const MCInstrDesc ARMInsts[]; -} -static const MCInstrDesc &getInstDesc(unsigned Opcode) { - return ARMInsts[Opcode]; -} - // Return true if instruction has the interesting property of being // allowed in IT blocks, but not being predicable. 
static bool instIsBreakpoint(const MCInst &Inst) { @@ -5320,7 +5303,7 @@ static bool instIsBreakpoint(const MCInst &Inst) { bool ARMAsmParser:: validateInstruction(MCInst &Inst, const SmallVectorImpl &Operands) { - const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); // Check the IT block state first. @@ -5513,10 +5496,6 @@ validateInstruction(MCInst &Inst, } } - StringRef DepInfo; - if (isDeprecated(Inst, DepInfo)) - Warning(Loc, "deprecated on " + DepInfo); - return false; } @@ -7553,7 +7532,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // 16-bit thumb arithmetic instructions either require or preclude the 'S' // suffix depending on whether they're in an IT block or not. unsigned Opc = Inst.getOpcode(); - const MCInstrDesc &MCID = getInstDesc(Opc); + const MCInstrDesc &MCID = MII.get(Opc); if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { assert(MCID.hasOptionalDef() && "optionally flag setting instruction missing optional def operand"); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index caa1949..ea5d7ba 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -26,16 +26,32 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_REGINFO_MC_DESC #include "ARMGenRegisterInfo.inc" +static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, + std::string &Info) { + // Checks for the deprecated CP15ISB encoding: + // mcr pX, #0, rX, c7, c5, #4 + if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && + (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) && + (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7) && + (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) && + (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) { + Info = "deprecated on armv8"; + return true; + } + return false; +} + #define GET_INSTRINFO_MC_DESC #include "ARMGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC #include "ARMGenSubtargetInfo.inc" -using namespace llvm; std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { Triple triple(TT); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index e547de5..c4ce4ff 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -24,6 +24,10 @@ using namespace llvm; +namespace llvm { +class MCInstrInfo; +} + namespace { class MipsAssemblerOptions { public: @@ -201,8 +205,10 @@ class MipsAsmParser : public MCTargetAsmParser { bool processInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); public: - MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser), hasConsumedDollar(false) { + MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII) + : MCTargetAsmParser(), STI(sti), Parser(parser), + hasConsumedDollar(false) { // Initialize the set of available features. 
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index bfa9bb7..4827c8f 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -218,8 +218,9 @@ class PPCAsmParser : public MCTargetAsmParser { public: - PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, + const MCInstrInfo &MII) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 58af2c4..3551b2d 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -327,8 +327,9 @@ private: StringRef Mnemonic); public: - SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser) { + SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII) + : MCTargetAsmParser(), STI(sti), Parser(parser) { MCAsmParserExtension::Initialize(Parser); // Initialize the set of available features. diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 3d56acf..93c2169 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -556,8 +556,9 @@ private: /// } public: - X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { + X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII) + : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); -- cgit v1.1 From c0b12dfd0a83081c1ebbb55a89c7a2c1f98f1842 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 12 Sep 2013 14:40:06 +0000 Subject: Mark PPC MFTB and DST (and friends) as deprecated Use the new instruction deprecation feature to mark mftb (now replaced with mfspr) and dst (along with the other Altivec cache control instructions) as deprecated when targeting cores supporting at least ISA v2.03. 
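On the consumer side, a matched MCInst can be checked against the table
emitted from these annotations; a minimal sketch, assuming the
MCInstrDesc::getDeprecatedInfo hook that the deprecation feature generates
(the wrapper function itself is illustrative, not code from this patch):

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include <string>

    using namespace llvm;

    // Returns true and fills 'Msg' when 'Inst' is deprecated for this
    // subtarget, e.g. mftb on cores with the DeprecatedMFTB feature set.
    static bool isDeprecatedInst(const MCInstrInfo &MII, MCInst &Inst,
                                 MCSubtargetInfo &STI, std::string &Msg) {
      return MII.get(Inst.getOpcode()).getDeprecatedInfo(Inst, STI, Msg);
    }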
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190605 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 6 +++-- lib/Target/PowerPC/PPC.td | 37 ++++++++++++++++++--------- lib/Target/PowerPC/PPCInstrAltivec.td | 30 ++++++++++++++-------- lib/Target/PowerPC/PPCInstrInfo.td | 2 +- lib/Target/PowerPC/PPCSubtarget.cpp | 2 ++ lib/Target/PowerPC/PPCSubtarget.h | 4 +++ 6 files changed, 56 insertions(+), 25 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 4827c8f..6896e7a 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" @@ -174,6 +175,7 @@ struct PPCOperand; class PPCAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + const MCInstrInfo &MII; bool IsPPC64; MCAsmParser &getParser() const { return Parser; } @@ -219,8 +221,8 @@ class PPCAsmParser : public MCTargetAsmParser { public: PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + const MCInstrInfo &_MII) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 0d950ee..57ee264 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -88,6 +88,11 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", "Enable QPX instructions">; +def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", + "Treat mftb as deprecated">; +def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", + "Treat vector data stream cache control instructions as deprecated">; + // Note: Future features to add when support is extended to more // recent ISA levels: // @@ -148,10 +153,10 @@ include "PPCInstrInfo.td" def : Processor<"generic", G3Itineraries, [Directive32]>; def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, - FeatureBookE]>; + FeatureBookE, DeprecatedMFTB]>; def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, - FeatureBookE]>; + FeatureBookE, DeprecatedMFTB]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603, @@ -187,13 +192,16 @@ def : ProcessorModel<"g5", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureFRES, FeatureFRSQRTE, - Feature64Bit /*, Feature64BitRegs */]>; + Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"e500mc", PPCE500mcModel, [DirectiveE500mc, FeatureMFOCRF, - FeatureSTFIWX, FeatureBookE, FeatureISEL]>; + FeatureSTFIWX, FeatureBookE, FeatureISEL, + DeprecatedMFTB]>; def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, - FeatureSTFIWX, FeatureBookE, FeatureISEL]>; + FeatureSTFIWX, FeatureBookE, FeatureISEL, + DeprecatedMFTB]>; def 
: ProcessorModel<"a2", PPCA2Model, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, @@ -201,7 +209,7 @@ def : ProcessorModel<"a2", PPCA2Model, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, Feature64Bit - /*, Feature64BitRegs */]>; + /*, Feature64BitRegs */, DeprecatedMFTB]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, @@ -209,7 +217,7 @@ def : ProcessorModel<"a2q", PPCA2Model, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, Feature64Bit - /*, Feature64BitRegs */, FeatureQPX]>; + /*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>; def : ProcessorModel<"pwr3", G5Model, [DirectivePwr3, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF, @@ -222,24 +230,28 @@ def : ProcessorModel<"pwr5", G5Model, [DirectivePwr5, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureSTFIWX, Feature64Bit]>; + FeatureSTFIWX, Feature64Bit, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr5x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureSTFIWX, FeatureFPRND, Feature64Bit]>; + FeatureSTFIWX, FeatureFPRND, Feature64Bit, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6", G5Model, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>; + FeatureFPRND, Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, Feature64Bit]>; + FeatureFPRND, Feature64Bit, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", G5Model, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, @@ -247,7 +259,8 @@ def : ProcessorModel<"pwr7", G5Model, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */]>; + Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index fdea51d..a55abe3 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -229,35 +229,45 @@ let Predicates = [HasAltivec] in { let isCodeGenOnly = 1 in { def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStLoad /*FIXME*/, []>; + "dss $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStLoad /*FIXME*/, []>; + "dssall", LdStLoad /*FIXME*/, []>, + Deprecated; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; def 
DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; def DSTST64 : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated; } def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e19be00..df61844 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1867,7 +1867,7 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), "mtspr $SPR, $RT", SprMTSPR>; def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), - "mftb $RT, $SPR", SprMFTB>; + "mftb $RT, $SPR", SprMFTB>, Deprecated; let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index fd3bc2f..7231ab1 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -90,6 +90,8 @@ void PPCSubtarget::initializeEnvironment() { HasPOPCNTD = false; HasLDBRX = false; IsBookE = false; + DeprecatedMFTB = false; + DeprecatedDST = false; HasLazyResolverStubs = false; IsJITCodeModel = false; } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 179ceb5..a9fa98f 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -88,6 +88,8 @@ protected: bool HasPOPCNTD; bool HasLDBRX; bool IsBookE; + bool DeprecatedMFTB; + bool DeprecatedDST; bool HasLazyResolverStubs; bool IsJITCodeModel; bool IsLittleEndian; @@ -190,6 +192,8 @@ public: bool hasPOPCNTD() const { return HasPOPCNTD; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } + bool isDeprecatedMFTB() const { return DeprecatedMFTB; } + bool isDeprecatedDST() const { return DeprecatedDST; } const Triple &getTargetTriple() const { return TargetTriple; } -- cgit v1.1 From 1f1bd9a54d25d4e2c5da13c2cae7fa5e3d8acc9f Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Thu, 12 Sep 2013 15:51:31 +0000 Subject: Partial support for Intel SHA Extensions (sha1rnds4) Add basic assembly/disassembly support for the first Intel SHA instruction 'sha1rnds4'. Also includes feature flag, and test cases. 
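Although this patch only covers assembly and disassembly of sha1rnds4, the
instruction is ordinarily reached from C/C++ through the SHA-NI intrinsics;
the snippet below is a usage sketch under that assumption (it presumes a
compiler exposing _mm_sha1rnds4_epu32 and the "sha" target feature, neither
of which is part of this patch):

    #include <immintrin.h>

    // One group of four SHA-1 rounds: 'abcd' holds the packed working
    // state and 'msg' the prepared message words; the immediate (0..3)
    // selects the round function, with 0 covering rounds 0-19.
    __attribute__((target("sha")))
    __m128i sha1_rounds4(__m128i abcd, __m128i msg) {
      return _mm_sha1rnds4_epu32(abcd, msg, 0);
    }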
Support for the remaining instructions will follow in a separate patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190611 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 3 +++ lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86InstrSSE.td | 16 ++++++++++++++++ lib/Target/X86/X86Subtarget.cpp | 5 +++++ lib/Target/X86/X86Subtarget.h | 4 ++++ 5 files changed, 29 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index da989ad..a183d3a 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -137,6 +137,9 @@ def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true", "Support HLE">; def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">; +def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true", + "Enable SHA instructions", + [FeatureSSE2]>; def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 3123cbc..961109f 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -675,6 +675,7 @@ def HasRTM : Predicate<"Subtarget->hasRTM()">; def HasHLE : Predicate<"Subtarget->hasHLE()">; def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">; def HasADX : Predicate<"Subtarget->hasADX()">; +def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f36c042..83dd320 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7321,6 +7321,22 @@ let Constraints = "$src1 = $dst" in { } //===----------------------------------------------------------------------===// +// SHA-NI Instructions +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst", hasSideEffects = 0, Predicates = [HasSHA] in { + def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>, TA; + let mayLoad = 1 in + def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>, TA; +} + +//===----------------------------------------------------------------------===// // AES-NI Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index a887b81..0c8e2c5 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -375,6 +375,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasCDI = true; ToggleFeature(X86::FeatureCDI); } + if (IsIntel && ((EBX >> 29) & 0x1)) { + HasSHA = true; + ToggleFeature(X86::FeatureSHA); + } } } } @@ -497,6 +501,7 @@ void X86Subtarget::initializeEnvironment() { HasCDI = false; HasPFI = false; HasADX = false; + HasSHA = false; HasPRFCHW = false; HasRDSEED = false; IsBTMemSlow = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 67b88eb..28aae20 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -127,6 +127,9 
@@ protected: /// HasADX - Processor has ADX instructions. bool HasADX; + /// HasSHA - Processor has SHA instructions. + bool HasSHA; + /// HasPRFCHW - Processor has PRFCHW instructions. bool HasPRFCHW; @@ -281,6 +284,7 @@ public: bool hasRTM() const { return HasRTM; } bool hasHLE() const { return HasHLE; } bool hasADX() const { return HasADX; } + bool hasSHA() const { return HasSHA; } bool hasPRFCHW() const { return HasPRFCHW; } bool hasRDSEED() const { return HasRDSEED; } bool isBTMemSlow() const { return IsBTMemSlow; } -- cgit v1.1 From ba7183bc5284a0e4254ad12b78e2ea61e291dd88 Mon Sep 17 00:00:00 2001 From: Roman Divacky Date: Thu, 12 Sep 2013 17:50:54 +0000 Subject: Implement asm support for a few PowerPC bookIII that are needed for assembling FreeBSD kernel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190618 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 2 + lib/Target/PowerPC/PPCInstrFormats.td | 33 +++++++++++++ lib/Target/PowerPC/PPCInstrInfo.td | 69 +++++++++++++++++++++++++++ lib/Target/PowerPC/PPCSchedule.td | 8 ++++ 4 files changed, 112 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 6896e7a..c2ea616 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1360,6 +1360,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, switch (Kind) { case MCK_0: ImmVal = 0; break; case MCK_1: ImmVal = 1; break; + case MCK_2: ImmVal = 2; break; + case MCK_3: ImmVal = 3; break; default: return Match_InvalidOperand; } diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 42adc02..29233d4 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -398,6 +398,13 @@ class XForm_1a opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let RST = 0; } +class XForm_rs opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo { + let A = 0; + let B = 0; +} + class XForm_6 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : XForm_base_r3xo_swapped { @@ -438,6 +445,17 @@ class XForm_16 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +class XForm_mtmsr opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I { + bits<5> RS; + bits<1> L; + + let Inst{6-10} = RS; + let Inst{15} = L; + let Inst{21-30} = xo; +} + class XForm_16_ext opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : XForm_16 { @@ -534,6 +552,21 @@ class XForm_43 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = RC; } +class XForm_0 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo { + let RST = 0; + let A = 0; + let B = 0; +} + +class XForm_16b opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_base_r3xo { + let RST = 0; + let A = 0; +} + // DCB_Form - Form X instruction, used for dcb* instructions. 
class DCB_Form xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index df61844..a9c916f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2319,6 +2319,35 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), "wait $L", LdStLoad, []>; +def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L), + "mtmsr $RS, $L", SprMTMSR>; + +def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), + "mfmsr $RT", SprMFMSR, []>; + +def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L), + "mtmsrd $RS, $L", SprMTMSRD>; + +def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), + "slbie $RB", SprSLBIE, []>; + +def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB), + "slbmte $RS, $RB", SprSLBMTE, []>; + +def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB), + "slbmfee $RT, $RB", SprSLBMFEE, []>; + +def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>; + +def TLBSYNC : XForm_0<31, 566, (outs), (ins), + "tlbsync", SprTLBSYNC, []>; + +def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB), + "tlbiel $RB", SprTLBIEL, []>; + +def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB), + "tlbie $RB,$RS", SprTLBIE, []>; + //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases // @@ -2387,6 +2416,46 @@ def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>; +def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>; + +def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>; +def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>; +def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>; +def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>; + +def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>; +def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>; +def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>; +def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>; + +def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>; +def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>; +def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>; +def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>; + +def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>; +def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>; +def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>; +def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>; + +def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>; + +def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>; +def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>; + +def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>; + +def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>; +def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>; + +def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>; +def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>; +def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>; +def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>; + +def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>; + def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def EXTLWIo : PPCAsmPseudo<"extlwi. 
$rA, $rS, $n, $b", diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 660c0c3..92ba69c 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -108,6 +108,14 @@ def VecPerm : InstrItinClass; def VecFPRound : InstrItinClass; def VecVSL : InstrItinClass; def VecVSR : InstrItinClass; +def SprMTMSRD : InstrItinClass; +def SprSLIE : InstrItinClass; +def SprSLBIE : InstrItinClass; +def SprSLBMTE : InstrItinClass; +def SprSLBMFEE : InstrItinClass; +def SprSLBIA : InstrItinClass; +def SprTLBIEL : InstrItinClass; +def SprTLBIE : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -- cgit v1.1 From 4a1535c0383254741bcddd3500081782aad11864 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 12 Sep 2013 19:04:12 +0000 Subject: Make the PPC fast-math sqrt expansion safe at 0 In fast-math mode sqrt(x) is calculated using the fast expansion of the reciprocal of the reciprocal sqrt expansion. The reciprocal and reciprocal sqrt expansions use the associated estimate instructions along with some Newton iterations. Unfortunately, as a result, sqrt(0) was being calculated as NaN, which is not correct. Now we explicitly return a result of zero if the input is zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190624 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index d341074..b4ba527 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7021,8 +7021,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (RV.getNode() != 0) { DCI.AddToWorklist(RV.getNode()); RV = DAGCombineFastRecip(RV, DCI); - if (RV.getNode() != 0) + if (RV.getNode() != 0) { + // Unfortunately, RV is now NaN if the input was exactly 0. Select out + // this case and force the answer to 0. + + EVT VT = RV.getValueType(); + + SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType()); + if (VT.isVector()) { + assert(VT.getVectorNumElements() == 4 && "Unknown vector type"); + Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero); + } + + SDValue ZeroCmp = + DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT), + N->getOperand(0), Zero, ISD::SETEQ); + DCI.AddToWorklist(ZeroCmp.getNode()); + DCI.AddToWorklist(RV.getNode()); + + RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT, + ZeroCmp, Zero, RV); return RV; + } } } -- cgit v1.1 From 6671cd4db079eb993f9bd340e0c33d61f0f27d81 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 12 Sep 2013 23:20:06 +0000 Subject: Fix PPC ABI for ByVal structs with vector members When a structure is passed by value, and that structure contains a vector member, according to the PPC ABI, the structure will receive enhanced alignment (so that the vector within the structure will always be aligned). This should resolve PR16641. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190636 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 58 ++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b4ba527..34571e2 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -578,24 +578,48 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } } +/// getMaxByValAlign - Helper for getByValTypeAlignment to determine +/// the desired ByVal argument alignment. +static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, + unsigned MaxMaxAlign) { + if (MaxAlign == MaxMaxAlign) + return; + if (VectorType *VTy = dyn_cast(Ty)) { + if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) + MaxAlign = 32; + else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) + MaxAlign = 16; + } else if (ArrayType *ATy = dyn_cast(Ty)) { + unsigned EltAlign = 0; + getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); + if (EltAlign > MaxAlign) + MaxAlign = EltAlign; + } else if (StructType *STy = dyn_cast(Ty)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + unsigned EltAlign = 0; + getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); + if (EltAlign > MaxAlign) + MaxAlign = EltAlign; + if (MaxAlign == MaxMaxAlign) + break; + } + } +} + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. - if (TM.getSubtarget().isDarwin()) + if (PPCSubTarget.isDarwin()) return 4; // 16byte and wider vectors are passed on 16byte boundary. - if (VectorType *VTy = dyn_cast(Ty)) - if (VTy->getBitWidth() >= 128) - return 16; - // The rest is 8 on PPC64 and 4 on PPC32 boundary. - if (PPCSubTarget.isPPC64()) - return 8; - - return 4; + unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4; + if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX()) + getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16); + return Align; } const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -2281,6 +2305,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( InVals.push_back(FIN); continue; } + + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > 8) { + ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; + CurArgOffset = ArgOffset; + } + // All aggregates smaller than 8 bytes must be passed right-justified. if (ObjSize < PtrByteSize) CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); @@ -3870,6 +3901,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (Size == 0) continue; + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > 8) { + if (BVAlign % PtrByteSize != 0) + llvm_unreachable( + "ByVal alignment is not a multiple of the pointer size"); + + ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; + } + // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); -- cgit v1.1 From a2c982129ed93f4bcb9738e0749a0b08e51a8b83 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 12 Sep 2013 23:30:48 +0000 Subject: Remove an unused variable, fixing -Werror build with latest Clang. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 34571e2..d042ed0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -609,7 +609,6 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { - const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. if (PPCSubTarget.isDarwin()) return 4; -- cgit v1.1 From fe7831861432d71de47ce502e799fb7264b9f24c Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 12 Sep 2013 23:44:44 +0000 Subject: R600: Move fabs/fneg/sel folding logic into PostProcessIsel This move makes it possible to correctly handle multiple instructions generated from a single pattern. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190643 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 262 +++------------------------------ lib/Target/R600/R600ISelLowering.cpp | 179 ++++++++++++++++++++++ lib/Target/R600/R600ISelLowering.h | 1 + 3 files changed, 197 insertions(+), 245 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 46e50bc..85e1422 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -201,92 +201,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } switch (Opc) { default: break; - case AMDGPUISD::CONST_ADDRESS: { - for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I); - I != SDNode::use_end(); I = Next) { - Next = llvm::next(I); - if (!I->isMachineOpcode()) { - continue; - } - unsigned Opcode = I->getMachineOpcode(); - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; - int SrcIdx = I.getOperandNo(); - int SelIdx; - // Unlike MachineInstrs, SDNodes do not have results in their operand - // list, so we need to increment the SrcIdx, since - // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
- if (HasDst) { - SrcIdx++; - } - - SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx); - if (SelIdx < 0) { - continue; - } - - SDValue CstOffset; - if (N->getValueType(0).isVector() || - !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset)) - continue; - - // Gather constants values - int SrcIndices[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) - }; - std::vector Consts; - for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) { - int OtherSrcIdx = SrcIndices[i]; - int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); - if (OtherSrcIdx < 0 || OtherSelIdx < 0) { - continue; - } - if (HasDst) { - OtherSrcIdx--; - OtherSelIdx--; - } - if (RegisterSDNode *Reg = - dyn_cast(I->getOperand(OtherSrcIdx))) { - if (Reg->getReg() == AMDGPU::ALU_CONST) { - ConstantSDNode *Cst = dyn_cast(I->getOperand(OtherSelIdx)); - Consts.push_back(Cst->getZExtValue()); - } - } - } - - ConstantSDNode *Cst = dyn_cast(CstOffset); - Consts.push_back(Cst->getZExtValue()); - if (!TII->fitsConstReadLimitations(Consts)) - continue; - - // Convert back to SDNode indices - if (HasDst) { - SrcIdx--; - SelIdx--; - } - std::vector Ops; - for (int i = 0, e = I->getNumOperands(); i != e; ++i) { - if (i == SrcIdx) { - Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32)); - } else if (i == SelIdx) { - Ops.push_back(CstOffset); - } else { - Ops.push_back(I->getOperand(i)); - } - } - CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size()); - } - break; - } case ISD::BUILD_VECTOR: { unsigned RegClassID; const AMDGPUSubtarget &ST = TM.getSubtarget(); @@ -508,38 +422,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { const R600InstrInfo *TII = static_cast(TM.getInstrInfo()); - if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) { - bool IsModified = false; - do { - std::vector Ops; - for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end(); - I != E; ++I) - Ops.push_back(*I); - IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops); - if (IsModified) { - Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size()); - } - } while (IsModified); - - } if (Result && Result->isMachineOpcode() && !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) && TII->hasInstrModifiers(Result->getMachineOpcode())) { - // Fold FNEG/FABS - // TODO: Isel can generate multiple MachineInst, we need to recursively - // parse Result - bool IsModified = false; - do { - std::vector Ops; - for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end(); - I != E; ++I) - Ops.push_back(*I); - IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops); - if (IsModified) { - Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size()); - } - } while (IsModified); - // If node has a single use which is CLAMP_R600, folds it if (Result->hasOneUse() && Result->isMachineOpcode()) { SDNode *PotentialClamp = *Result->use_begin(); @@ -564,120 
+449,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return Result; } -bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, - SDValue &Abs, const R600InstrInfo *TII) { - switch (Src.getOpcode()) { - case ISD::FNEG: - Src = Src.getOperand(0); - Neg = CurDAG->getTargetConstant(1, MVT::i32); - return true; - case ISD::FABS: - if (!Abs.getNode()) - return false; - Src = Src.getOperand(0); - Abs = CurDAG->getTargetConstant(1, MVT::i32); - return true; - case ISD::BITCAST: - Src = Src.getOperand(0); - return true; - default: - return false; - } -} - -bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, - const R600InstrInfo *TII, std::vector &Ops) { - int OperandIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2) - }; - int SelIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel) - }; - int NegIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg) - }; - int AbsIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs), - -1 - }; - - - for (unsigned i = 0; i < 3; i++) { - if (OperandIdx[i] < 0) - return false; - SDValue &Src = Ops[OperandIdx[i] - 1]; - SDValue &Sel = Ops[SelIdx[i] - 1]; - SDValue &Neg = Ops[NegIdx[i] - 1]; - SDValue FakeAbs; - SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs; - if (FoldOperand(Src, Sel, Neg, Abs, TII)) - return true; - } - return false; -} - -bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode, - const R600InstrInfo *TII, std::vector &Ops) { - int OperandIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) - }; - int SelIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W) - }; - int NegIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W) - }; - int AbsIdx[] = { - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W), - 
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z), - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W) - }; - - for (unsigned i = 0; i < 8; i++) { - if (OperandIdx[i] < 0) - return false; - SDValue &Src = Ops[OperandIdx[i] - 1]; - SDValue &Sel = Ops[SelIdx[i] - 1]; - SDValue &Neg = Ops[NegIdx[i] - 1]; - SDValue &Abs = Ops[AbsIdx[i] - 1]; - if (FoldOperand(Src, Sel, Neg, Abs, TII)) - return true; - } - return false; -} bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { if (!ptr) { @@ -890,26 +661,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) { } void AMDGPUDAGToDAGISel::PostprocessISelDAG() { - - if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) { - return; - } - - // Go over all selected nodes and try to fold them a bit more const AMDGPUTargetLowering& Lowering = (*(const AMDGPUTargetLowering*)getTargetLowering()); - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { + bool IsModified = false; + do { + IsModified = false; + // Go over all selected nodes and try to fold them a bit more + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) { - SDNode *Node = I; + SDNode *Node = I; - MachineSDNode *MachineNode = dyn_cast(I); - if (!MachineNode) - continue; + MachineSDNode *MachineNode = dyn_cast(I); + if (!MachineNode) + continue; - SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); - if (ResNode != Node) { - ReplaceUses(Node, ResNode); + SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); + if (ResNode != Node) { + ReplaceUses(Node, ResNode); + IsModified = true; + } } - } + CurDAG->RemoveDeadNodes(); + } while (IsModified); } diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index ff9ba52..a7b7a84 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1629,3 +1629,182 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } return SDValue(); } + +static bool +FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, + SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) { + const R600InstrInfo *TII = + static_cast(DAG.getTarget().getInstrInfo()); + if (!Src.isMachineOpcode()) + return false; + switch (Src.getMachineOpcode()) { + case AMDGPU::FNEG_R600: + if (!Neg.getNode()) + return false; + Src = Src.getOperand(0); + Neg = DAG.getTargetConstant(1, MVT::i32); + return true; + case AMDGPU::FABS_R600: + if (!Abs.getNode()) + return false; + Src = Src.getOperand(0); + Abs = DAG.getTargetConstant(1, MVT::i32); + return true; + case AMDGPU::CONST_COPY: { + unsigned Opcode = ParentNode->getMachineOpcode(); + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + + if (!Sel.getNode()) + return false; + + SDValue CstOffset = Src.getOperand(0); + if (ParentNode->getValueType(0).isVector()) + return false; + + // Gather constants values + int SrcIndices[] = { + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), + TII->getOperandIdx(Opcode, 
AMDGPU::OpName::src1_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) + }; + std::vector Consts; + for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) { + int OtherSrcIdx = SrcIndices[i]; + int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); + if (OtherSrcIdx < 0 || OtherSelIdx < 0) + continue; + if (HasDst) { + OtherSrcIdx--; + OtherSelIdx--; + } + if (RegisterSDNode *Reg = + dyn_cast(ParentNode->getOperand(OtherSrcIdx))) { + if (Reg->getReg() == AMDGPU::ALU_CONST) { + ConstantSDNode *Cst = dyn_cast( + ParentNode->getOperand(OtherSelIdx)); + Consts.push_back(Cst->getZExtValue()); + } + } + } + + ConstantSDNode *Cst = dyn_cast(CstOffset); + Consts.push_back(Cst->getZExtValue()); + if (!TII->fitsConstReadLimitations(Consts)) { + return false; + } + + Sel = CstOffset; + Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32); + return true; + } + default: + return false; + } +} + + +/// \brief Fold the instructions after selecting them +SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, + SelectionDAG &DAG) const { + const R600InstrInfo *TII = + static_cast(DAG.getTarget().getInstrInfo()); + if (!Node->isMachineOpcode()) + return Node; + unsigned Opcode = Node->getMachineOpcode(); + SDValue FakeOp; + + std::vector Ops; + for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end(); + I != E; ++I) + Ops.push_back(*I); + + if (Opcode == AMDGPU::DOT_4) { + int OperandIdx[] = { + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) + }; + int NegIdx[] = { + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W) + }; + int AbsIdx[] = { + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W) + }; + for (unsigned i = 0; i < 8; i++) { + if (OperandIdx[i] < 0) + return Node; + SDValue &Src = Ops[OperandIdx[i] - 1]; + SDValue &Neg = Ops[NegIdx[i] - 1]; + SDValue &Abs = Ops[AbsIdx[i] - 1]; + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); + if (HasDst) + SelIdx--; + SDValue &Sel = (SelIdx > -1) ? 
Ops[SelIdx] : FakeOp; + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG)) + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); + } + } else { + if (!TII->hasInstrModifiers(Opcode)) + return Node; + int OperandIdx[] = { + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2) + }; + int NegIdx[] = { + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg) + }; + int AbsIdx[] = { + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs), + -1 + }; + for (unsigned i = 0; i < 3; i++) { + if (OperandIdx[i] < 0) + return Node; + SDValue &Src = Ops[OperandIdx[i] - 1]; + SDValue &Neg = Ops[NegIdx[i] - 1]; + SDValue FakeAbs; + SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs; + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; + int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); + if (HasDst) + SelIdx--; + SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG)) + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); + } + } + + return Node; +} diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 811850d..6457ad4 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -67,6 +67,7 @@ private: void getStackAddress(unsigned StackWidth, unsigned ElemIdx, unsigned &Channel, unsigned &PtrIncr) const; bool isZero(SDValue Op) const; + virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; }; } // End namespace llvm; -- cgit v1.1 From f57d692c11f0ff6e9c45d2c48c5f362f4c575cf7 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 12 Sep 2013 23:44:53 +0000 Subject: R600: Move code handling literal folding into R600ISelLowering. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190644 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 105 --------------------------------- lib/Target/R600/R600ISelLowering.cpp | 63 ++++++++++++++++++-- 2 files changed, 59 insertions(+), 109 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 85e1422..95037ba 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -193,8 +193,6 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { } SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { - const R600InstrInfo *TII = - static_cast(TM.getInstrInfo()); unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { return NULL; // Already selected. @@ -310,109 +308,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } - - case ISD::ConstantFP: - case ISD::Constant: { - const AMDGPUSubtarget &ST = TM.getSubtarget(); - // XXX: Custom immediate lowering not implemented yet. 
Instead we use - // pseudo instructions defined in SIInstructions.td - if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - break; - } - - uint64_t ImmValue = 0; - unsigned ImmReg = AMDGPU::ALU_LITERAL_X; - - if (N->getOpcode() == ISD::ConstantFP) { - // XXX: 64-bit Immediates not supported yet - assert(N->getValueType(0) != MVT::f64); - - ConstantFPSDNode *C = dyn_cast(N); - APFloat Value = C->getValueAPF(); - float FloatValue = Value.convertToFloat(); - if (FloatValue == 0.0) { - ImmReg = AMDGPU::ZERO; - } else if (FloatValue == 0.5) { - ImmReg = AMDGPU::HALF; - } else if (FloatValue == 1.0) { - ImmReg = AMDGPU::ONE; - } else { - ImmValue = Value.bitcastToAPInt().getZExtValue(); - } - } else { - // XXX: 64-bit Immediates not supported yet - assert(N->getValueType(0) != MVT::i64); - - ConstantSDNode *C = dyn_cast(N); - if (C->getZExtValue() == 0) { - ImmReg = AMDGPU::ZERO; - } else if (C->getZExtValue() == 1) { - ImmReg = AMDGPU::ONE_INT; - } else { - ImmValue = C->getZExtValue(); - } - } - - for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use); - Use != SDNode::use_end(); Use = Next) { - Next = llvm::next(Use); - std::vector Ops; - for (unsigned i = 0; i < Use->getNumOperands(); ++i) { - Ops.push_back(Use->getOperand(i)); - } - - if (!Use->isMachineOpcode()) { - if (ImmReg == AMDGPU::ALU_LITERAL_X) { - // We can only use literal constants (e.g. AMDGPU::ZERO, - // AMDGPU::ONE, etc) in machine opcodes. - continue; - } - } else { - switch(Use->getMachineOpcode()) { - case AMDGPU::REG_SEQUENCE: break; - default: - if (!TII->isALUInstr(Use->getMachineOpcode()) || - (TII->get(Use->getMachineOpcode()).TSFlags & - R600_InstFlag::VECTOR)) { - continue; - } - } - - // Check that we aren't already using an immediate. - // XXX: It's possible for an instruction to have more than one - // immediate operand, but this is not supported yet. - if (ImmReg == AMDGPU::ALU_LITERAL_X) { - int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), - AMDGPU::OpName::literal); - if (ImmIdx == -1) { - continue; - } - - if (TII->getOperandIdx(Use->getMachineOpcode(), - AMDGPU::OpName::dst) != -1) { - // subtract one from ImmIdx, because the DST operand is usually index - // 0 for MachineInstrs, but we have no DST in the Ops vector. - ImmIdx--; - } - ConstantSDNode *C = dyn_cast(Use->getOperand(ImmIdx)); - assert(C); - - if (C->getZExtValue() != 0) { - // This instruction is already using an immediate. 
- continue; - } - - // Set the immediate value - Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32); - } - } - // Set the immediate register - Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32); - - CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands()); - } - break; - } } SDNode *Result = SelectCode(N); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a7b7a84..5db7937 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1632,7 +1632,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, static bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, - SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) { + SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) { const R600InstrInfo *TII = static_cast(DAG.getTarget().getInstrInfo()); if (!Src.isMachineOpcode()) @@ -1705,6 +1705,51 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32); return true; } + case AMDGPU::MOV_IMM_I32: + case AMDGPU::MOV_IMM_F32: { + unsigned ImmReg = AMDGPU::ALU_LITERAL_X; + uint64_t ImmValue = 0; + + + if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) { + ConstantFPSDNode *FPC = dyn_cast(Src.getOperand(0)); + float FloatValue = FPC->getValueAPF().convertToFloat(); + if (FloatValue == 0.0) { + ImmReg = AMDGPU::ZERO; + } else if (FloatValue == 0.5) { + ImmReg = AMDGPU::HALF; + } else if (FloatValue == 1.0) { + ImmReg = AMDGPU::ONE; + } else { + ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); + } + } else { + ConstantSDNode *C = dyn_cast(Src.getOperand(0)); + uint64_t Value = C->getZExtValue(); + if (Value == 0) { + ImmReg = AMDGPU::ZERO; + } else if (Value == 1) { + ImmReg = AMDGPU::ONE_INT; + } else { + ImmValue = Value; + } + } + + // Check that we aren't already using an immediate. + // XXX: It's possible for an instruction to have more than one + // immediate operand, but this is not supported yet. + if (ImmReg == AMDGPU::ALU_LITERAL_X) { + if (!Imm.getNode()) + return false; + ConstantSDNode *C = dyn_cast(Imm); + assert(C); + if (C->getZExtValue()) + return false; + Imm = DAG.getTargetConstant(ImmValue, MVT::i32); + } + Src = DAG.getRegister(ImmReg, MVT::i32); + return true; + } default: return false; } @@ -1768,7 +1813,13 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, if (HasDst) SelIdx--; SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; - if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG)) + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); + } + } else if (Opcode == AMDGPU::REG_SEQUENCE) { + for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { + SDValue &Src = Ops[i]; + if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } } else { @@ -1798,10 +1849,14 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs; bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); - if (HasDst) + int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal); + if (HasDst) { SelIdx--; + ImmIdx--; + } SDValue &Sel = (SelIdx > -1) ? 
Ops[SelIdx] : FakeOp; - if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG)) + SDValue &Imm = Ops[ImmIdx]; + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } } -- cgit v1.1 From 5251d180f4af15414a9c7ae5723dd48bc938576b Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Thu, 12 Sep 2013 23:45:00 +0000 Subject: R600: Move clamp handling code to R600IselLowering.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190645 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 34 +--------------------------------- lib/Target/R600/R600ISelLowering.cpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 33 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 95037ba..a008c96 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -309,39 +309,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { SDLoc(N), N->getValueType(0), Ops); } } - SDNode *Result = SelectCode(N); - - // Fold operands of selected node - - const AMDGPUSubtarget &ST = TM.getSubtarget(); - if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - const R600InstrInfo *TII = - static_cast(TM.getInstrInfo()); - if (Result && Result->isMachineOpcode() && - !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) - && TII->hasInstrModifiers(Result->getMachineOpcode())) { - // If node has a single use which is CLAMP_R600, folds it - if (Result->hasOneUse() && Result->isMachineOpcode()) { - SDNode *PotentialClamp = *Result->use_begin(); - if (PotentialClamp->isMachineOpcode() && - PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) { - unsigned ClampIdx = - TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp); - std::vector Ops; - unsigned NumOp = Result->getNumOperands(); - for (unsigned i = 0; i < NumOp; ++i) { - Ops.push_back(Result->getOperand(i)); - } - Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32); - Result = CurDAG->SelectNodeTo(PotentialClamp, - Result->getMachineOpcode(), PotentialClamp->getVTList(), - Ops.data(), NumOp); - } - } - } - } - - return Result; + return SelectCode(N); } diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 5db7937..c5e814f 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1822,6 +1822,22 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } + } else if (Opcode == AMDGPU::CLAMP_R600) { + SDValue Src = Node->getOperand(0); + if (!Src.isMachineOpcode() || + !TII->hasInstrModifiers(Src.getMachineOpcode())) + return Node; + int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(), + AMDGPU::OpName::clamp); + if (ClampIdx < 0) + return Node; + std::vector Ops; + unsigned NumOp = Src.getNumOperands(); + for(unsigned i = 0; i < NumOp; ++i) + Ops.push_back(Src.getOperand(i)); + Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32); + return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node), + Node->getVTList(), Ops); } else { if (!TII->hasInstrModifiers(Opcode)) return Node; -- cgit v1.1 From dc6fc4fa1f88e4accf1abe6db67399496bfe18b2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 13 Sep 2013 04:41:06 +0000 Subject: Move operator to end of previous line 
to match coding standards. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190659 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 0c8e2c5..851ab63 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -283,8 +283,8 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { // Set processor type. Currently only Atom is detected. if (Family == 6 && - (Model == 28 || Model == 38 || Model == 39 - || Model == 53 || Model == 54)) { + (Model == 28 || Model == 38 || Model == 39 || + Model == 53 || Model == 54)) { X86ProcFamily = IntelAtom; UseLeaForSP = true; -- cgit v1.1 From 630c5e06d633fad142af4b145ee684e90754700e Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 13 Sep 2013 07:26:52 +0000 Subject: AArch64: use RegisterOperand for NEON registers. Previously we modelled VPR128 and VPR64 as essentially identical register-classes containing V0-V31 (which had Q0-Q31 as "sub_alias" sub-registers). This model is starting to cause significant problems for code generation, particularly writing EXTRACT/INSERT_SUBREG patterns for converting between the two. The change here switches to classifying VPR64 & VPR128 as RegisterOperands, which are essentially aliases for RegisterClasses with different parsing and printing behaviour. This fits almost exactly with their real status (VPR128 == FPR128 printed strangely, VPR64 == FPR64 printed strangely). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190665 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64AsmPrinter.cpp | 26 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 30 +-- lib/Target/AArch64/AArch64InstrInfo.td | 12 +- lib/Target/AArch64/AArch64InstrNEON.td | 262 +++++++++------------ lib/Target/AArch64/AArch64RegisterInfo.td | 48 +--- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 82 +++++-- .../AArch64/Disassembler/AArch64Disassembler.cpp | 32 +-- .../AArch64/InstPrinter/AArch64InstPrinter.cpp | 8 + .../AArch64/InstPrinter/AArch64InstPrinter.h | 1 + 9 files changed, 230 insertions(+), 271 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 9498722..759809f 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -32,17 +32,18 @@ using namespace llvm; /// argument to be printed as "bN". static bool printModifiedFPRAsmOperand(const MachineOperand &MO, const TargetRegisterInfo *TRI, - const TargetRegisterClass &RegClass, - raw_ostream &O) { + char RegType, raw_ostream &O) { if (!MO.isReg()) return true; for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { - if (RegClass.contains(*AR)) { - O << AArch64InstPrinter::getRegisterName(*AR); + if (AArch64::FPR8RegClass.contains(*AR)) { + O << RegType << TRI->getEncodingValue(MO.getReg()); return false; } } + + // The register doesn't correspond to anything floating-point like. return true; } @@ -157,7 +158,7 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // register. Technically, we could allocate the argument as a VPR128, but // that leads to extremely dodgy copies being generated to get the data // there. 
- if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O)) + if (printModifiedFPRAsmOperand(MO, TRI, 'v', O)) O << AArch64InstPrinter::getRegisterName(MO.getReg()); break; case MachineOperand::MO_Immediate: @@ -211,25 +212,12 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // copies ...). llvm_unreachable("FIXME: Unimplemented register pairs"); case 'b': - // Output 8-bit FP/SIMD scalar register operand, prefixed with b. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR8RegClass, O); case 'h': - // Output 16-bit FP/SIMD scalar register operand, prefixed with h. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR16RegClass, O); case 's': - // Output 32-bit FP/SIMD scalar register operand, prefixed with s. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR32RegClass, O); case 'd': - // Output 64-bit FP/SIMD scalar register operand, prefixed with d. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR64RegClass, O); case 'q': - // Output 128-bit FP/SIMD scalar register operand, prefixed with q. return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR128RegClass, O); + ExtraCode[0], O); case 'A': // Output symbolic address with appropriate relocation modifier (also // suitable for ADRP). diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index b68c43a..8597f07 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -57,17 +57,17 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) if (Subtarget->hasNEON()) { // And the vectors - addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass); } computeRegisterProperties(); @@ -3610,14 +3610,10 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &AArch64::FPR16RegClass); else if (VT == MVT::f32) return std::make_pair(0U, &AArch64::FPR32RegClass); - else if (VT == MVT::f64) - return std::make_pair(0U, &AArch64::FPR64RegClass); else if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &AArch64::VPR64RegClass); - else if (VT == MVT::f128) - return std::make_pair(0U, &AArch64::FPR128RegClass); + return std::make_pair(0U, &AArch64::FPR64RegClass); 
else if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &AArch64::VPR128RegClass); + return std::make_pair(0U, &AArch64::FPR128RegClass); break; } } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 07289b0..fef3019 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -125,6 +125,8 @@ def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; +class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; + //===----------------------------------------------------------------------===// // Call sequence pseudo-instructions //===----------------------------------------------------------------------===// @@ -2196,13 +2198,13 @@ def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra), def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)), (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra), +def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra), (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)), (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; } @@ -5162,4 +5164,4 @@ defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)), // Advanced SIMD (NEON) Support // -include "AArch64InstrNEON.td" \ No newline at end of file +include "AArch64InstrNEON.td" diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index b8840aa..f600d24 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -215,8 +215,8 @@ defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and // two operands constraints. class NeonI_3VSame_Constraint_impl size, bits<5> opcode, - SDPatternOperator opnode> + RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size, + bits<5> opcode, SDPatternOperator opnode> : NeonI_3VSame; // ORR disassembled as MOV if Vn==Vm // Vector Move - register -// Alias for ORR if Vn=Vm and it is the preferred syntax +// Alias for ORR if Vn=Vm. +// FIXME: This is actually the preferred syntax but TableGen can't deal with +// custom printing of aliases. def : NeonInstAlias<"mov $Rd.8b, $Rn.8b", - (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>; + (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>; def : NeonInstAlias<"mov $Rd.16b, $Rn.16b", - (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>; + (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>; def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{ ConstantSDNode *ImmConstVal = cast(N->getOperand(0)); @@ -571,7 +573,7 @@ def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs), // NeonI_compare_aliases class: swaps register operands to implement // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed. 
class NeonI_compare_aliases + Instruction inst, RegisterOperand VPRC> : NeonInstAlias; @@ -1324,7 +1326,7 @@ defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>; } class NeonI_mov_imm_lsl_aliases + Instruction inst, RegisterOperand VPRC> : NeonInstAlias; @@ -1401,7 +1403,7 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1, // Vector Floating Point Move Immediate -class NeonI_FMOV_impl : NeonI_1VModImm; def shr_imm64 : shr_imm<"64">; class N2VShift opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> : NeonI_2VShiftImm; // Rounding/Saturating shift class N2VShift_RQ opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDPatternOperator OpNode> : NeonI_2VShiftImm; defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; class N2VShiftAdd opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> : NeonI_2VShiftImm; // Rounding shift accumulate class N2VShiftAdd_R opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDPatternOperator OpNode> : NeonI_2VShiftImm; // Shift insert by immediate class N2VShiftIns opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDPatternOperator OpNode> : NeonI_2VShiftImm opcode, string asmop, string DestT, class N2VShR_Narrow_Hi opcode, string asmop, string DestT, string SrcT, Operand ImmTy> : NeonI_2VShiftImm { let Constraints = "$src = $Rd"; @@ -2040,15 +2042,18 @@ multiclass Neon_shiftNarrow_patterns { def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert (v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") VPR128:$Rn, imm:$Imm)))))), - (SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert (v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") VPR128:$Rn, imm:$Imm)))))), - (SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert (v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") VPR128:$Rn, imm:$Imm)))))), - (SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; } multiclass Neon_shiftNarrow_QR_patterns { @@ -2060,17 +2065,20 @@ multiclass Neon_shiftNarrow_QR_patterns { (!cast(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), - (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))), + (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))), (!cast(prefix # "_16B") - VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), - (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))), + (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))), (!cast(prefix # "_8H") - VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), - (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))), + (v1i64 
(bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))), (!cast(prefix # "_4S") - VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; } defm : Neon_shiftNarrow_patterns<"lshr">; @@ -2086,7 +2094,7 @@ defm : Neon_shiftNarrow_QR_patterns; // Convert fix-point and float-pointing class N2VCvt_Fx opcode, string asmop, string T, - RegisterClass VPRC, ValueType DestTy, ValueType SrcTy, + RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy, Operand ImmTy, SDPatternOperator IntOp> : NeonI_2VShiftImm; class NeonI_3VDL size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, SDPatternOperator ext, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy> : NeonI_3VDiff; class NeonI_3VDW size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, SDPatternOperator ext, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy> : NeonI_3VDiff size, bits<4> opcode, string asmop, string ResS, string OpS, @@ -2361,7 +2369,7 @@ defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; class NeonI_3VD_2Op size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, - RegisterClass ResVPR, RegisterClass OpVPR, + RegisterOperand ResVPR, RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy> : NeonI_3VDiff; defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; // pattern for acle intrinsic with 3 operands -class NeonI_3VDN_addhn2_3Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator get_hi, - ValueType OpTy, ValueType OpSTy> - : NeonI_3VDiff { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDN_addhn2_3Op_v1 opcode, - string asmop, - SDPatternOperator opnode> -{ - def _16b8h : NeonI_3VDN_addhn2_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", - opnode, NI_get_hi_8h, v8i16, v8i8>; - def _8h4s : NeonI_3VDN_addhn2_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s", - opnode, NI_get_hi_4s, v4i32, v4i16>; - def _4s2d : NeonI_3VDN_addhn2_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d", - opnode, NI_get_hi_2d, v2i64, v2i32>; -} - -defm ADDHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0100, "addhn2", add>; -defm SUBHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0110, "subhn2", sub>; - -// pattern for acle intrinsic with 3 operands class NeonI_3VDN_3Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - ValueType OpTy, ValueType OpSTy> + string asmop, string ResS, string OpS> : NeonI_3VDiff { + [], NoItinerary> { let Constraints = "$src = $Rd"; + let neverHasSideEffects = 1; } multiclass NeonI_3VDN_3Op_v1 opcode, - string asmop, - SDPatternOperator opnode> -{ - def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", - opnode, v8i16, v8i8>; - def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s", - opnode, v4i32, v4i16>; - def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d", - opnode, v2i64, v2i32>; -} - -defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2", - int_arm_neon_vraddhn>; -defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2", - int_arm_neon_vrsubhn>; + string asmop> { + def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">; + def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">; + def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">; +} + +defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">; +defm 
SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">; + +defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">; +defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">; + +// Patterns have to be separate because there's a SUBREG_TO_REG in the output +// part. +class NarrowHighHalfPat + : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn), + (SrcTy VPR128:$Rm)))))), + (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, VPR128:$Rm)>; + +// addhn2 patterns +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; + +// subhn2 patterns +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; + +// raddhn2 patterns +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; + +// rsubhn2 patterns +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; // pattern that need to extend result class NeonI_3VDL_Ext size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy, ValueType OpSTy> : NeonI_3VDiff; class NeonI_3VDL_Aba size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, SDPatternOperator subop, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy, ValueType OpSTy> : NeonI_3VDiff; class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator subop, SDPatternOperator opnode, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy> : NeonI_3VDiff opcode, class Neon_Scalar_D_size_patterns : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), - (SUBREG_TO_REG (i64 0), - (INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64), - (EXTRACT_SUBREG VPR64:$Rm, sub_64)), - sub_64)>; - + (INSTD VPR64:$Rn, VPR64:$Rm)>; // Scalar Integer Add let isCommutable = 1 in { @@ -2994,54 +2990,28 @@ def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>; -def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v4i16 VPR64:$src), sub_64))>; -def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v2i32 VPR64:$src), sub_64))>; -def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v2f32 VPR64:$src), sub_64))>; -def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v1i64 VPR64:$src), sub_64))>; -def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v16i8 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v8i16 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v4i32 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v2i64 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v4f32 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v2f64 VPR128:$src), sub_alias))>; - -def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), - (v8i8 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), - (v4i16 
(SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), - (v2i32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), - (v2f32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), - (v1i64 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), - (v16i8 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), - (v8i16 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), - (v4i32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), - (v2i64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), - (v4f32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), - (v2f64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; +def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>; + +def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; + +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index b3a81b1..e0eca23 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -17,10 +17,6 @@ def sub_64 : SubRegIndex<64>; def sub_32 : SubRegIndex<32>; def sub_16 : SubRegIndex<16>; def sub_8 : SubRegIndex<8>; - -// The VPR registers are handled as sub-registers of FPR equivalents, but -// they're really the same thing. We give this concept a special index. -def sub_alias : SubRegIndex<128>; } // Registers are identified with 5-bit ID numbers. 
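// (Editorial sketch, not part of the patch: RegisterOperand wraps an existing
// register class with custom assembly parsing/printing behaviour. Assuming the
// template arguments elided in the extraction below, the new definitions have
// roughly the form
//   def VPR64 : RegisterOperand<FPR64, "printVPRRegister">;
// so VPR64 shares D0-D31 with FPR64 for allocation purposes but parses and
// prints as v0-v31.)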
@@ -149,48 +145,28 @@ def FPR32 : RegisterClass<"AArch64", [f32], 32, (sequence "S%u", 0, 31)> { } -def FPR64 : RegisterClass<"AArch64", [f64], 64, - (sequence "D%u", 0, 31)> { -} +def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64], + 64, (sequence "D%u", 0, 31)>; + +def FPR128 : RegisterClass<"AArch64", + [f128,v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128, + (sequence "Q%u", 0, 31)>; -def FPR128 : RegisterClass<"AArch64", [f128], 128, - (sequence "Q%u", 0, 31)> { -} //===----------------------------------------------------------------------===// // Vector registers: //===----------------------------------------------------------------------===// -// NEON registers simply specify the overall vector, and it's expected that -// Instructions will individually specify the acceptable data layout. In -// principle this leaves two approaches open: -// + An operand, giving a single ADDvvv instruction (for example). This turns -// out to be unworkable in the assembly parser (without every Instruction -// having a "cvt" function, at least) because the constraints can't be -// properly enforced. It also complicates specifying patterns since each -// instruction will accept many types. -// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific -// details about NEON registers, but simplifies most other details. -// -// The second approach was taken. - -foreach Index = 0-31 in { - def V # Index : AArch64RegWithSubs("Q" # Index)], - [sub_alias]>, - DwarfRegNum<[!add(Index, 64)]>; +def VPR64AsmOperand : AsmOperandClass { + let Name = "VPR"; + let PredicateMethod = "isReg"; + let RenderMethod = "addRegOperands"; } -// These two classes contain the same registers, which should be reasonably -// sensible for MC and allocation purposes, but allows them to be treated -// separately for things like stack spilling. -def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64, - (sequence "V%u", 0, 31)>; +def VPR64 : RegisterOperand; -def VPR128 : RegisterClass<"AArch64", - [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128, - (sequence "V%u", 0, 31)>; +def VPR128 : RegisterOperand; // Flags register def NZCV : Register<"nzcv"> { diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f7e9c6f..51638d9 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1556,22 +1556,11 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, std::string LowerReg = Tok.getString().lower(); size_t DotPos = LowerReg.find('.'); - RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); - if (RegNum == AArch64::NoRegister) { - RegNum = StringSwitch(LowerReg.substr(0, DotPos)) - .Case("ip0", AArch64::X16) - .Case("ip1", AArch64::X17) - .Case("fp", AArch64::X29) - .Case("lr", AArch64::X30) - .Default(AArch64::NoRegister); - } - if (RegNum == AArch64::NoRegister) - return false; - + bool IsVec128 = false; SMLoc S = Tok.getLoc(); RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); - if (DotPos == StringRef::npos) { + if (DotPos == std::string::npos) { Layout = StringRef(); } else { // Everything afterwards needs to be a literal token, expected to be @@ -1582,19 +1571,76 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, // would go out of scope when we return). 
LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos); + + // See if it's a 128-bit layout first. Layout = StringSwitch(LayoutText) - .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d") - .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s") - .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h") - .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b") + .Case(".d", ".d").Case(".2d", ".2d") + .Case(".s", ".s").Case(".4s", ".4s") + .Case(".h", ".h").Case(".8h", ".8h") + .Case(".b", ".b").Case(".16b", ".16b") .Default(""); + if (Layout.size() != 0) + IsVec128 = true; + else { + Layout = StringSwitch(LayoutText) + .Case(".1d", ".1d") + .Case(".2s", ".2s") + .Case(".4h", ".4h") + .Case(".8b", ".8b") + .Default(""); + } + if (Layout.size() == 0) { - // Malformed register + // If we've still not pinned it down the register is malformed. return false; } } + RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); + if (RegNum == AArch64::NoRegister) { + RegNum = StringSwitch(LowerReg.substr(0, DotPos)) + .Case("ip0", AArch64::X16) + .Case("ip1", AArch64::X17) + .Case("fp", AArch64::X29) + .Case("lr", AArch64::X30) + .Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0) + .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1) + .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2) + .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3) + .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4) + .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5) + .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6) + .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7) + .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8) + .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9) + .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10) + .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11) + .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12) + .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13) + .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14) + .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15) + .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16) + .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17) + .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18) + .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19) + .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20) + .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21) + .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22) + .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23) + .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24) + .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25) + .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26) + .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27) + .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28) + .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29) + .Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30) + .Case("v31", IsVec128 ? 
AArch64::Q31 : AArch64::D31) + .Default(AArch64::NoRegister); + } + if (RegNum == AArch64::NoRegister) + return false; + return true; } diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 5b57b50..3baa4b5 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -85,12 +85,6 @@ static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, unsigned OptionHiS, @@ -355,28 +349,6 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, unsigned OptionHiS, uint64_t Address, @@ -608,11 +580,11 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); if (IsToVec) { - DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder); DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); } else { DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder); } // Add the lane diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index b624331..d2b6f6d 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -368,6 +368,14 @@ AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, O << "#" << (Imm * MemScale); } +void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNo).getReg(); + std::string Name = getRegisterName(Reg); + Name[0] = 'v'; + O << Name; +} + void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index f7439be..b608908 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -157,6 +157,7 @@ public: void printRegExtendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O, 
A64SE::ShiftExtSpecifiers Ext); + void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); -- cgit v1.1 From 856bf594338567a592086fe782f2f51650e4e294 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 13 Sep 2013 09:09:50 +0000 Subject: [SystemZ] Try to fold shifts into TMxx E.g. "SRL %r2, 2; TMLL %r2, 1" => "TMLL %r2, 4". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190672 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 77 ++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 20 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 5528404..131fd5c 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1135,17 +1135,37 @@ static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1, return false; } -// Check whether the CC value produced by TEST UNDER MASK is descriptive -// enough to handle an AND with Mask followed by a comparison of type Opcode -// with CmpVal. CCMask says which comparison result is being tested and -// BitSize is the number of bits in the operands. Return the CC mask that -// should be used for the TEST UNDER MASK result, or 0 if the condition is -// too complex. +// Return true if shift operation N has an in-range constant shift value. +// Store it in ShiftVal if so. +static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { + ConstantSDNode *Shift = dyn_cast(N.getOperand(1)); + if (!Shift) + return false; + + uint64_t Amount = Shift->getZExtValue(); + if (Amount >= N.getValueType().getSizeInBits()) + return false; + + ShiftVal = Amount; + return true; +} + +// Check whether an AND with Mask is suitable for a TEST UNDER MASK +// instruction and whether the CC value is descriptive enough to handle +// a comparison of type Opcode between the AND result and CmpVal. +// CCMask says which comparison result is being tested and BitSize is +// the number of bits in the operands. If TEST UNDER MASK can be used, +// return the corresponding CC mask, otherwise return 0. static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType) { assert(Mask != 0 && "ANDs with zero should have been removed by now"); + // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. + if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && + !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) + return 0; + // Work out the masks for the lowest and highest bits. unsigned HighShift = 63 - countLeadingZeros(Mask); uint64_t High = uint64_t(1) << HighShift; @@ -1230,13 +1250,15 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, // implemented as a TEST UNDER MASK instruction when the condition being // tested is as described by CCValid and CCMask. Update the arguments // with the TM version if so. -static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0, - SDValue &CmpOp1, unsigned &CCValid, - unsigned &CCMask, unsigned &ICmpType) { +static void adjustForTestUnderMask(SelectionDAG &DAG, unsigned &Opcode, + SDValue &CmpOp0, SDValue &CmpOp1, + unsigned &CCValid, unsigned &CCMask, + unsigned &ICmpType) { // Check that we have a comparison with a constant. 
ConstantSDNode *ConstCmpOp1 = dyn_cast(CmpOp1); if (!ConstCmpOp1) return; + uint64_t CmpVal = ConstCmpOp1->getZExtValue(); // Check whether the nonconstant input is an AND with a constant mask. if (CmpOp0.getOpcode() != ISD::AND) @@ -1246,21 +1268,35 @@ static void adjustForTestUnderMask(unsigned &Opcode, SDValue &CmpOp0, ConstantSDNode *Mask = dyn_cast(AndOp1.getNode()); if (!Mask) return; - - // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. uint64_t MaskVal = Mask->getZExtValue(); - if (!SystemZ::isImmLL(MaskVal) && !SystemZ::isImmLH(MaskVal) && - !SystemZ::isImmHL(MaskVal) && !SystemZ::isImmHH(MaskVal)) - return; // Check whether the combination of mask, comparison value and comparison // type are suitable. unsigned BitSize = CmpOp0.getValueType().getSizeInBits(); - unsigned NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal, - ConstCmpOp1->getZExtValue(), - ICmpType); - if (!NewCCMask) - return; + unsigned NewCCMask, ShiftVal; + if (ICmpType != SystemZICMP::SignedOnly && + AndOp0.getOpcode() == ISD::SHL && + isSimpleShift(AndOp0, ShiftVal) && + (NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal >> ShiftVal, + CmpVal >> ShiftVal, + SystemZICMP::Any))) { + AndOp0 = AndOp0.getOperand(0); + AndOp1 = DAG.getConstant(MaskVal >> ShiftVal, AndOp0.getValueType()); + } else if (ICmpType != SystemZICMP::SignedOnly && + AndOp0.getOpcode() == ISD::SRL && + isSimpleShift(AndOp0, ShiftVal) && + (NewCCMask = getTestUnderMaskCond(BitSize, CCMask, + MaskVal << ShiftVal, + CmpVal << ShiftVal, + SystemZICMP::UnsignedOnly))) { + AndOp0 = AndOp0.getOperand(0); + AndOp1 = DAG.getConstant(MaskVal << ShiftVal, AndOp0.getValueType()); + } else { + NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal, CmpVal, + ICmpType); + if (!NewCCMask) + return; + } // Go ahead and make the change. Opcode = SystemZISD::TM; @@ -1316,7 +1352,8 @@ static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG, (CCMask & SystemZ::CCMASK_CMP_UO)); } - adjustForTestUnderMask(Opcode, CmpOp0, CmpOp1, CCValid, CCMask, ICmpType); + adjustForTestUnderMask(DAG, Opcode, CmpOp0, CmpOp1, CCValid, CCMask, + ICmpType); if (Opcode == SystemZISD::ICMP || Opcode == SystemZISD::TM) return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1, DAG.getConstant(ICmpType, MVT::i32)); -- cgit v1.1 From 7c7b431d2fa3ead0a2a24c4dd81fc92f293203dd Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 13 Sep 2013 09:12:44 +0000 Subject: [SystemZ] Use getTarget{Insert,Extract}Subreg rather than getMachineNode Just a clean-up, no behavioral change intended. 
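Concretely, the three-step getMachineNode sequence collapses into a single helper call. A condensed before/after sketch of the pattern, taken from the convertTo() hunk below (not the complete function):

    // Before: materialize the subreg index and build the machine node by hand.
    SDValue SubReg32 = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
    SDNode *Extract = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                             DL, MVT::i32, N, SubReg32);
    SDValue Result(Extract, 0);

    // After: the helper builds the same EXTRACT_SUBREG node in one call.
    SDValue Result = CurDAG->getTargetExtractSubreg(SystemZ::subreg_32bit,
                                                    DL, MVT::i32, N);

getTargetInsertSubreg is the analogous helper for INSERT_SUBREG, used the same way in the lowerBITCAST and lowerOR hunks below.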
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190673 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 17 +++++------------ lib/Target/SystemZ/SystemZISelLowering.cpp | 28 +++++++++------------------- 2 files changed, 14 insertions(+), 31 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 9d71c39..01f008a 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -814,18 +814,11 @@ SDValue SystemZDAGToDAGISel::getUNDEF64(SDLoc DL) { } SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) { - if (N.getValueType() == MVT::i32 && VT == MVT::i64) { - SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64); - SDNode *Insert = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, - DL, VT, getUNDEF64(DL), N, Index); - return SDValue(Insert, 0); - } - if (N.getValueType() == MVT::i64 && VT == MVT::i32) { - SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64); - SDNode *Extract = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, VT, N, Index); - return SDValue(Extract, 0); - } + if (N.getValueType() == MVT::i32 && VT == MVT::i64) + return CurDAG->getTargetInsertSubreg(SystemZ::subreg_32bit, + DL, VT, getUNDEF64(DL), N); + if (N.getValueType() == MVT::i64 && VT == MVT::i32) + return CurDAG->getTargetExtractSubreg(SystemZ::subreg_32bit, DL, VT, N); assert(N.getValueType() == VT && "Unexpected value types"); return N; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 131fd5c..81d824c 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1387,14 +1387,8 @@ static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT, SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, SDValue(In128, 0), Op1); bool Is32Bit = is32Bit(VT); - SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT); - SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT); - SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, - VT, Result, SubReg0); - SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, - VT, Result, SubReg1); - Even = SDValue(Reg0, 0); - Odd = SDValue(Reg1, 0); + Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); + Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { @@ -1559,21 +1553,19 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, EVT InVT = In.getValueType(); EVT ResVT = Op.getValueType(); - SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64); SDValue Shift32 = DAG.getConstant(32, MVT::i64); if (InVT == MVT::i32 && ResVT == MVT::f32) { SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32); SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift); - SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, - MVT::f32, Out64, SubReg32); - return SDValue(Out, 0); + return DAG.getTargetExtractSubreg(SystemZ::subreg_32bit, + DL, MVT::f32, Out64); } if (InVT == MVT::f32 && ResVT == MVT::i32) { SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); - SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, - 
MVT::f64, SDValue(U64, 0), In, SubReg32); - SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0)); + SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_32bit, DL, + MVT::f64, SDValue(U64, 0), In); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32); SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); return Out; @@ -1817,10 +1809,8 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { // can be folded. SDLoc DL(Op); SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); - SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64); - SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, - MVT::i64, HighOp, Low32, SubReg32); - return SDValue(Result, 0); + return DAG.getTargetInsertSubreg(SystemZ::subreg_32bit, DL, + MVT::i64, HighOp, Low32); } // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first -- cgit v1.1 From 906ae15d5fa5784595273228b33e16ca16a0431d Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Fri, 13 Sep 2013 10:08:05 +0000 Subject: Test commit to verify that commit access works. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190676 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 7161850..10221d5 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -353,7 +353,7 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); if (isInt<12>(CN->getSExtValue())) { - // If the first operand is a FI, get the TargetFI Node + // If the first operand is a FI then get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast (Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); -- cgit v1.1 From 8312905790f2250d7c807b4c153be7d64ab22572 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 13 Sep 2013 11:51:52 +0000 Subject: [ARMv8] Emit the proper .fpu directive. Patch by Bradley Smith! 
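In short, when ARMv8 floating point is available the emitted .fpu names now match the spellings GAS accepts. A condensed sketch of the new behaviour, lifted from the hunk below (the pre-ARMv8 fallbacks are unchanged and elided here):

    // NEON on an ARMv8 FP subtarget: emit "neon-fp-armv8" rather than
    // falling through to the older "neon-vfpv4" name.
    if (Subtarget->hasV8FP())
      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
                                     "neon-fp-armv8");

    // VFP-only ARMv8 FP: the directive becomes ".fpu fp-armv8", not ".fpu v8fp".
    AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "fp-armv8");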
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190683 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 0e94045..ff43d46 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -807,8 +807,11 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of - * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasVFP4()) + * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ + if (Subtarget->hasV8FP()) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + "neon-fp-armv8"); + else if (Subtarget->hasVFP4()) AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4"); else @@ -823,7 +826,7 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, ARMBuildAttrs::AllowV8FPA); if (emitFPU) - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "v8fp"); + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "fp-armv8"); /* VFPv4 + .fpu */ } else if (Subtarget->hasVFP4()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, -- cgit v1.1 From 2a9af9f18eac90b0de739b6ceddf6c2209086303 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 13 Sep 2013 13:46:57 +0000 Subject: [ARMv8] Change hasV8Fp to hasFPARMv8, and other command line options to be more consistent. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190692 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 2 +- lib/Target/ARM/ARMAsmPrinter.cpp | 8 ++++---- lib/Target/ARM/ARMBuildAttrs.h | 4 ++-- lib/Target/ARM/ARMISelLowering.cpp | 4 ++-- lib/Target/ARM/ARMInstrInfo.td | 4 ++-- lib/Target/ARM/ARMInstrVFP.td | 32 ++++++++++++++++---------------- lib/Target/ARM/ARMSubtarget.cpp | 2 +- lib/Target/ARM/ARMSubtarget.h | 6 +++--- 8 files changed, 31 insertions(+), 31 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 067ad13..5752005 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -45,7 +45,7 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", "Enable VFP4 instructions", [FeatureVFP3, FeatureFP16]>; -def FeatureV8FP : SubtargetFeature<"v8fp", "HasV8FP", +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", "Enable ARMv8 FP", [FeatureVFP4]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index ff43d46..ee01fcf 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -808,7 +808,7 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasV8FP()) + if (Subtarget->hasFPARMv8()) AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-fp-armv8"); else if (Subtarget->hasVFP4()) @@ -821,10 +821,10 @@ void ARMAsmPrinter::emitAttributes() { emitFPU = false; } - /* V8FP + .fpu */ - if (Subtarget->hasV8FP()) { + /* FPARMv8 + .fpu */ + if (Subtarget->hasFPARMv8()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, - ARMBuildAttrs::AllowV8FPA); 
+ ARMBuildAttrs::AllowFPARMv8A); if (emitFPU) AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "fp-armv8"); /* VFPv4 + .fpu */ diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h index f614dca..1671732 100644 --- a/lib/Target/ARM/ARMBuildAttrs.h +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -114,8 +114,8 @@ namespace ARMBuildAttrs { AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31 - AllowV8FPA = 7, // Use of the ARM v8-A FP ISA was permitted - AllowV8FPB = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 + AllowFPARMv8A = 7, // Use of the ARM v8-A FP ISA was permitted + AllowFPARMv8B = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 // Tag_WMMX_arch, (=11), uleb128 AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 94270ed..c83f7b1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3258,7 +3258,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // inverting the compare condition, swapping 'less' and 'greater') and // sometimes need to swap the operands to the VSEL (which inverts the // condition in the sense of firing whenever the previous condition didn't) - if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 || + if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { ARMCC::CondCodes CondCode = IntCCToARMCC(CC); if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || @@ -3279,7 +3279,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { FPCCToARMCC(CC, CondCode, CondCode2); // Try to generate VSEL on ARMv8. 
- if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 || + if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { // We can select VMAXNM/VMINNM from a compare followed by a select with the // same operands, as follows: diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b488f26..ef34e26 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -208,8 +208,8 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4", "VFP4">; -def HasV8FP : Predicate<"Subtarget->hasV8FP()">, - AssemblerPredicate<"FeatureV8FP", "V8FP">; +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 34d83c6..f0da06d 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -340,13 +340,13 @@ multiclass vsel_inst opc, int CC> { (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>, - Requires<[HasV8FP]>; + Requires<[HasFPARMv8]>; def D : ADbInp<0b11100, opc, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>, - Requires<[HasV8FP]>; + Requires<[HasFPARMv8]>; } } @@ -362,13 +362,13 @@ multiclass vmaxmin_inst { (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>, - Requires<[HasV8FP]>; + Requires<[HasFPARMv8]>; def D : ADbInp<0b11101, 0b00, opc, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"), [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>, - Requires<[HasV8FP]>; + Requires<[HasFPARMv8]>; } } @@ -538,7 +538,7 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { // Instruction operands. bits<5> Sm; @@ -550,7 +550,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { // Instruction operands. bits<5> Sd; bits<5> Dm; @@ -565,7 +565,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { // Instruction operands. bits<5> Sm; @@ -577,7 +577,7 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { // Instruction operands. 
bits<5> Sd; bits<5> Dm; @@ -594,21 +594,21 @@ multiclass vcvt_inst rm> { def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { bits<5> Dm; let Inst{17-16} = rm; @@ -622,7 +622,7 @@ multiclass vcvt_inst rm> { def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { bits<5> Dm; let Inst{17-16} = rm; @@ -658,14 +658,14 @@ multiclass vrint_inst_zrx { def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{7} = op2; let Inst{16} = op; } def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{7} = op2; let Inst{16} = op; } @@ -685,13 +685,13 @@ multiclass vrint_inst_anpm rm> { def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), - []>, Requires<[HasV8FP]> { + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e9254c3..a227718 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -81,7 +81,7 @@ void ARMSubtarget::initializeEnvironment() { HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; - HasV8FP = false; + HasFPARMv8 = false; HasNEON = false; UseNEONForSinglePrecisionFP = false; UseMulOps = UseFusedMulOps; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index b13ff9d..65278a5 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -48,12 +48,12 @@ protected: bool HasV7Ops; bool HasV8Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasV8FP, HasNEON - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. 
bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; - bool HasV8FP; + bool HasFPARMv8; bool HasNEON; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been @@ -246,7 +246,7 @@ public: bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } - bool hasV8FP() const { return HasV8FP; } + bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } -- cgit v1.1 From 94dc6540a8f3aaadb43dda50e49fc79141fae8ed Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Fri, 13 Sep 2013 19:23:28 +0000 Subject: Adds support for Atom Silvermont (SLM) - -march=slm Implements Instruction scheduler latencies for Silvermont, using latencies from the Intel Silvermont Optimization Guide. Auto detects SLM. Turns on post RA scheduler when generating code for SLM. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190717 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 24 +- lib/Target/X86/X86InstrAVX512.td | 16 +- lib/Target/X86/X86InstrArithmetic.td | 68 ++-- lib/Target/X86/X86InstrExtension.td | 12 +- lib/Target/X86/X86InstrFormats.td | 2 +- lib/Target/X86/X86InstrInfo.td | 27 +- lib/Target/X86/X86InstrMMX.td | 2 +- lib/Target/X86/X86InstrSSE.td | 295 ++++++++++------ lib/Target/X86/X86Schedule.td | 48 ++- lib/Target/X86/X86ScheduleAtom.td | 24 +- lib/Target/X86/X86ScheduleSLM.td | 668 +++++++++++++++++++++++++++++++++++ lib/Target/X86/X86Subtarget.cpp | 8 +- lib/Target/X86/X86Subtarget.h | 2 +- 13 files changed, 1000 insertions(+), 196 deletions(-) create mode 100644 lib/Target/X86/X86ScheduleSLM.td (limited to 'lib/Target') diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a183d3a..dc4a7ea 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -166,10 +166,17 @@ include "X86Schedule.td" def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; +def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", + "Intel Silvermont processors">; class Proc Features> : ProcessorModel; +//class AtomProc Features> +// : ProcessorModel; +//class SLMProc Features> +// : ProcessorModel; + def : Proc<"generic", []>; def : Proc<"i386", []>; def : Proc<"i486", []>; @@ -209,15 +216,14 @@ def : ProcessorModel<"atom", AtomModel, FeatureLEAUsesAG, FeaturePadShortFunctions]>; -// Silvermont. -def : ProcessorModel<"slm", AtomModel, - [ProcIntelAtom, FeatureSSE42, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide, - FeatureCallRegIndirect, - FeatureLEAUsesAG, - FeaturePadShortFunctions]>; - +// Atom Silvermont. 
+def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM, + FeatureSSE42, FeatureCMPXCHG16B, + FeatureMOVBE, FeaturePOPCNT, + FeaturePCLMUL, FeatureAES, + FeatureCallRegIndirect, + FeaturePRFCHW, + FeatureSlowBTMem]>; // "Arrandale" along with corei3 and corei5 def : ProcessorModel<"corei7", SandyBridgeModel, [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index ce91687..da2b9c1 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -642,12 +642,12 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, (outs KRC:$dst), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))], - IIC_SSE_CMPP_RR>, EVEX_4V; + IIC_SSE_ALU_F32P_RR>, EVEX_4V; def rm : AVX512BI, EVEX_4V; + IIC_SSE_ALU_F32P_RM>, EVEX_4V; } defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem, @@ -677,19 +677,19 @@ multiclass avx512_icmp_cc opc, RegisterClass KRC, def rri : AVX512AIi8, EVEX_4V; + IIC_SSE_ALU_F32P_RR>, EVEX_4V; def rmi : AVX512AIi8, EVEX_4V; + imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : AVX512AIi8, EVEX_4V; + asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V; def rmi_alt : AVX512AIi8, EVEX_4V; + asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; } } @@ -732,10 +732,10 @@ multiclass avx512_cmp_packed; + asm_alt, [], IIC_SSE_ALU_F32P_RR, d>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + asm_alt, [], IIC_SSE_ALU_F32P_RM, d>; } } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 9ce02ba..0411975 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -726,20 +726,25 @@ class BinOpRR_RFF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRR; + EFLAGS))], IIC_BIN_CARRY_NONMEM>; // BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding). -class BinOpRR_Rev opcode, string mnemonic, X86TypeInfo typeinfo> +class BinOpRR_Rev opcode, string mnemonic, X86TypeInfo typeinfo, + InstrItinClass itin = IIC_BIN_NONMEM> : ITy, + mnemonic, "{$src2, $dst|$dst, $src2}", [], itin>, Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; } +// BinOpRR_RDD_Rev - Instructions like "adc reg, reg, reg" (reversed encoding). +class BinOpRR_RFF_Rev opcode, string mnemonic, X86TypeInfo typeinfo> + : BinOpRR_Rev; + // BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding). class BinOpRR_F_Rev opcode, string mnemonic, X86TypeInfo typeinfo> : ITy opcode, string mnemonic, X86TypeInfo typeinfo> // BinOpRM - Instructions like "add reg, reg, [mem]". class BinOpRM opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, list pattern> + dag outlist, list pattern, + InstrItinClass itin = IIC_BIN_MEM> : ITy, + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, Sched<[WriteALULd, ReadAfterLd]>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". @@ -786,14 +792,15 @@ class BinOpRM_RFF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRM; + EFLAGS))], IIC_BIN_CARRY_MEM>; // BinOpRI - Instructions like "add reg, reg, imm". 
class BinOpRI opcode, string mnemonic, X86TypeInfo typeinfo, - Format f, dag outlist, list pattern> + Format f, dag outlist, list pattern, + InstrItinClass itin = IIC_BIN_NONMEM> : ITy, + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; } @@ -824,14 +831,15 @@ class BinOpRI_RFF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRI; + EFLAGS))], IIC_BIN_CARRY_NONMEM>; // BinOpRI8 - Instructions like "add reg, reg, imm8". class BinOpRI8 opcode, string mnemonic, X86TypeInfo typeinfo, - Format f, dag outlist, list pattern> + Format f, dag outlist, list pattern, + InstrItinClass itin = IIC_BIN_NONMEM> : ITy, + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, Sched<[WriteALU]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -863,14 +871,14 @@ class BinOpRI8_RFF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRI8; + EFLAGS))], IIC_BIN_CARRY_NONMEM>; // BinOpMR - Instructions like "add [mem], reg". class BinOpMR opcode, string mnemonic, X86TypeInfo typeinfo, - list pattern> + list pattern, InstrItinClass itin = IIC_BIN_MEM> : ITy, + mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>, Sched<[WriteALULd, WriteRMW]>; // BinOpMR_RMW - Instructions like "add [mem], reg". @@ -886,7 +894,7 @@ class BinOpMR_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpMR; + (implicit EFLAGS)], IIC_BIN_CARRY_MEM>; // BinOpMR_F - Instructions like "cmp [mem], reg". class BinOpMR_F opcode, string mnemonic, X86TypeInfo typeinfo, @@ -896,10 +904,11 @@ class BinOpMR_F opcode, string mnemonic, X86TypeInfo typeinfo, // BinOpMI - Instructions like "add [mem], imm". class BinOpMI pattern, bits<8> opcode = 0x80> + Format f, list pattern, bits<8> opcode = 0x80, + InstrItinClass itin = IIC_BIN_MEM> : ITy, + mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>, Sched<[WriteALULd, WriteRMW]> { let ImmT = typeinfo.ImmEncoding; } @@ -917,7 +926,7 @@ class BinOpMI_RMW_FF; + (implicit EFLAGS)], 0x80, IIC_BIN_CARRY_MEM>; // BinOpMI_F - Instructions like "cmp [mem], imm". class BinOpMI_F pattern> + Format f, list pattern, + InstrItinClass itin = IIC_BIN_MEM> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>, Sched<[WriteALULd, WriteRMW]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -951,7 +961,7 @@ class BinOpMI8_RMW_FF; + (implicit EFLAGS)], IIC_BIN_CARRY_MEM>; // BinOpMI8_F - Instructions like "cmp [mem], imm8". class BinOpMI8_F opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg, string operands> + Register areg, string operands, + InstrItinClass itin = IIC_BIN_NONMEM> : ITy, Sched<[WriteALU]> { + mnemonic, operands, [], itin>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg, EFLAGS]; @@ -976,7 +987,8 @@ class BinOpAI opcode, string mnemonic, X86TypeInfo typeinfo, // and use EFLAGS. 
class BinOpAI_FF opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands> - : BinOpAI { + : BinOpAI { let Uses = [areg, EFLAGS]; } @@ -1070,10 +1082,10 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def NAME#64rr : BinOpRR_RFF; } // isCommutable - def NAME#8rr_REV : BinOpRR_Rev; - def NAME#16rr_REV : BinOpRR_Rev; - def NAME#32rr_REV : BinOpRR_Rev; - def NAME#64rr_REV : BinOpRR_Rev; + def NAME#8rr_REV : BinOpRR_RFF_Rev; + def NAME#16rr_REV : BinOpRR_RFF_Rev; + def NAME#32rr_REV : BinOpRR_RFF_Rev; + def NAME#64rr_REV : BinOpRR_RFF_Rev; def NAME#8rm : BinOpRM_RFF; def NAME#16rm : BinOpRM_RFF; diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 28954c6..4090550 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -14,26 +14,26 @@ let neverHasSideEffects = 1 in { let Defs = [AX], Uses = [AL] in def CBW : I<0x98, RawFrm, (outs), (ins), - "{cbtw|cbw}", []>, OpSize; // AX = signext(AL) + "{cbtw|cbw}", [], IIC_CBW>, OpSize; // AX = signext(AL) let Defs = [EAX], Uses = [AX] in def CWDE : I<0x98, RawFrm, (outs), (ins), - "{cwtl|cwde}", []>; // EAX = signext(AX) + "{cwtl|cwde}", [], IIC_CBW>; // EAX = signext(AX) let Defs = [AX,DX], Uses = [AX] in def CWD : I<0x99, RawFrm, (outs), (ins), - "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX) + "{cwtd|cwd}", [], IIC_CBW>, OpSize; // DX:AX = signext(AX) let Defs = [EAX,EDX], Uses = [EAX] in def CDQ : I<0x99, RawFrm, (outs), (ins), - "{cltd|cdq}", []>; // EDX:EAX = signext(EAX) + "{cltd|cdq}", [], IIC_CBW>; // EDX:EAX = signext(EAX) let Defs = [RAX], Uses = [EAX] in def CDQE : RI<0x98, RawFrm, (outs), (ins), - "{cltq|cdqe}", []>; // RAX = signext(EAX) + "{cltq|cdqe}", [], IIC_CBW>; // RAX = signext(EAX) let Defs = [RAX,RDX], Uses = [RAX] in def CQO : RI<0x99, RawFrm, (outs), (ins), - "{cqto|cqo}", []>; // RDX:RAX = signext(RAX) + "{cqto|cqo}", [], IIC_CBW>; // RDX:RAX = signext(RAX) } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index b50706c..fb07ed0 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -671,7 +671,7 @@ class AVX512FMA3 o, Format F, dag outs, dag ins, string asm, // AES8I // These use the same encoding as the SSE4.2 T8 and TA encodings. 
class AES8I o, Format F, dag outs, dag ins, string asm, - listpattern, InstrItinClass itin = NoItinerary> + listpattern, InstrItinClass itin = IIC_AES> : I, T8, Requires<[HasAES]>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 961109f..148ac6d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -977,53 +977,56 @@ let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))], - IIC_BSF>, TB, OpSize, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, TB, OpSize, Sched<[WriteShift]>; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))], - IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, OpSize, Sched<[WriteShiftLd]>; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB, + [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], + IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>; def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))], - IIC_BSF>, TB, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>; def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))], - IIC_BSF>, TB, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>; def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))], - IIC_BSF>, TB, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>, + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], + IIC_BIT_SCAN_REG>, TB, OpSize, Sched<[WriteShift]>; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))], - IIC_BSR>, TB, + IIC_BIT_SCAN_MEM>, TB, OpSize, Sched<[WriteShiftLd]>; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB, + [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], + IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>; def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))], - IIC_BSR>, TB, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>; def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB, + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>; def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))], - IIC_BSR>, TB, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>; } // Defs = [EFLAGS] let SchedRW = [WriteMicrocoded] in { diff --git a/lib/Target/X86/X86InstrMMX.td 
b/lib/Target/X86/X86InstrMMX.td index cb12956..8ab8e02 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -204,7 +204,7 @@ multiclass sse12_cvt_pint_3addr opc, RegisterClass SrcRC, //===----------------------------------------------------------------------===// def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", - [(int_x86_mmx_emms)]>; + [(int_x86_mmx_emms)], IIC_MMX_EMMS>; //===----------------------------------------------------------------------===// // MMX Scalar Instructions diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 83dd320..16cad1e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -151,6 +151,34 @@ def SSE_MOVU_ITINS : OpndItins< IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM >; +def SSE_DPPD_ITINS : OpndItins< + IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM +>; + +def SSE_DPPS_ITINS : OpndItins< + IIC_SSE_DPPS_RR, IIC_SSE_DPPD_RM +>; + +def DEFAULT_ITINS : OpndItins< + IIC_ALU_NONMEM, IIC_ALU_MEM +>; + +def SSE_EXTRACT_ITINS : OpndItins< + IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM +>; + +def SSE_INSERT_ITINS : OpndItins< + IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM +>; + +def SSE_MPSADBW_ITINS : OpndItins< + IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM +>; + +def SSE_PMULLD_ITINS : OpndItins< + IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM +>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 Instructions Classes //===----------------------------------------------------------------------===// @@ -2307,7 +2335,7 @@ let Constraints = "$src1 = $dst" in { defm CMPSD : sse12_cmp_scalar, // same latency as 32 bit compare + SSE_ALU_F64S>, XD; } @@ -2342,7 +2370,7 @@ let Constraints = "$src1 = $dst" in { SSE_ALU_F32S>, XS; defm Int_CMPSD : sse12_cmp_scalar_int, // same latency as f32 + SSE_ALU_F64S>, XD; } @@ -2411,26 +2439,27 @@ let Defs = [EFLAGS] in { // sse12_cmp_packed - sse 1 & 2 compare packed instructions multiclass sse12_cmp_packed { + string asm_alt, Domain d, + OpndItins itins = SSE_ALU_F32P> { def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>, + itins.rr, d>, Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>, + itins.rm, d>, Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. 
let neverHasSideEffects = 1 in { def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>; + asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>, + asm_alt, [], itins.rm, d>, Sched<[WriteFAddLd, ReadAfterLd]>; } } @@ -2455,11 +2484,11 @@ let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed, TB; + SSEPackedSingle, SSE_ALU_F32P>, TB; defm CMPPD : sse12_cmp_packed, TB, OpSize; + SSEPackedDouble, SSE_ALU_F64P>, TB, OpSize; } let Predicates = [HasAVX] in { @@ -3830,7 +3859,7 @@ defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, - int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>; + int_x86_avx2_psad_bw, SSE_PMADD, 1>; let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, @@ -3974,12 +4003,14 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "pslldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>; + (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))], + IIC_SSE_INTSHDQ_P_RI>; def PSRLDQri : PDIi8<0x73, MRM3r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "psrldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>; + (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))], + IIC_SSE_INTSHDQ_P_RI>; // PSRADQri doesn't exist in SSE[1-3]. 
} } // Constraints = "$src1 = $dst" @@ -4063,14 +4094,14 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; + IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, Sched<[WriteShuffleLd]>; } @@ -4081,14 +4112,14 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; + IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L, Sched<[WriteShuffleLd]>; } @@ -4099,14 +4130,14 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; + IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, + (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, Sched<[WriteShuffleLd]>; } } @@ -5382,7 +5413,7 @@ multiclass ssse3_palignr { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>; + [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5390,7 +5421,7 @@ multiclass ssse3_palignr { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; + [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5482,16 +5513,17 @@ def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>, // SSE4.1 - Packed Move with Sign/Zero Extend //===----------------------------------------------------------------------===// -multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId> { +multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId, + OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize; def rm : SS48I, - OpSize; + (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], + itins.rm>, OpSize; } multiclass SS41I_binop_rm_int16_y opc, string OpcodeStr, @@ -5502,22 +5534,23 @@ multiclass SS41I_binop_rm_int16_y opc, string OpcodeStr, def Yrm : SS48I, OpSize; + [(set VR256:$dst, (IntId (load addr:$src)))]>, + OpSize; } let Predicates = [HasAVX] in { -defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", 
int_x86_sse41_pmovsxbw>, - VEX; -defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, - VEX; -defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>, - VEX; -defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, - VEX; -defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, - VEX; -defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, - VEX; +defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", + int_x86_sse41_pmovsxbw>, VEX; +defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", + int_x86_sse41_pmovsxwd>, VEX; +defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", + int_x86_sse41_pmovsxdq>, VEX; +defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", + int_x86_sse41_pmovzxbw>, VEX; +defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", + int_x86_sse41_pmovzxwd>, VEX; +defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", + int_x86_sse41_pmovzxdq>, VEX; } let Predicates = [HasAVX2] in { @@ -5535,12 +5568,12 @@ defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq", int_x86_avx2_pmovzxdq>, VEX, VEX_L; } -defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; -defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; -defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; -defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; -defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; -defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; +defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw, SSE_INTALU_ITINS_P>; +defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd, SSE_INTALU_ITINS_P>; +defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq, SSE_INTALU_ITINS_P>; +defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw, SSE_INTALU_ITINS_P>; +defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd, SSE_INTALU_ITINS_P>; +defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq, SSE_INTALU_ITINS_P>; let Predicates = [HasAVX] in { // Common patterns involving scalar load. 
@@ -5655,15 +5688,17 @@ let Predicates = [UseSSE41] in { } -multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId> { +multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId, + OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize; def rm : SS48I, + (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], + itins.rm>, OpSize; } @@ -5702,10 +5737,14 @@ defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq", int_x86_avx2_pmovzxwq>, VEX, VEX_L; } -defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; -defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; -defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; -defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; +defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd, + SSE_INTALU_ITINS_P>; +defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq, + SSE_INTALU_ITINS_P>; +defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd, + SSE_INTALU_ITINS_P>; +defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq, + SSE_INTALU_ITINS_P>; let Predicates = [HasAVX] in { // Common patterns involving scalar load @@ -5733,7 +5772,8 @@ let Predicates = [UseSSE41] in { (PMOVZXWQrm addr:$src)>; } -multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { +multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId, + OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; @@ -5772,8 +5812,10 @@ defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", int_x86_avx2_pmovzxbq>, VEX, VEX_L; } -defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; -defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; +defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq, + SSE_INTALU_ITINS_P>; +defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq, + SSE_INTALU_ITINS_P>; let Predicates = [HasAVX2] in { def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; @@ -6115,20 +6157,22 @@ defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory /// destination -multiclass SS41I_extractf32 opc, string OpcodeStr> { +multiclass SS41I_extractf32 opc, string OpcodeStr, + OpndItins itins = DEFAULT_ITINS> { def rr : SS4AIi8, + (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))], + itins.rr>, OpSize; def mr : SS4AIi8, OpSize; + addr:$dst)], itins.rm>, OpSize; } let ExeDomain = SSEPackedSingle in { @@ -6139,7 +6183,7 @@ let ExeDomain = SSEPackedSingle in { "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX; } - defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; + defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>; } // Also match an EXTRACTPS store when the store is done as f32 instead of i32. @@ -6239,7 +6283,8 @@ let Constraints = "$src1 = $dst" in // are optimized inserts that won't zero arbitrary elements in the destination // vector. The next one matches the intrinsic and could zero arbitrary elements // in the target vector. 
-multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { +multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { def rr : SS4AIi8 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>, OpSize; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { [(set VR128:$dst, (X86insrtps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, OpSize; + imm:$src3))], itins.rm>, OpSize; } let ExeDomain = SSEPackedSingle in { let Predicates = [HasAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm INSERTPS : SS41I_insertf32<0x21, "insertps">; + defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>; } //===----------------------------------------------------------------------===// @@ -6283,7 +6328,8 @@ let ExeDomain = SSEPackedSingle in { (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>, + [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))], + IIC_SSE_ROUNDPS_REG>, OpSize; // Vector intrinsic operation, mem @@ -6292,7 +6338,8 @@ let ExeDomain = SSEPackedSingle in { !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, - (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>, + (V4F32Int (mem_frag32 addr:$src1),imm:$src2))], + IIC_SSE_ROUNDPS_MEM>, OpSize; } // ExeDomain = SSEPackedSingle @@ -6302,7 +6349,8 @@ let ExeDomain = SSEPackedDouble in { (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>, + [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))], + IIC_SSE_ROUNDPS_REG>, OpSize; // Vector intrinsic operation, mem @@ -6311,7 +6359,8 @@ let ExeDomain = SSEPackedDouble in { !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, - (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>, + (V2F64Int (mem_frag64 addr:$src1),imm:$src2))], + IIC_SSE_ROUNDPS_REG>, OpSize; } // ExeDomain = SSEPackedDouble } @@ -6593,30 +6642,33 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>, let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>, + [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)], + IIC_SSE_POPCNT_RR>, OpSize, XS; def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctpop (loadi16 addr:$src))), - (implicit EFLAGS)]>, OpSize, XS; + (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, OpSize, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, + [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)], + IIC_SSE_POPCNT_RR>, XS; def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctpop (loadi32 addr:$src))), - (implicit EFLAGS)]>, XS; + (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS; def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins 
GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, + [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)], + IIC_SSE_POPCNT_RR>, XS; def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctpop (loadi64 addr:$src))), - (implicit EFLAGS)]>, XS; + (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS; } @@ -6644,14 +6696,16 @@ defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator multiclass SS41I_binop_rm_int opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { + Intrinsic IntId128, bit Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { let isCommutable = 1 in def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))], + itins.rr>, OpSize; def rm : SS48I opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))], + itins.rm>, OpSize; } /// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator @@ -6682,7 +6737,8 @@ multiclass SS41I_binop_rm_int_y opc, string OpcodeStr, /// SS48I_binop_rm - Simple SSE41 binary operator. multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, bit Is2Addr = 1> { + X86MemOperand x86memop, bit Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { let isCommutable = 1 in def rr : SS48I; defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, - memopv2i64, i128mem>; - defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; + memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, + 1, SSE_INTMUL_ITINS_P>; } let Predicates = [HasAVX] in { @@ -6785,15 +6842,16 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in { defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>; defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>; } /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate multiclass SS41I_binop_rmi_int opc, string OpcodeStr, Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, bit Is2Addr = 1> { + X86MemOperand x86memop, bit 
Is2Addr = 1, + OpndItins itins = DEFAULT_ITINS> { let isCommutable = 1 in def rri : SS4AIi8 opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, + [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>, OpSize; def rmi : SS4AIi8 opc, string OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, (IntId RC:$src1, - (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, + (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>, OpSize; } @@ -6862,21 +6920,27 @@ let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { let ExeDomain = SSEPackedSingle in defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, - VR128, memopv4f32, f128mem>; + VR128, memopv4f32, f128mem, + 1, SSE_INTALU_ITINS_P>; let ExeDomain = SSEPackedDouble in defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, - VR128, memopv2f64, f128mem>; + VR128, memopv2f64, f128mem, + 1, SSE_INTALU_ITINS_P>; defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, - VR128, memopv2i64, i128mem>; + VR128, memopv2i64, i128mem, + 1, SSE_INTALU_ITINS_P>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, - VR128, memopv2i64, i128mem>; + VR128, memopv2i64, i128mem, + 1, SSE_INTMUL_ITINS_P>; } let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, - VR128, memopv4f32, f128mem>; + VR128, memopv4f32, f128mem, 1, + SSE_DPPS_ITINS>; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, - VR128, memopv2f64, f128mem>; + VR128, memopv2f64, f128mem, 1, + SSE_DPPD_ITINS>; } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators @@ -6981,13 +7045,14 @@ let Predicates = [HasAVX2] in { /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int opc, string OpcodeStr, PatFrag mem_frag, - X86MemOperand x86memop, Intrinsic IntId> { + X86MemOperand x86memop, Intrinsic IntId, + OpndItins itins = DEFAULT_ITINS> { def rr0 : SS48I, - OpSize; + [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))], + itins.rr>, OpSize; def rm0 : SS48I, OpSize; + (bitconvert (mem_frag addr:$src2)), XMM0))], + itins.rm>, OpSize; } } @@ -7262,61 +7328,66 @@ let Constraints = "$src1 = $dst" in { "crc32{b}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32_8 GR32:$src1, - (load addr:$src2)))]>; + (load addr:$src2)))], IIC_CRC32_MEM>; def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR8:$src2), "crc32{b}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>; + (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))], + IIC_CRC32_REG>; def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i16mem:$src2), "crc32{w}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32_16 GR32:$src1, - (load addr:$src2)))]>, + (load addr:$src2)))], IIC_CRC32_MEM>, OpSize; def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR16:$src2), "crc32{w}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>, + (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))], + IIC_CRC32_REG>, OpSize; def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins 
GR32:$src1, i32mem:$src2), "crc32{l}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32_32 GR32:$src1, - (load addr:$src2)))]>; + (load addr:$src2)))], IIC_CRC32_MEM>; def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "crc32{l}\t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>; + (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))], + IIC_CRC32_REG>; def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i8mem:$src2), "crc32{b}\t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, (int_x86_sse42_crc32_64_8 GR64:$src1, - (load addr:$src2)))]>, + (load addr:$src2)))], IIC_CRC32_MEM>, REX_W; def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR8:$src2), "crc32{b}\t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>, + (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))], + IIC_CRC32_REG>, REX_W; def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "crc32{q}\t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, (int_x86_sse42_crc32_64_64 GR64:$src1, - (load addr:$src2)))]>, + (load addr:$src2)))], IIC_CRC32_MEM>, REX_W; def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "crc32{q}\t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>, + (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))], + IIC_CRC32_REG>, REX_W; } @@ -7458,13 +7529,15 @@ def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>; + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], + IIC_SSE_PCLMULQDQ_RR>; def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, - (memopv2i64 addr:$src2), imm:$src3))]>; + (memopv2i64 addr:$src2), imm:$src3))], + IIC_SSE_PCLMULQDQ_RM>; } // Constraints = "$src1 = $dst" diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index ceb2e05..0556437 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -141,9 +141,12 @@ def IIC_IDIV64 : InstrItinClass; // neg/not/inc/dec def IIC_UNARY_REG : InstrItinClass; def IIC_UNARY_MEM : InstrItinClass; -// add/sub/and/or/xor/adc/sbc/cmp/test +// add/sub/and/or/xor/sbc/cmp/test def IIC_BIN_MEM : InstrItinClass; def IIC_BIN_NONMEM : InstrItinClass; +// adc/sbc +def IIC_BIN_CARRY_MEM : InstrItinClass; +def IIC_BIN_CARRY_NONMEM : InstrItinClass; // shift/rotate def IIC_SR : InstrItinClass; // shift double @@ -250,11 +253,11 @@ def IIC_SSE_INTSH_P_RR : InstrItinClass; def IIC_SSE_INTSH_P_RM : InstrItinClass; def IIC_SSE_INTSH_P_RI : InstrItinClass; -def IIC_SSE_CMPP_RR : InstrItinClass; -def IIC_SSE_CMPP_RM : InstrItinClass; +def IIC_SSE_INTSHDQ_P_RI : InstrItinClass; def IIC_SSE_SHUFP : InstrItinClass; -def IIC_SSE_PSHUF : InstrItinClass; +def IIC_SSE_PSHUF_RI : InstrItinClass; +def IIC_SSE_PSHUF_MI : InstrItinClass; def IIC_SSE_UNPCK : InstrItinClass; @@ -316,7 +319,8 @@ def IIC_SSE_PSIGN_RM : InstrItinClass; def IIC_SSE_PMADD : InstrItinClass; def IIC_SSE_PMULHRSW : InstrItinClass; -def IIC_SSE_PALIGNR : InstrItinClass; +def IIC_SSE_PALIGNRR : 
InstrItinClass; +def IIC_SSE_PALIGNRM : InstrItinClass; def IIC_SSE_MWAIT : InstrItinClass; def IIC_SSE_MONITOR : InstrItinClass; @@ -492,8 +496,8 @@ def IIC_PUSH_REG : InstrItinClass; def IIC_PUSH_F : InstrItinClass; def IIC_PUSH_A : InstrItinClass; def IIC_BSWAP : InstrItinClass; -def IIC_BSF : InstrItinClass; -def IIC_BSR : InstrItinClass; +def IIC_BIT_SCAN_MEM : InstrItinClass; +def IIC_BIT_SCAN_REG : InstrItinClass; def IIC_MOVS : InstrItinClass; def IIC_STOS : InstrItinClass; def IIC_SCAS : InstrItinClass; @@ -540,6 +544,33 @@ def IIC_BOUND : InstrItinClass; def IIC_ARPL_REG : InstrItinClass; def IIC_ARPL_MEM : InstrItinClass; def IIC_MOVBE : InstrItinClass; +def IIC_AES : InstrItinClass; +def IIC_BLEND_MEM : InstrItinClass; +def IIC_BLEND_NOMEM : InstrItinClass; +def IIC_CBW : InstrItinClass; +def IIC_CRC32_REG : InstrItinClass; +def IIC_CRC32_MEM : InstrItinClass; +def IIC_SSE_DPPD_RR : InstrItinClass; +def IIC_SSE_DPPD_RM : InstrItinClass; +def IIC_SSE_DPPS_RR : InstrItinClass; +def IIC_SSE_DPPS_RM : InstrItinClass; +def IIC_MMX_EMMS : InstrItinClass; +def IIC_SSE_EXTRACTPS_RR : InstrItinClass; +def IIC_SSE_EXTRACTPS_RM : InstrItinClass; +def IIC_SSE_INSERTPS_RR : InstrItinClass; +def IIC_SSE_INSERTPS_RM : InstrItinClass; +def IIC_SSE_MPSADBW_RR : InstrItinClass; +def IIC_SSE_MPSADBW_RM : InstrItinClass; +def IIC_SSE_PMULLD_RR : InstrItinClass; +def IIC_SSE_PMULLD_RM : InstrItinClass; +def IIC_SSE_ROUNDPS_REG : InstrItinClass; +def IIC_SSE_ROUNDPS_MEM : InstrItinClass; +def IIC_SSE_ROUNDPD_REG : InstrItinClass; +def IIC_SSE_ROUNDPD_MEM : InstrItinClass; +def IIC_SSE_POPCNT_RR : InstrItinClass; +def IIC_SSE_POPCNT_RM : InstrItinClass; +def IIC_SSE_PCLMULQDQ_RR : InstrItinClass; +def IIC_SSE_PCLMULQDQ_RM : InstrItinClass; def IIC_NOP : InstrItinClass; @@ -561,7 +592,7 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // -// The GenericModel contains no instruciton itineraries. +// The GenericModel contains no instruction itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; let MicroOpBufferSize = 32; @@ -572,3 +603,4 @@ def GenericModel : SchedMachineModel { include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" +include "X86ScheduleSLM.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 14a1471..ba72f29 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines the itinerary class data for the Intel Atom (Bonnell) -// processors. +// This file defines the itinerary class data for the Intel Atom +// in order (Saltwell-32nm/Bonnell-45nm) processors. 
// //===----------------------------------------------------------------------===// @@ -79,9 +79,12 @@ def AtomItineraries : ProcessorItineraries< // neg/not/inc/dec InstrItinData] >, InstrItinData] >, - // add/sub/and/or/xor/adc/sbc/cmp/test + // add/sub/and/or/xor/cmp/test InstrItinData] >, InstrItinData] >, + // adc/sbc + InstrItinData] >, + InstrItinData] >, // shift/rotate InstrItinData] >, // shift double @@ -203,11 +206,11 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, - InstrItinData] >, - InstrItinData] >, + InstrItinData] >, InstrItinData] >, - InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] >, @@ -278,7 +281,8 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, - InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] >, InstrItinData] >, @@ -470,8 +474,8 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, - InstrItinData] >, - InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] >, InstrItinData] >, InstrItinData] >, @@ -518,6 +522,8 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, InstrItinData] >, + InstrItinData] >, + InstrItinData] >, InstrItinData] > ]>; diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td new file mode 100644 index 0000000..6c2a304 --- /dev/null +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -0,0 +1,668 @@ +//===- X86ScheduleSLM.td - X86 Atom Scheduling Definitions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Intel Atom +// (Silvermont) processor. 
+// +//===----------------------------------------------------------------------===// + +def IEC_RSV0 : FuncUnit; +def IEC_RSV1 : FuncUnit; +def FPC_RSV0 : FuncUnit; +def FPC_RSV1 : FuncUnit; +def MEC_RSV : FuncUnit; + + + + + + + + + + + + + + +def SLMItineraries : ProcessorItineraries< + [ IEC_RSV0, IEC_RSV1, FPC_RSV0, FPC_RSV1, MEC_RSV ], + [], [ + // [InstrStage] + // [InstrStage, InstrStage] + // [InstrStage] + // [InstrStage,InstrStage] + // + // Default is 1 cycle, IEC_RSV0 or IEC_RSV1 + //InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // mul + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + // imul by al, ax, eax, rax + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + // imul reg by reg|mem + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + // imul reg = reg/mem * imm + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + // idiv - min latency + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // div - min latency + InstrItinData] >, + InstrItinData, + InstrStage<25, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // neg/not/inc/dec + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + // add/sub/and/or/xor/adc/sbc/cmp/test + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + // adc/sbb + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + // shift/rotate + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + // shift double + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + // cmov + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + // set + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + // jcc + InstrItinData] >, + // jcxz/jecxz/jrcxz + InstrItinData] >, + // jmp rel + InstrItinData] >, + // jmp indirect + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + // jmp far + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + // loop/loope/loopne + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // call - all but reg/imm + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + //ret + InstrItinData] >, + InstrItinData] >, + //sign extension movs + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + //zero extension movs + 
InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData] >, + + // SSE binary operations + // arithmetic fp scalar + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<13, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<13, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + + // arithmetic fp parallel + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<27, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<27, [MEC_RSV]>] >, + + // bitwise parallel + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + // arithmetic int parallel + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + + // multiply int parallel + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + + // shift parallel + InstrItinData] >, + InstrItinData, + InstrStage<2, [MEC_RSV]>] >, + InstrItinData] >, + + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<26, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<13, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<26, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<13, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData, + InstrStage<9, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >, + InstrItinData] >, + + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<9, [MEC_RSV]>] >, + InstrItinData, + InstrStage<9, [MEC_RSV]>] >, + InstrItinData, + InstrStage<9, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + + // conversions + // to/from PD ... 
+ InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + // to/from PS except to/from PD and PS2PI + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + + // MMX MOVs + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // other MMX + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // conversions + // from/to PD + InstrItinData] >, + InstrItinData] >, + // from/to PI + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + // System instructions + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + // worst case for mov REG_CRx + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // LAR + InstrItinData] >, + InstrItinData] >, + // LSL + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // push control register, segment registers + InstrItinData] >, + InstrItinData] >, + // pop control register, segment registers + InstrItinData] >, + InstrItinData] >, + // VERR, VERW + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // WRMSR, RDMSR + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // SMSW, LMSW + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, 
+ + InstrItinData] >, + InstrItinData, + InstrStage<10, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData, + InstrStage<6, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData, + InstrStage<10, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<3, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<12, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<15, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<11, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<5, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<4, [MEC_RSV]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<10, [MEC_RSV]>] >, + + InstrItinData] > + ]>; + +// Silvermont machine model. +def SLMModel : SchedMachineModel { + let IssueWidth = 2; // Allows 2 instructions per scheduling group. + let MinLatency = 1; // InstrStage cycles overrides MinLatency. + // OperandCycles may be used for expected latency. + let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. + let HighLatency = 30;// Expected, may be overriden by OperandCycles. + + let Itineraries = SLMItineraries; +} diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 851ab63..78c9a1a 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -281,7 +281,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { ToggleFeature(X86::FeatureFastUAMem); } - // Set processor type. Currently only Atom is detected. + // Set processor type. Currently only Atom or Silvermont (SLM) is detected. 
if (Family == 6 && (Model == 28 || Model == 38 || Model == 39 || Model == 53 || Model == 54)) { @@ -290,6 +290,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { UseLeaForSP = true; ToggleFeature(X86::FeatureLeaForSP); } + else if (Family == 6 && + (Model == 55 || Model == 74 || Model == 77)) { + X86ProcFamily = IntelSLM; + } unsigned MaxExtLevel; X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -451,7 +455,7 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // new MCSchedModel is used. InitMCProcessorInfo(CPUName, FS); - if (X86ProcFamily == IntelAtom) + if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM) PostRAScheduler = true; InstrItins = getInstrItineraryForCPU(CPUName); diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 28aae20..90378fc 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -50,7 +50,7 @@ protected: }; enum X86ProcFamilyEnum { - Others, IntelAtom + Others, IntelAtom, IntelSLM }; /// X86ProcFamily - X86 processor family: Intel Atom, and others -- cgit v1.1 From 98bae99266d8e527e7399c717a79c6dc9a073331 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 13 Sep 2013 20:09:02 +0000 Subject: Add missing break statement in PPCISelLowering As it turns out, not a problem in practice, but it should be there. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190720 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index d042ed0..ae046f0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7321,6 +7321,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + + break; case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && -- cgit v1.1 From 47b33528d1b4298bf8cc5dcca8b531dfd0e704bb Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Sat, 14 Sep 2013 06:49:25 +0000 Subject: Support for misc microMIPS instructions. 
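The new formats below (CLO_FM_MM, SEB_FM_MM, EXT_FM_MM) just place register
numbers and a minor opcode into fixed bit ranges of a 32-bit word. As a rough
illustration of the EXT/INS packing described by EXT_FM_MM -- a hypothetical
standalone helper, not code from this patch:

#include <cstdint>

// Pack the fields the way EXT_FM_MM lays them out: major opcode 0x00 in
// bits 31-26, rt in 25-21, rs in 20-16, size in 15-11, pos in 10-6, and
// the minor opcode (0x2c for ext, 0x0c for ins) in bits 5-0.
static uint32_t encodeExtFmMM(unsigned rt, unsigned rs, unsigned pos,
                              unsigned size, unsigned funct) {
  uint32_t Inst = 0;            // Inst{31-26} = 0x00
  Inst |= (rt & 0x1F) << 21;    // Inst{25-21} = rt
  Inst |= (rs & 0x1F) << 16;    // Inst{20-16} = rs
  Inst |= (size & 0x1F) << 11;  // Inst{15-11} = size
  Inst |= (pos & 0x1F) << 6;    // Inst{10-6}  = pos
  Inst |= (funct & 0x3F);       // Inst{5-0}   = funct
  return Inst;
}

The TableGen classes express exactly this layout declaratively, so the
encoder and disassembler stay in sync with one description.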
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190744 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MicroMipsInstrFormats.td | 42 ++++++++++++++++++++++++++++++++
 lib/Target/Mips/MicroMipsInstrInfo.td    | 16 ++++++++++++
 lib/Target/Mips/MipsInstrFormats.td      |  6 ++---
 lib/Target/Mips/MipsInstrInfo.td         | 26 ++++++++++----------
 4 files changed, 74 insertions(+), 16 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
index 6d8d29e..0b82ef8 100644
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -162,3 +162,45 @@ class MFLO_FM_MM<bits<10> funct> : MMArch {
   let Inst{15-6} = funct;
   let Inst{5-0} = 0x3c;
 }
+
+class CLO_FM_MM<bits<10> funct> : MMArch {
+  bits<5> rd;
+  bits<5> rs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x00;
+  let Inst{25-21} = rd;
+  let Inst{20-16} = rs;
+  let Inst{15-6} = funct;
+  let Inst{5-0} = 0x3c;
+}
+
+class SEB_FM_MM<bits<10> funct> : MMArch {
+  bits<5> rd;
+  bits<5> rt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x00;
+  let Inst{25-21} = rd;
+  let Inst{20-16} = rt;
+  let Inst{15-6} = funct;
+  let Inst{5-0} = 0x3c;
+}
+
+class EXT_FM_MM<bits<6> funct> : MMArch {
+  bits<5> rt;
+  bits<5> rs;
+  bits<5> pos;
+  bits<5> size;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x00;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-11} = size;
+  let Inst{10-6} = pos;
+  let Inst{5-0} = funct;
+}
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index eaf2f31..b274d6f 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -135,4 +135,20 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
   def MADDU_MM : MMRel, MArithR<"maddu", 1>, MULT_FM_MM<0x36c>;
   def MSUB_MM : MMRel, MArithR<"msub">, MULT_FM_MM<0x3ac>;
   def MSUBU_MM : MMRel, MArithR<"msubu">, MULT_FM_MM<0x3ec>;
+
+  /// Count Leading
+  def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM_MM<0x16c>;
+  def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM_MM<0x12c>;
+
+  /// Sign Ext In Register Instructions.
+ def SEB_MM : MMRel, SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM_MM<0x0ac>; + def SEH_MM : MMRel, SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM_MM<0x0ec>; + + /// Word Swap Bytes Within Halfwords + def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>; + + def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, + EXT_FM_MM<0x2c>; + def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, + EXT_FM_MM<0x0c>; } diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index c536264..f0d82dc 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -333,7 +333,7 @@ class MTLO_FM funct> : StdArch { let Inst{5-0} = funct; } -class SEB_FM funct, bits<6> funct2> { +class SEB_FM funct, bits<6> funct2> : StdArch { bits<5> rd; bits<5> rt; @@ -347,7 +347,7 @@ class SEB_FM funct, bits<6> funct2> { let Inst{5-0} = funct2; } -class CLO_FM funct> { +class CLO_FM funct> : StdArch { bits<5> rd; bits<5> rs; bits<5> rt; @@ -424,7 +424,7 @@ class MULT_FM op, bits<6> funct> : StdArch { let Inst{5-0} = funct; } -class EXT_FM funct> { +class EXT_FM funct> : StdArch { bits<5> rt; bits<5> rs; bits<5> pos; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 14d1665..e4b51ce 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -710,26 +710,26 @@ class EffectiveAddress : // Count Leading Ones/Zeros in Word class CountLeading0: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [(set RO:$rd, (ctlz RO:$rs))], IIArith, FrmR>, + [(set RO:$rd, (ctlz RO:$rs))], IIArith, FrmR, opstr>, Requires<[HasBitCount, HasStdEnc]>; class CountLeading1: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [(set RO:$rd, (ctlz (not RO:$rs)))], IIArith, FrmR>, + [(set RO:$rd, (ctlz (not RO:$rs)))], IIArith, FrmR, opstr>, Requires<[HasBitCount, HasStdEnc]>; // Sign Extend in Register. class SignExtInReg : InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), - [(set RO:$rd, (sext_inreg RO:$rt, vt))], IIseb, FrmR> { + [(set RO:$rd, (sext_inreg RO:$rt, vt))], IIseb, FrmR, opstr> { let Predicates = [HasSEInReg, HasStdEnc]; } // Subword Swap class SubwordSwap: InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], - NoItinerary, FrmR> { + NoItinerary, FrmR, opstr> { let Predicates = [HasSwap, HasStdEnc]; let neverHasSideEffects = 1; } @@ -745,7 +745,7 @@ class ExtBase { + FrmR, opstr> { let Predicates = [HasMips32r2, HasStdEnc]; } @@ -754,7 +754,7 @@ class InsBase { + NoItinerary, FrmR, opstr> { let Predicates = [HasMips32r2, HasStdEnc]; let Constraints = "$src = $rt"; } @@ -1027,15 +1027,15 @@ def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, [HI0]>, MFLO_FM<0x10>; def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, [LO0]>, MFLO_FM<0x12>; /// Sign Ext In Register Instructions. 
-def SEB : SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM<0x10, 0x20>; -def SEH : SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM<0x18, 0x20>; +def SEB : MMRel, SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM<0x10, 0x20>; +def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM<0x18, 0x20>; /// Count Leading -def CLZ : CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>; -def CLO : CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>; +def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>; +def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>; /// Word Swap Bytes Within Halfwords -def WSBH : SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>; +def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>; /// No operation. def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; @@ -1058,8 +1058,8 @@ def PseudoMSUBU : MAddSubPseudo; def RDHWR : ReadHardware, RDHWR_FM; -def EXT : ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>; -def INS : InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>; +def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>; +def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>; -- cgit v1.1 From ab48d10effb223de0c9516ccae616a80fef27df8 Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Sat, 14 Sep 2013 07:15:21 +0000 Subject: Support for microMIPS DIV instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190745 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MicroMipsInstrInfo.td | 4 ++++ lib/Target/Mips/MipsInstrInfo.td | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index b274d6f..ee43299 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -69,6 +69,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { MULT_FM_MM<0x22c>; def MULTu_MM : MMRel, Mult<"multu", IIImul, GPR32Opnd, [HI0, LO0]>, MULT_FM_MM<0x26c>; + def SDIV_MM : MMRel, Div<"div", IIIdiv, GPR32Opnd, [HI0, LO0]>, + MULT_FM_MM<0x2ac>; + def UDIV_MM : MMRel, Div<"divu", IIIdiv, GPR32Opnd, [HI0, LO0]>, + MULT_FM_MM<0x2ec>; /// Shift Instructions def SLL_MM : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd>, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e4b51ce..1c6b3cd 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -681,7 +681,7 @@ class MAddSubPseudo class Div DefRegs> : InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"), - [], itin, FrmR> { + [], itin, FrmR, opstr> { let Defs = DefRegs; } -- cgit v1.1 From dcc425c6301c088b4c0598696de50c01fbca5733 Mon Sep 17 00:00:00 2001 From: Zoran Jovanovic Date: Sat, 14 Sep 2013 07:35:41 +0000 Subject: Fixed bug when generating Load Upper Immediate microMIPS instruction. 
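The operation itself is unchanged by this fix; lui only positions a 16-bit
immediate. A minimal C++ sketch of the semantics (illustrative only, not code
from this patch):

#include <cstdint>

// lui rt, imm16: place the immediate in the upper halfword and clear the
// lower halfword of the destination register.
static inline uint32_t lui(uint16_t imm16) {
  return static_cast<uint32_t>(imm16) << 16;
}

A full 32-bit constant is then typically materialized as lui followed by ori,
i.e. lui(hi16) | lo16; the bug was only in how the microMIPS encoding of the
instruction was selected, not in what it computes.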
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190746 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFormats.td | 2 +- lib/Target/Mips/MipsInstrInfo.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index f0d82dc..68872fe 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -363,7 +363,7 @@ class CLO_FM funct> : StdArch { let rt = rd; } -class LUI_FM { +class LUI_FM : StdArch { bits<5> rt; bits<16> imm16; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 1c6b3cd..4795969 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -447,7 +447,7 @@ class shift_rotate_reg: InstSE<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"), - [], IIArith, FrmI>, IsAsCheapAsAMove { + [], IIArith, FrmI, opstr>, IsAsCheapAsAMove { let neverHasSideEffects = 1; let isReMaterializable = 1; } -- cgit v1.1 From d9d6e6d59159160299a51fe5010a940db27ae89b Mon Sep 17 00:00:00 2001 From: Robert Wilhelm Date: Sat, 14 Sep 2013 09:34:24 +0000 Subject: Fix spelling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190749 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 696d039..321c3f4 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -4201,7 +4201,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines // the full D-register by loading the same value to both lanes. The // instruction is micro-coded with 2 uops, so don't do this until we can - // properly schedule micro-coded instuctions. The dispatcher stalls cause + // properly schedule micro-coded instructions. The dispatcher stalls cause // too big regressions. // Insert the dependency-breaking FCONSTD before MI. -- cgit v1.1 From a247e9d42b03851be8425631e2716d4ff9f37c47 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Sat, 14 Sep 2013 15:03:21 +0000 Subject: Add the remaining Intel SHA instructions Also assembly/disassembly tests, and for sha256rnds2, aliases with an explicit xmm0 dependency. 
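At the source level these instructions surface as the SHA intrinsics; a small
sketch of how sha256rnds2's implicit register shows up in practice (assumes a
compiler with SHA support, e.g. -msha; example code, not part of this patch):

#include <immintrin.h>

// _mm_sha256rnds2_epu32 performs two SHA-256 rounds. The round-key operand
// must sit in xmm0 at the instruction level, which is why the definition
// carries Uses = [XMM0] and the aliases below spell the register explicitly.
__m128i two_sha256_rounds(__m128i cdgh, __m128i abef, __m128i wk) {
  return _mm_sha256rnds2_epu32(cdgh, abef, wk); // compiler moves wk into xmm0
}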
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190754 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 16cad1e..45789db 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7395,6 +7395,17 @@ let Constraints = "$src1 = $dst" in { // SHA-NI Instructions //===----------------------------------------------------------------------===// +multiclass SHAI_binop Opc, string OpcodeStr> { + def rr : I, T8; + + let mayLoad = 1 in + def rm : I, T8; +} + let Constraints = "$src1 = $dst", hasSideEffects = 0, Predicates = [HasSHA] in { def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), @@ -7405,8 +7416,24 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0, Predicates = [HasSHA] in { (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, TA; + + defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte">; + defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1">; + defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2">; + + let Uses=[XMM0] in + defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2">; + + defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1">; + defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2">; } +// Aliases with explicit %xmm0 +def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", + (SHA256RNDS2rr VR128:$dst, VR128:$src2)>; +def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", + (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>; + //===----------------------------------------------------------------------===// // AES-NI Instructions //===----------------------------------------------------------------------===// -- cgit v1.1 From 6febf857f690665bc33b84c957cdefb39a27f63d Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Sun, 15 Sep 2013 02:09:08 +0000 Subject: Expand the mask capability for deciding which functions are mips16 and mips32 so it can be better used for general interoperability testing between mips32 and mips16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190762 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsOs16.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index 5f9b60c..49c73b5 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -103,8 +103,9 @@ bool MipsOs16::runOnModule(Module &M) { if (F->isDeclaration()) continue; DEBUG(dbgs() << "Working on " << F->getName() << "\n"); if (usingMask) { - if ((functionIndex < Mips32FunctionMask.length()) && - (Mips32FunctionMask[functionIndex] == '1')) { + if (functionIndex == Mips32FunctionMask.length()) + functionIndex = 0; + if (Mips32FunctionMask[functionIndex] == '1') { DEBUG(dbgs() << "mask forced mips32: " << F->getName() << "\n"); F->addFnAttr("nomips16"); } -- cgit v1.1 From 55532adc687a87574fd79822c876b10db95cd9ee Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 15 Sep 2013 15:20:54 +0000 Subject: PPC: Don't restrict lvsl generation to after type legalization The PPC backend uses a target-specific DAG combine to turn unaligned Altivec loads into a permutation-based sequence when possible. 
Unfortunately, the target-specific DAG combine is not always called on all
loads of interest (sometimes the routines in DAGCombine call CombineTo such
that the new node and users are not added to the worklist); allowing the
combine to trigger early (before type legalization) mitigates this problem.
Because the autovectorizers only create legal vector types, I don't expect
a lot of cases where this optimization is enabled by type legalization in
practice.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190764 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCISelLowering.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index ae046f0..ddad9f8 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7177,7 +7177,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
     if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
         TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
-        DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
         LD->getAlignment() < ABIAlignment) {
       // This is a type-legal unaligned Altivec load.
       SDValue Chain = LD->getChain();
--
cgit v1.1


From 19b59e66afa8d19c35444e0a739b467d368725d3 Mon Sep 17 00:00:00 2001
From: Hal Finkel
Date: Sun, 15 Sep 2013 15:41:11 +0000
Subject: Revert r190764: PPC: Don't restrict lvsl generation to after type legalization

This is causing test-suite failures.

Original commit message:

The PPC backend uses a target-specific DAG combine to turn unaligned Altivec
loads into a permutation-based sequence when possible. Unfortunately, the
target-specific DAG combine is not always called on all loads of interest
(sometimes the routines in DAGCombine call CombineTo such that the new node
and users are not added to the worklist); allowing the combine to trigger
early (before type legalization) mitigates this problem. Because the
autovectorizers only create legal vector types, I don't expect a lot of cases
where this optimization is enabled by type legalization in practice.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190765 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCISelLowering.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib/Target')

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index ddad9f8..ae046f0 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7177,6 +7177,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
     if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
         TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+        DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
         LD->getAlignment() < ABIAlignment) {
       // This is a type-legal unaligned Altivec load.
       SDValue Chain = LD->getChain();
--
cgit v1.1


From 94ee55d4b39d6506cf4e0f4e4b1c0b7fbbfeaed5 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Sun, 15 Sep 2013 22:04:42 +0000
Subject: Replace some unnecessary vector copies with references.
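The pattern being cleaned up is easy to reproduce in isolation: binding a
returned const reference to a value silently copies the whole container.
A minimal standalone sketch (hypothetical names, not code from this patch):

#include <string>
#include <vector>

// Stand-in for an accessor like MachineConstantPool::getConstants(),
// which hands back a const reference to an internal vector.
static std::vector<std::string> Pool = {"a", "b", "c"};
static const std::vector<std::string> &getConstants() { return Pool; }

int main() {
  const std::vector<std::string> Copy = getConstants();  // deep-copies every element
  const std::vector<std::string> &Ref = getConstants();  // binds in place, no copy
  return Copy.size() == Ref.size() ? 0 : 1;
}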
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190770 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMConstantPoolValue.cpp         | 6 +++---
 lib/Target/Hexagon/HexagonHardwareLoops.cpp     | 2 +-
 lib/Target/SystemZ/SystemZConstantPoolValue.cpp | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 4e703ec..8e17f47 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -164,7 +164,7 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
 int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
                                                        unsigned Alignment) {
   unsigned AlignMask = Alignment - 1;
-  const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
   for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
     if (Constants[i].isMachineConstantPoolEntry() &&
         (Constants[i].getAlignment() & AlignMask) == 0) {
@@ -217,7 +217,7 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
 int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
                                                      unsigned Alignment) {
   unsigned AlignMask = Alignment - 1;
-  const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
   for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
     if (Constants[i].isMachineConstantPoolEntry() &&
         (Constants[i].getAlignment() & AlignMask) == 0) {
@@ -272,7 +272,7 @@ ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
 int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
                                                   unsigned Alignment) {
   unsigned AlignMask = Alignment - 1;
-  const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
   for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
     if (Constants[i].isMachineConstantPoolEntry() &&
         (Constants[i].getAlignment() & AlignMask) == 0) {
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 3c4ca0f..52d5ab2 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -871,7 +871,7 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(
 /// \brief - Return true if the loop contains an instruction that inhibits
 /// the use of the hardware loop function.
 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
-  const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+  const std::vector<MachineBasicBlock*> &Blocks = L->getBlocks();
   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
     MachineBasicBlock *MBB = Blocks[i];
     for (MachineBasicBlock::iterator
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index e9c4f6d..6c70811 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -39,7 +39,7 @@ unsigned SystemZConstantPoolValue::getRelocationInfo() const {
 int SystemZConstantPoolValue::
 getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
   unsigned AlignMask = Alignment - 1;
-  const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
   for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
     if (Constants[I].isMachineConstantPoolEntry() &&
         (Constants[I].getAlignment() & AlignMask) == 0) {
--
cgit v1.1


From fabfb5d5880354983c89c6f475312dd359e5bb03 Mon Sep 17 00:00:00 2001
From: Hal Finkel
Date: Sun, 15 Sep 2013 22:09:58 +0000
Subject: PPC: Don't restrict lvsl generation to after type legalization

This is a re-commit of r190764, with an extra check to make sure that we're
not performing the transformation on illegal types (a small test case has
been added for this as well).

Original commit message:

The PPC backend uses a target-specific DAG combine to turn unaligned Altivec
loads into a permutation-based sequence when possible. Unfortunately, the
target-specific DAG combine is not always called on all loads of interest
(sometimes the routines in DAGCombine call CombineTo such that the new node
and users are not added to the worklist); allowing the combine to trigger
early (before type legalization) mitigates this problem. Because the
autovectorizers only create legal vector types, I don't expect a lot of cases
where this optimization is enabled by type legalization in practice.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190771 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCISelLowering.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index ae046f0..83bd1ab 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7177,7 +7177,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
     if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
         TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
-        DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
+        (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+         VT == MVT::v4i32 || VT == MVT::v4f32) &&
         LD->getAlignment() < ABIAlignment) {
       // This is a type-legal unaligned Altivec load.
       SDValue Chain = LD->getChain();
--
cgit v1.1


From 5fefc00bac5ddd6f0700e71169bd7823af6b7c65 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 16 Sep 2013 04:29:58 +0000
Subject: Make F16C feature flag imply AVX rather than just checking both at the patterns.
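The dependency is structural: F16C's conversion instructions are VEX-encoded,
so code that uses them presumes AVX anyway. A user-level sketch (hypothetical
example, assuming a compiler flag like -mf16c; not part of this patch):

#include <immintrin.h>

// vcvtph2ps widens four packed half floats (held in the low 64 bits of the
// source) to single precision; vcvtps2ph narrows them back with an explicit
// rounding mode. Both are VEX-encoded, hence the implied AVX dependency.
__m128 widen4(__m128i packed_halves) {
  return _mm_cvtph_ps(packed_halves);
}

__m128i narrow4(__m128 floats) {
  return _mm_cvtps_ph(floats, _MM_FROUND_TO_NEAREST_INT);
}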
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190775 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 3 ++- lib/Target/X86/X86InstrSSE.td | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index dc4a7ea..a045c35 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -122,7 +122,8 @@ def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", "Support RDRAND instruction">; def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", - "Support 16-bit floating point conversion instructions">; + "Support 16-bit floating point conversion instructions", + [FeatureAVX]>; def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true", "Support FS/GS Base instructions">; def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 45789db..5aa5be6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8010,7 +8010,7 @@ multiclass f16c_ps2ph { TA, OpSize, VEX; } -let Predicates = [HasAVX, HasF16C] in { +let Predicates = [HasF16C] in { defm VCVTPH2PS : f16c_ph2ps; defm VCVTPH2PSY : f16c_ph2ps, VEX_L; defm VCVTPS2PH : f16c_ps2ph; -- cgit v1.1 From 219d2b8695d4322b7a3d6b9892880e65915106a7 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 16 Sep 2013 09:03:10 +0000 Subject: [SystemZ] Improve extload handling The port originally had special patterns for extload, mapping them to the same instructions as sextload. It seemed neater to have patterns that match "an extension that is allowed to be signed" and "an extension that is allowed to be unsigned". This was originally meant to be a clean-up, but it does improve the handling of promoted integers a little, as shown by args-06.ll. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190777 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.td | 126 +++++++++++++++------------------ lib/Target/SystemZ/SystemZOperators.td | 40 +++++++++-- 2 files changed, 92 insertions(+), 74 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 32b70b9..d98d75a 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -353,6 +353,13 @@ let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0W] in //===----------------------------------------------------------------------===// // Sign extensions //===----------------------------------------------------------------------===// +// +// Note that putting these before zero extensions mean that we will prefer +// them for anyextload*. There's not really much to choose between the two +// either way, but signed-extending loads have a short LH and a long LHY, +// while zero-extending loads have only the long LLH. +// +//===----------------------------------------------------------------------===// // 32-bit extensions from registers. let neverHasSideEffects = 1 in { @@ -375,37 +382,18 @@ def : Pat<(sext_inreg GR64:$src, i32), (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; // 32-bit extensions from memory. 
-def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32, 1>; -defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32, 2>; -def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>; +def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>; +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>; +def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>; // 64-bit extensions from memory. -def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64, 1>; -def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64, 2>; -def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64, 4>; -def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>; -def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>; +def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>; +def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>; +def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>; +def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>; +def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>; let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in - def LTGF : UnaryRXY<"ltgf", 0xE332, sextloadi32, GR64, 4>; - -// If the sign of a load-extend operation doesn't matter, use the signed ones. -// There's not really much to choose between the sign and zero extensions, -// but LH is more compact than LLH for small offsets. -def : Pat<(i32 (extloadi8 bdxaddr20only:$src)), (LB bdxaddr20only:$src)>; -def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH bdxaddr12pair:$src)>; -def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>; - -def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>; -def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>; -def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; - -// We want PC-relative addresses to be tried ahead of BD and BDX addresses. -// However, BDXs have two extra operands and are therefore 6 units more -// complex. -let AddedComplexity = 7 in { - def : Pat<(i32 (extloadi16 pcrel32:$src)), (LHRL pcrel32:$src)>; - def : Pat<(i64 (extloadi16 pcrel32:$src)), (LGHRL pcrel32:$src)>; -} + def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>; //===----------------------------------------------------------------------===// // Zero extensions @@ -430,16 +418,16 @@ def : Pat<(and GR64:$src, 0xffffffff), (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; // 32-bit extensions from memory. -def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32, 1>; -def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32, 2>; -def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>; +def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>; +def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>; +def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>; // 64-bit extensions from memory. 
-def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64, 1>; -def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64, 2>; -def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64, 4>; -def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>; -def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>; +def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>; +def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>; +def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>; +def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>; +def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>; //===----------------------------------------------------------------------===// // Truncations @@ -558,14 +546,14 @@ defm : SXU; //===----------------------------------------------------------------------===// let isCodeGenOnly = 1 in - defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8, 1>; -defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8, 1>; + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>; -defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>; -defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>; +defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>; -defm : InsertMem<"inserti8", IC, GR64, zextloadi8, bdxaddr12pair>; -defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>; +defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>; // Insertions of a 16-bit immediate, leaving other bits unaffected. // We don't have or_as_insert equivalents of these operations because @@ -618,9 +606,9 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>; // Addition of memory. - defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16, 2>; + defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>; defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>; - def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32, 4>; + def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>; def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>; // Addition to memory. @@ -650,7 +638,7 @@ let Defs = [CC] in { // Addition of memory. defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>; - def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32, 4>; + def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>; def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>; } defm : ZXB; @@ -679,9 +667,9 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>; // Subtraction of memory. - defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16, 2>; + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>; defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>; - def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32, 4>; + def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>; def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>; } defm : SXB; @@ -700,7 +688,7 @@ let Defs = [CC] in { // Subtraction of memory. 
defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>; - def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32, 4>; + def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, azextloadi32, 4>; def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>; } defm : ZXB; @@ -866,9 +854,9 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; // Multiplication of memory. -defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16, 2>; +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>; defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>; -def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32, 4>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>; def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; // Multiplication of a register, producing two results. @@ -972,21 +960,21 @@ let Defs = [CC], CCValues = 0xE in { def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>; // Comparison with memory. - defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, sextloadi16, 2>; + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>; defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>; - def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, sextloadi16, 2>; - def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, sextloadi32, 4>; + def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>; + def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, asextloadi32, 4>; def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>; - def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_sextloadi16>; + def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>; def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>; - def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_sextloadi16>; - def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_sextloadi32>; + def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>; + def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>; def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>; // Comparison between memory and a signed 16-bit immediate. - def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, sextloadi16, imm32sx16>; - def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>; - def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>; + def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>; + def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>; + def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>; } defm : SXB; @@ -1003,26 +991,26 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { // Comparison with memory. 
defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>; - def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32, 4>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>; def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>; def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, - aligned_zextloadi16>; + aligned_azextloadi16>; def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, aligned_load>; def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64, - aligned_zextloadi16>; + aligned_azextloadi16>; def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64, - aligned_zextloadi32>; + aligned_azextloadi32>; def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64, aligned_load>; // Comparison between memory and an unsigned 8-bit immediate. - defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>; + defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>; // Comparison between memory and an unsigned 16-bit immediate. - def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>; - def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>; - def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>; + def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>; + def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>; + def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>; } defm : ZXB; @@ -1227,14 +1215,14 @@ let mayLoad = 1, Defs = [CC], Uses = [R0W] in defm : ZXB; def : Pat<(add GR64:$src1, imm64zx32:$src2), (ALGFI GR64:$src1, imm64zx32:$src2)>; -def : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), +def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), (ALGF GR64:$src1, bdxaddr20only:$addr)>; // Use SL* for GR64 subtractions of unsigned 32-bit values. defm : ZXB; def : Pat<(add GR64:$src1, imm64zx32n:$src2), (SLGFI GR64:$src1, imm64zx32n:$src2)>; -def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), +def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), (SLGF GR64:$src1, bdxaddr20only:$addr)>; // Optimize sign-extended 1/0 selects to -1/0 selects. This is important diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 4a6b665..c90be8f 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -194,6 +194,36 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; +// Extending loads in which the extension type can be signed. +def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + unsigned Type = cast(N)->getExtensionType(); + return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD; +}]>; +def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; +def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; +def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +// Extending loads in which the extension type can be unsigned. 
+def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+  unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+  return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD;
+}]>;
+def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
 // Extending loads in which the extension type doesn't matter.
 def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
   return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
@@ -214,11 +244,11 @@ class AlignedLoad<SDPatternOperator load>
   LoadSDNode *Load = cast<LoadSDNode>(N);
   return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
 }]>;
-def aligned_load         : AlignedLoad<load>;
-def aligned_sextloadi16  : AlignedLoad<sextloadi16>;
-def aligned_sextloadi32  : AlignedLoad<sextloadi32>;
-def aligned_zextloadi16  : AlignedLoad<zextloadi16>;
-def aligned_zextloadi32  : AlignedLoad<zextloadi32>;
+def aligned_load         : AlignedLoad<load>;
+def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
+def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
+def aligned_azextloadi16 : AlignedLoad<azextloadi16>;
+def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
 
 // Aligned stores.
 class AlignedStore<SDPatternOperator store>
--
cgit v1.1


From 9bc7603750926c15648dae0d31a5451861a0d11e Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Mon, 16 Sep 2013 10:17:31 +0000
Subject: ARM: Deduplicate ConstantPoolValues.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190779 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMConstantPoolValue.cpp | 50 ++-------------------------------
 lib/Target/ARM/ARMConstantPoolValue.h   | 33 ++++++++++++++++++++++
 2 files changed, 36 insertions(+), 47 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 8e17f47..7d41c69 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -163,21 +163,7 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
 int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
                                                        unsigned Alignment) {
-  unsigned AlignMask = Alignment - 1;
-  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
-  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
-    if (Constants[i].isMachineConstantPoolEntry() &&
-        (Constants[i].getAlignment() & AlignMask) == 0) {
-      ARMConstantPoolValue *CPV =
-        (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
-      ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV);
-      if (!APC) continue;
-      if (APC->CVal == CVal && equals(APC))
-        return i;
-    }
-  }
-
-  return -1;
+  return getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment);
 }
 
 bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
@@ -216,22 +202,7 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
 int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
                                                      unsigned Alignment) {
-  unsigned AlignMask = Alignment - 1;
-  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
-  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
-    if (Constants[i].isMachineConstantPoolEntry() &&
-        (Constants[i].getAlignment() & AlignMask) == 0) {
-      ARMConstantPoolValue *CPV =
-        (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
-      ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
-      if (!APS) continue;
-
-      if (APS->S == S && equals(APS))
-        return i;
-    }
-  }
-
-  return -1;
+  return getExistingMachineCPValueImpl<ARMConstantPoolSymbol>(CP,
+                                                             Alignment);
 }
 
 bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
@@ -271,22 +242,7 @@ ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
 int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
                                                   unsigned Alignment) {
-  unsigned AlignMask = Alignment - 1;
-  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
-  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
-    if (Constants[i].isMachineConstantPoolEntry() &&
-        (Constants[i].getAlignment() & AlignMask) == 0) {
-      ARMConstantPoolValue *CPV =
-        (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
-      ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV);
-      if (!APMBB) continue;
-
-      if (APMBB->MBB == MBB && equals(APMBB))
-        return i;
-    }
-  }
-
-  return -1;
+  return getExistingMachineCPValueImpl<ARMConstantPoolMBB>(CP, Alignment);
 }
 
 bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 93812fe..7ae7bf4 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
 #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
 
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstddef>
@@ -64,6 +65,26 @@ protected:
   ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
                        unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
                        bool AddCurrentAddress);
+
+  template <typename Derived>
+  int getExistingMachineCPValueImpl(MachineConstantPool *CP,
+                                    unsigned Alignment) {
+    unsigned AlignMask = Alignment - 1;
+    const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+    for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+      if (Constants[i].isMachineConstantPoolEntry() &&
+          (Constants[i].getAlignment() & AlignMask) == 0) {
+        ARMConstantPoolValue *CPV =
+          (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+        if (Derived *APC = dyn_cast<Derived>(CPV))
+          if (cast<Derived>(this)->equals(APC))
+            return i;
+      }
+    }
+
+    return -1;
+  }
+
 public:
   virtual ~ARMConstantPoolValue();
@@ -156,6 +177,10 @@ public:
   static bool classof(const ARMConstantPoolValue *APV) {
     return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
   }
+
+  bool equals(const ARMConstantPoolConstant *A) const {
+    return CVal == A->CVal && ARMConstantPoolValue::equals(A);
+  }
 };
 
 /// ARMConstantPoolSymbol - ARM-specific constantpool values for external
@@ -187,6 +212,10 @@ public:
   static bool classof(const ARMConstantPoolValue *ACPV) {
     return ACPV->isExtSymbol();
   }
+
+  bool equals(const ARMConstantPoolSymbol *A) const {
+    return S == A->S && ARMConstantPoolValue::equals(A);
+  }
 };
 
 /// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
@@ -219,6 +248,10 @@ public:
   static bool classof(const ARMConstantPoolValue *ACPV) {
     return ACPV->isMachineBasicBlock();
   }
+
+  bool equals(const ARMConstantPoolMBB *A) const {
+    return MBB == A->MBB && ARMConstantPoolValue::equals(A);
+  }
 };
 
 } // End llvm namespace
--
cgit v1.1


From e925f7dbbf497412cd0cc3f67b9b96fed0cc3712 Mon Sep 17 00:00:00 2001
From: Vladimir Medic
Date: Mon, 16 Sep 2013 10:29:42 +0000
Subject: This patch implements Mips load/store instructions from/to coprocessor 2. Test cases are added.
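
The new parseCOP2 routine accepts a bare "$N" token after the mnemonic and
maps the integer onto the new 32-entry COP2 register class via
matchRegisterByNumber(IntVal, Mips::COP2RegClassID). A minimal C++ sketch of
that number-to-register mapping (illustrative only; FirstCOP2Reg and the
contiguous-range assumption are hypothetical stand-ins for the generated
register numbering, not the real Mips encoding):

    // Hypothetical stand-in for the TableGen-generated COP2 register ids,
    // assumed here to be a contiguous block of 32 entries.
    enum { FirstCOP2Reg = 1000, NumCOP2Regs = 32 };

    // Map the bare integer in an operand such as "$7" (as might appear in
    // "lwc2 $7, 8($4)") to a register id; reject out-of-range numbers.
    static int matchCOP2RegisterByNumber(unsigned N) {
      if (N >= NumCOP2Regs)
        return -1; // e.g. "$32" is not a valid coprocessor-2 register
      return FirstCOP2Reg + static_cast<int>(N);
    }
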
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190780 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 36 ++++++++++++++++++++++++++++- lib/Target/Mips/MipsInstrFPU.td | 8 +++++++ lib/Target/Mips/MipsRegisterInfo.td | 17 ++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index c4ce4ff..3033fef 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -125,6 +125,9 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseHI32DSP(SmallVectorImpl &Operands); + MipsAsmParser::OperandMatchResultTy + parseCOP2(SmallVectorImpl &Operands); + bool searchSymbolAlias(SmallVectorImpl &Operands, unsigned RegKind); @@ -239,7 +242,8 @@ public: Kind_FCCRegs, Kind_ACC64DSP, Kind_LO32DSP, - Kind_HI32DSP + Kind_HI32DSP, + Kind_COP2 }; private: @@ -457,6 +461,10 @@ public: return Kind == k_Register && Reg.Kind == Kind_HI32DSP; } + bool isCOP2Asm() const { + return Kind == k_Register && Reg.Kind == Kind_COP2; + } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; @@ -1585,6 +1593,32 @@ MipsAsmParser::parseHI32DSP(SmallVectorImpl &Operands) { return MatchOperand_Success; } +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCOP2(SmallVectorImpl &Operands) { + // If the first token is not '$' we have an error. + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat the '$' + + const AsmToken &Tok = Parser.getTok(); // Get next token. + + if (Tok.isNot(AsmToken::Integer)) + return MatchOperand_NoMatch; + + unsigned IntVal = Tok.getIntVal(); + + unsigned Reg = matchRegisterByNumber(IntVal, Mips::COP2RegClassID); + + MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc()); + Op->setRegKind(MipsOperand::Kind_COP2); + Operands.push_back(Op); + + Parser.Lex(); // Eat the register number. + return MatchOperand_Success; +} + bool MipsAsmParser::searchSymbolAlias( SmallVectorImpl &Operands, unsigned RegKind) { diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 7aa080f..9f7ce9a 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -372,6 +372,14 @@ let Predicates = [NotFP64bit, HasStdEnc] in { def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>; } +/// Cop2 Memory Instructions +let Predicates = [HasStdEnc] in { + def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>; + def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>; + def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>; + def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>; +} + // Indexed loads and stores. let Predicates = [HasFPIdx, HasStdEnc] in { def LWXC1 : LWXC1_FT<"lwxc1", FGR32Opnd, IIFLoad, load>, LWXC1_FM<0>; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index cef5ebf..0b88e0a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -201,6 +201,10 @@ let Namespace = "Mips" in { foreach I = 0-7 in def FCC#I : MipsReg<#I, "fcc"#I>; + // COP2 registers. 
+  foreach I = 0-31 in
+  def COP2#I : MipsReg<#I, ""#I>;
+
   // PC register
   def PC : Register<"pc">;
@@ -368,6 +372,10 @@ def ACC64DSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
 
 def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
 
+// Coprocessor 2 registers.
+def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>,
+           Unallocatable;
+
 // Register Operands.
 class MipsAsmRegOperand : AsmOperandClass {
@@ -449,6 +457,11 @@ def HWRegsAsmOperand : MipsAsmRegOperand {
   let ParserMethod = "parseHWRegs";
 }
 
+def COP2AsmOperand : MipsAsmRegOperand {
+  let Name = "COP2Asm";
+  let ParserMethod = "parseCOP2";
+}
+
 def HWRegsOpnd : RegisterOperand<HWRegs> {
   let ParserMatchClass = HWRegsAsmOperand;
 }
@@ -484,3 +497,7 @@ def HI32DSPOpnd : RegisterOperand<HI32DSP> {
 def ACC64DSPOpnd : RegisterOperand<ACC64DSP> {
   let ParserMatchClass = ACC64DSPAsmOperand;
 }
+
+def COP2Opnd : RegisterOperand<COP2> {
+  let ParserMatchClass = COP2AsmOperand;
+}
--
cgit v1.1


From 5c616f91a5136295bf83850bc3610bd28756e58e Mon Sep 17 00:00:00 2001
From: Bill Schmidt
Date: Mon, 16 Sep 2013 17:25:12 +0000
Subject: [PowerPC] Fix PR17155 - Ignore COPY_TO_REGCLASS during emit.

Fast-isel generates a COPY_TO_REGCLASS for widening f32 to f64, which
is a nop on PPC64. This is needed to keep the register class system
happy, but on the fast-isel path it is not removed before emit as it
is for DAG select. Ignore this op when emitting instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190795 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 59ba9c4..346a9be 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
 using namespace llvm;
 
 STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
@@ -76,11 +77,17 @@ public:
                                SmallVectorImpl<MCFixup> &Fixups) const;
   void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups) const {
+    // For fast-isel, a float COPY_TO_REGCLASS can survive this long.
+    // It's just a nop to keep the register classes happy, so don't
+    // generate anything.
+    unsigned Opcode = MI.getOpcode();
+    if (Opcode == TargetOpcode::COPY_TO_REGCLASS)
+      return;
+
     uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
 
     // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
     unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
-    unsigned Opcode = MI.getOpcode();
     if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
         Opcode == PPC::BL8_NOP_TLS)
       Size = 8;
--
cgit v1.1


From e54360be01d1eaccd5ef27f510634927aaa887a4 Mon Sep 17 00:00:00 2001
From: Kevin Qin
Date: Tue, 17 Sep 2013 02:21:02 +0000
Subject: Implement 3 AArch64 NEON instructions: umov, smov, and ins.
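
The three instructions move data between general-purpose registers and vector
lanes: ins writes a GPR value (or another vector's lane) into one lane while
preserving the rest, umov reads a lane with zero extension, and smov reads a
lane with sign extension. A small self-contained C++ model of the byte-lane
variants (a behavioral sketch only, not the backend code itself):

    #include <cstdint>

    // Toy 128-bit vector register viewed as sixteen byte lanes.
    struct V128 {
      uint8_t B[16];
    };

    // ins Vd.b[Lane], Wn: replace one byte lane, leave the others intact.
    static void ins_b(V128 &Vd, unsigned Lane, uint32_t Wn) {
      Vd.B[Lane & 15] = static_cast<uint8_t>(Wn);
    }

    // umov Wd, Vn.b[Lane]: read a byte lane, zero-extended into the result.
    static uint32_t umov_b(const V128 &Vn, unsigned Lane) {
      return Vn.B[Lane & 15];
    }

    // smov Xd, Vn.b[Lane]: read a byte lane, sign-extended into the result.
    static int64_t smov_b(const V128 &Vn, unsigned Lane) {
      return static_cast<int8_t>(Vn.B[Lane & 15]);
    }

The sign/zero distinction is exactly what the sext, zext, and sext_inreg
patterns below select on.
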
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190839 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrFormats.td | 40 +++ lib/Target/AArch64/AArch64InstrInfo.cpp | 54 ++- lib/Target/AArch64/AArch64InstrNEON.td | 374 +++++++++++++++++++++ .../AArch64/InstPrinter/AArch64InstPrinter.cpp | 12 + .../AArch64/InstPrinter/AArch64InstPrinter.h | 2 + 5 files changed, 468 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index dd35367..735670b 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1089,5 +1089,45 @@ class NeonI_2VShiftImm opcode, // Inherit Rd in 4-0 } +// Format AdvSIMD duplicate and insert +class NeonI_copy imm4, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + bits<5> Imm5; + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = op; + let Inst{28-21} = 0b01110000; + let Inst{20-16} = Imm5; + let Inst{15} = 0b0; + let Inst{14-11} = imm4; + let Inst{10} = 0b1; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} +// Format AdvSIMD insert from element to vector +class NeonI_insert patterns, InstrItinClass itin> + : A64InstRdn +{ + bits<5> Imm5; + bits<4> Imm4; + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = op; + let Inst{28-21} = 0b01110000; + let Inst{20-16} = Imm5; + let Inst{15} = 0b0; + let Inst{14-11} = Imm4; + let Inst{10} = 0b1; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + } diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index d8f45eb..14daab3 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -68,23 +68,49 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) .addImm(A64SysReg::NZCV); } else if (AArch64::GPR64RegClass.contains(DestReg)) { - assert(AArch64::GPR64RegClass.contains(SrcReg)); - Opc = AArch64::ORRxxx_lsl; - ZeroReg = AArch64::XZR; + if(AArch64::GPR64RegClass.contains(SrcReg)){ + Opc = AArch64::ORRxxx_lsl; + ZeroReg = AArch64::XZR; + } else{ + assert(AArch64::FPR64RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg) + .addReg(SrcReg); + return; + } } else if (AArch64::GPR32RegClass.contains(DestReg)) { - assert(AArch64::GPR32RegClass.contains(SrcReg)); - Opc = AArch64::ORRwww_lsl; - ZeroReg = AArch64::WZR; + if(AArch64::GPR32RegClass.contains(SrcReg)){ + Opc = AArch64::ORRwww_lsl; + ZeroReg = AArch64::WZR; + } else{ + assert(AArch64::FPR32RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg) + .addReg(SrcReg); + return; + } } else if (AArch64::FPR32RegClass.contains(DestReg)) { - assert(AArch64::FPR32RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) - .addReg(SrcReg); - return; + if(AArch64::FPR32RegClass.contains(SrcReg)){ + BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) + .addReg(SrcReg); + return; + } + else { + assert(AArch64::GPR32RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg) + .addReg(SrcReg); + return; + } } else if (AArch64::FPR64RegClass.contains(DestReg)) { - assert(AArch64::FPR64RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) - .addReg(SrcReg); - return; + if(AArch64::FPR64RegClass.contains(SrcReg)){ + BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) + .addReg(SrcReg); + return; + } + else 
{ + assert(AArch64::GPR64RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg) + .addReg(SrcReg); + return; + } } else if (AArch64::FPR128RegClass.contains(DestReg)) { assert(AArch64::FPR128RegClass.contains(SrcReg)); diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index f600d24..5506aff 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -1034,6 +1034,20 @@ defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{ return (HasShift && !ShiftOnesIn); }]>; +def neon_uimm1_asmoperand : AsmOperandClass +{ + let Name = "UImm1"; + let PredicateMethod = "isUImm<1>"; + let RenderMethod = "addImmOperands"; +} + +def neon_uimm2_asmoperand : AsmOperandClass +{ + let Name = "UImm2"; + let PredicateMethod = "isUImm<2>"; + let RenderMethod = "addImmOperands"; +} + def neon_uimm8_asmoperand : AsmOperandClass { let Name = "UImm8"; @@ -3015,3 +3029,363 @@ def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; + +def neon_uimm0_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm0_asmoperand; + let PrintMethod = "printNeonUImm8OperandBare"; +} + +def neon_uimm1_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm1_asmoperand; + let PrintMethod = "printNeonUImm8OperandBare"; +} + +def neon_uimm2_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm2_asmoperand; + let PrintMethod = "printNeonUImm8OperandBare"; +} + +def neon_uimm3_bare : Operand, + ImmLeaf { + let ParserMatchClass = uimm3_asmoperand; + let PrintMethod = "printNeonUImm8OperandBare"; +} + +def neon_uimm4_bare : Operand, + ImmLeaf { + let ParserMatchClass = uimm4_asmoperand; + let PrintMethod = "printNeonUImm8OperandBare"; +} + +class NeonI_INS_main + : NeonI_copy<0b1, 0b0, 0b0011, + (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm), + asmop # "\t$Rd." # Res # "[$Imm], $Rn", + [(set (ResTy VPR128:$Rd), + (ResTy (vector_insert + (ResTy VPR128:$src), + (OpTy OpGPR:$Rn), + (OpImm:$Imm))))], + NoItinerary> { + bits<4> Imm; + let Constraints = "$src = $Rd"; +} + + +//Insert element (vector, from main) +def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32, + neon_uimm4_bare> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32, + neon_uimm3_bare> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32, + neon_uimm2_bare> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} +def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64, + neon_uimm1_bare> { + let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; +} + +class Neon_INS_main_pattern + : Pat<(ResTy (vector_insert + (ResTy VPR64:$src), + (OpTy OpGPR:$Rn), + (OpImm:$Imm))), + (ResTy (EXTRACT_SUBREG + (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), + OpGPR:$Rn, OpImm:$Imm)), sub_64))>; + +def INSbw_pattern : Neon_INS_main_pattern; +def INShw_pattern : Neon_INS_main_pattern; +def INSsw_pattern : Neon_INS_main_pattern; +def INSdx_pattern : Neon_INS_main_pattern; + +class NeonI_INS_element + : NeonI_insert<0b1, 0b1, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, + ResImm:$Immd, ResImm:$Immn), + asmop # "\t$Rd." # Res # "[$Immd], $Rn." 
# Res # "[$Immn]", + [(set (ResTy VPR128:$Rd), + (ResTy (vector_insert + (ResTy VPR128:$src), + (MidTy (vector_extract + (ResTy VPR128:$Rn), + (ResImm:$Immn))), + (ResImm:$Immd))))], + NoItinerary> { + let Constraints = "$src = $Rd"; + bits<4> Immd; + bits<4> Immn; +} + +//Insert element (vector, from element) +def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> { + let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1}; + let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}}; +} +def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> { + let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0}; + let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}}; + // bit 11 is unspecified. +} +def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> { + let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0}; + let Inst{14-13} = {Immn{1}, Immn{0}}; + // bits 11-12 are unspecified. +} +def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> { + let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0}; + let Inst{14} = Immn{0}; + // bits 11-13 are unspecified. +} + +multiclass Neon_INS_elt_pattern { +def : Pat<(NaTy (vector_insert + (NaTy VPR64:$src), + (MidTy (vector_extract + (StTy VPR128:$Rn), + (StImm:$Immn))), + (NaImm:$Immd))), + (NaTy (EXTRACT_SUBREG + (StTy (INS + (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), + (StTy VPR128:$Rn), + NaImm:$Immd, + StImm:$Immn)), + sub_64))>; + +def : Pat<(StTy (vector_insert + (StTy VPR128:$src), + (MidTy (vector_extract + (NaTy VPR64:$Rn), + (NaImm:$Immn))), + (StImm:$Immd))), + (StTy (INS + (StTy VPR128:$src), + (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + StImm:$Immd, + NaImm:$Immn))>; + +def : Pat<(NaTy (vector_insert + (NaTy VPR64:$src), + (MidTy (vector_extract + (NaTy VPR64:$Rn), + (NaImm:$Immn))), + (NaImm:$Immd))), + (NaTy (EXTRACT_SUBREG + (StTy (INS + (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), + (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Immd, + NaImm:$Immn)), + sub_64))>; +} + +defm INSb_pattern : Neon_INS_elt_pattern; +defm INSh_pattern : Neon_INS_elt_pattern; +defm INSs_pattern : Neon_INS_elt_pattern; +defm INSd_pattern : Neon_INS_elt_pattern; + +class NeonI_SMOV + : NeonI_copy { + bits<4> Imm; +} + +//Signed integer move (main, from element) +def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare, + GPR64, i64> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare, + GPR64, i64> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare, + GPR64, i64> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} + +multiclass Neon_SMOVx_pattern { + def : Pat<(i64 (sext_inreg + (i64 (anyext + (i32 (vector_extract + (StTy VPR128:$Rn), (StImm:$Imm))))), + eleTy)), + (SMOVI VPR128:$Rn, StImm:$Imm)>; + + def : Pat<(i64 (sext + (i32 (vector_extract + (StTy VPR128:$Rn), (StImm:$Imm))))), + (SMOVI VPR128:$Rn, StImm:$Imm)>; + + def : Pat<(i64 (sext_inreg + (i64 (vector_extract + (NaTy VPR64:$Rn), (NaImm:$Imm))), + eleTy)), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; + + def : Pat<(i64 
(sext_inreg + (i64 (anyext + (i32 (vector_extract + (NaTy VPR64:$Rn), (NaImm:$Imm))))), + eleTy)), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; + + def : Pat<(i64 (sext + (i32 (vector_extract + (NaTy VPR64:$Rn), (NaImm:$Imm))))), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; +} + +defm SMOVxb_pattern : Neon_SMOVx_pattern; +defm SMOVxh_pattern : Neon_SMOVx_pattern; +defm SMOVxs_pattern : Neon_SMOVx_pattern; + +class Neon_SMOVw_pattern + : Pat<(i32 (sext_inreg + (i32 (vector_extract + (NaTy VPR64:$Rn), (NaImm:$Imm))), + eleTy)), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; + +def SMOVwb_pattern : Neon_SMOVw_pattern; +def SMOVwh_pattern : Neon_SMOVw_pattern; + + +class NeonI_UMOV + : NeonI_copy { + bits<4> Imm; +} + +//Unsigned integer move (main, from element) +def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} +def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare, + GPR64, i64> { + let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; +} + +class Neon_UMOV_pattern + : Pat<(ResTy (vector_extract + (NaTy VPR64:$Rn), NaImm:$Imm)), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; + +def UMOVwb_pattern : Neon_UMOV_pattern; +def UMOVwh_pattern : Neon_UMOV_pattern; +def UMOVws_pattern : Neon_UMOV_pattern; +def UMOVxd_pattern : Neon_UMOV_pattern; + +def : Pat<(i32 (and + (i32 (vector_extract + (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))), + 255)), + (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>; + +def : Pat<(i32 (and + (i32 (vector_extract + (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))), + 65535)), + (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>; + +def : Pat<(i64 (zext + (i32 (vector_extract + (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))), + (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>; + +def : Pat<(i32 (and + (i32 (vector_extract + (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))), + 255)), + (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), + neon_uimm3_bare:$Imm)>; + +def : Pat<(i32 (and + (i32 (vector_extract + (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))), + 65535)), + (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), + neon_uimm2_bare:$Imm)>; + +def : Pat<(i64 (zext + (i32 (vector_extract + (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))), + (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), + neon_uimm0_bare:$Imm)>; + diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index d2b6f6d..26bd797 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -475,6 +475,18 @@ void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, O.write_hex(Imm); } +void AArch64InstPrinter::printNeonUImm8OperandBare(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MOUImm = MI->getOperand(OpNum); + + assert(MOUImm.isImm() + && "Immediate operand required for Neon vector immediate inst."); + + unsigned Imm = MOUImm.getImm(); + O << Imm; +} + void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { diff 
--git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index b608908..71c9f4a 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -170,6 +170,8 @@ public: raw_ostream &O); void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printNeonUImm8OperandBare(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; -- cgit v1.1 From 0faffd1aea24995cefade95c495294dbb663ca82 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 17 Sep 2013 06:05:17 +0000 Subject: Fix column alignment. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190849 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 9b02b07..9137e1d 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -587,10 +587,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // AVX-512 foldable instructions { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, - { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 }, - { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 }, - { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 }, - { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 }, + { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 }, + { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 }, + { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 }, + { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { -- cgit v1.1 From c8f377d5ec99744a43c6c5705d1e41e047a3d80b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 17 Sep 2013 06:50:11 +0000 Subject: Add AES and SHA instructions to the load folding tables. 
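
Each table entry pairs a register-form opcode with its memory-form twin plus
flags such as TB_ALIGN_16, the minimum alignment the folded load must have.
A schematic C++ sketch of the lookup (illustrative only; in the real backend
the static arrays are folded into lookup maps at construction, and the
opcode names below are placeholders):

    #include <unordered_map>

    // Memory-form opcode plus the minimum alignment the folded load needs
    // (mirroring flags such as TB_ALIGN_16 in the tables below).
    struct MemFoldInfo {
      unsigned MemOpcode;
      unsigned MinAlign;
    };

    // Placeholder table; real entries would map e.g. AESENCrr -> AESENCrm.
    static const std::unordered_map<unsigned, MemFoldInfo> FoldTable = {
        // {AESENCrr, {AESENCrm, 16}}, ...
    };

    // Return the memory-form opcode if the load may be folded, else 0.
    static unsigned tryFoldLoad(unsigned RegOpcode, unsigned LoadAlign) {
      auto It = FoldTable.find(RegOpcode);
      if (It == FoldTable.end() || LoadAlign < It->second.MinAlign)
        return 0;
      return It->second.MemOpcode;
    }
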
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190850 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 9137e1d..80d681a 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -591,6 +591,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 }, { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 }, { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 }, + + // AES foldable instructions + { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, + { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 }, + { X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 }, + { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -1219,6 +1225,25 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, { X86::VALIGNQrri, X86::VALIGNQrmi, 0 }, { X86::VALIGNDrri, X86::VALIGNDrmi, 0 }, + + // AES foldable instructions + { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, + { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 }, + { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 }, + { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 }, + { X86::VAESDECLASTrr, X86::VAESDECLASTrm, TB_ALIGN_16 }, + { X86::VAESDECrr, X86::VAESDECrm, TB_ALIGN_16 }, + { X86::VAESENCLASTrr, X86::VAESENCLASTrm, TB_ALIGN_16 }, + { X86::VAESENCrr, X86::VAESENCrm, TB_ALIGN_16 }, + + // SHA foldable instructions + { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 }, + { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 }, + { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 }, + { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 }, + { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 }, + { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 }, + { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { -- cgit v1.1 From 8e5fae2b1bee2b7a944360b91df0d223d06b7927 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 17 Sep 2013 07:34:34 +0000 Subject: AVX-512: Converted to Unix style git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190851 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 6140 +++++++++++++++++++------------------- 1 file changed, 3070 insertions(+), 3070 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index da2b9c1..884d725 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1,3070 +1,3070 @@ -// Bitcasts between 512-bit vector types. 
Return the original type since -// no instruction is needed for the conversion -let Predicates = [HasAVX512] in { - def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>; - def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>; - def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>; - def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>; - def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>; - def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; - - def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>; - def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>; - -// Bitcasts between 256-bit vector types. 
Return the original type since -// no instruction is needed for the conversion - def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>; - def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>; -} - -// -// AVX-512: VPXOR instruction writes zero to its upper part, it's safe build zeros. 
-// - -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, Predicates = [HasAVX512] in { -def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "", - [(set VR512:$dst, (v16f32 immAllZerosV))]>; -} - -def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; -def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>; -def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; -def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; - -//===----------------------------------------------------------------------===// -// AVX-512 - VECTOR INSERT -// -// -- 32x8 form -- -let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { -def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR128X:$src2, i8imm:$src3), - "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512; -let mayLoad = 1 in -def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, f128mem:$src2, i8imm:$src3), - "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; -} - -// -- 64x4 fp form -- -let neverHasSideEffects = 1, ExeDomain = SSEPackedDouble in { -def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR256X:$src2, i8imm:$src3), - "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W; -let mayLoad = 1 in -def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i256mem:$src2, i8imm:$src3), - "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; -} -// -- 32x4 integer form -- -let neverHasSideEffects = 1 in { -def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR128X:$src2, i8imm:$src3), - "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512; -let mayLoad = 1 in -def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i128mem:$src2, i8imm:$src3), - "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; - -} - -let neverHasSideEffects = 1 in { -// -- 64x4 form -- -def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst), - (ins VR512:$src1, VR256X:$src2, i8imm:$src3), - "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W; -let mayLoad = 1 in -def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst), - (ins VR512:$src1, i256mem:$src2, i8imm:$src3), - "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; -} - -def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2), - (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2), - (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2), - (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2), - (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; - -def : 
Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2), - (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), - (bc_v4i32 (loadv2i64 addr:$src2)), - (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2), - (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2), - (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert128_imm VR512:$ins))>; - -def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2), - (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2), - (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2), - (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2), - (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; - -def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2), - (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2), - (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2), - (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; -def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), - (bc_v8i32 (loadv4i64 addr:$src2)), - (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, - (INSERT_get_vinsert256_imm VR512:$ins))>; - -// vinsertps - insert f32 to XMM -def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3), - "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>, - EVEX_4V; -def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), - (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3), - "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insrtps VR128X:$src1, - (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>; - -//===----------------------------------------------------------------------===// -// AVX-512 VECTOR EXTRACT -//--- -let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { -// -- 32x4 form -- -def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst), - (ins VR512:$src1, i8imm:$src2), - "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512; -def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs), - (ins f128mem:$dst, VR512:$src1, i8imm:$src2), - "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>; - -// -- 64x4 form -- -def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst), - (ins 
-    (ins VR512:$src1, i8imm:$src2),
-    "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    []>, EVEX, EVEX_V512, VEX_W;
-let mayStore = 1 in
-def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
-    (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
-    "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
-}
-
-let neverHasSideEffects = 1 in {
-// -- 32x4 form --
-def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
-    (ins VR512:$src1, i8imm:$src2),
-    "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    []>, EVEX, EVEX_V512;
-def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
-    (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
-    "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
-
-// -- 64x4 form --
-def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
-    (ins VR512:$src1, i8imm:$src2),
-    "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    []>, EVEX, EVEX_V512, VEX_W;
-let mayStore = 1 in
-def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
-    (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
-    "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
-}
-
-def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
-          (v4f32 (VEXTRACTF32x4rr VR512:$src1,
-                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext VR512:$src1, (iPTR imm)),
-          (v4i32 (VEXTRACTF32x4rr VR512:$src1,
-                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
-          (v2f64 (VEXTRACTF32x4rr VR512:$src1,
-                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
-          (v2i64 (VEXTRACTI32x4rr VR512:$src1,
-                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-
-def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
-          (v8f32 (VEXTRACTF64x4rr VR512:$src1,
-                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
-          (v8i32 (VEXTRACTI64x4rr VR512:$src1,
-                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
-          (v4f64 (VEXTRACTF64x4rr VR512:$src1,
-                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
-          (v4i64 (VEXTRACTI64x4rr VR512:$src1,
-                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
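[Aside, not part of the patch: the next hunk expresses that extracting the low 128/256 bits of a zmm register is free, since xmm/ymm registers alias the low bits of the corresponding zmm register. A minimal C++ sketch of the effect, assuming a compiler with AVX-512F intrinsics; with these patterns, index 0 should cost no instruction at all:

```cpp
#include <immintrin.h>

// Extract the low 256-bit half of a 512-bit vector. Per the
// extract_subvector/sub_ymm patterns below, subvector index 0 is a
// plain subregister copy, so no vextracti64x4 needs to be emitted.
__m256i low_half(__m512i v) {
  return _mm512_extracti64x4_epi64(v, 0);
}
```
]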
-// A 256-bit subvector extract from the first 512-bit vector position
-// is a subregister copy that needs no instruction.
-def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
-          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
-def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
-          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
-def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
-          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
-def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
-          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
-
-// zmm -> xmm
-def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
-          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
-def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
-          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
-def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
-          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
-def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
-          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
-
-
-// A 128-bit subvector insert to the first 512-bit vector position
-// is a subregister copy that needs no instruction.
-def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
-          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-          sub_ymm)>;
-def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
-          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-          sub_ymm)>;
-def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
-          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-          sub_ymm)>;
-def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
-          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-          sub_ymm)>;
-
-def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
-def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
-def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
-def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
-          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
-
-// vextractps - extract 32 bits from XMM
-def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
-    (ins VR128X:$src1, u32u8imm:$src2),
-    "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
-    EVEX;
-
-def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
-    (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
-    "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
-        addr:$dst)]>, EVEX;
-
-//===---------------------------------------------------------------------===//
-// AVX-512 BROADCAST
-//---
-multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
-    RegisterClass DestRC,
-    RegisterClass SrcRC, X86MemOperand x86memop> {
-  def rr : AVX5128I, EVEX;
-  def rm : AVX5128I, EVEX;
-}
-let ExeDomain = SSEPackedSingle in {
-  defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss{z}", VR512,
-      VR128X, f32mem>,
-      EVEX_V512, EVEX_CD8<32, CD8VT1>;
-}
-
-let ExeDomain = SSEPackedDouble in {
-  defm VBROADCASTSDZ : avx512_fp_broadcast<0x19,
"vbroadcastsd{z}", VR512, - VR128X, f64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -} - -def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSZrm addr:$src)>; -def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSDZrm addr:$src)>; - -multiclass avx512_int_broadcast_reg opc, string OpcodeStr, - RegisterClass SrcRC, RegisterClass KRC> { - def Zrr : AVX5128I, EVEX, EVEX_V512; - def Zkrr : AVX5128I, EVEX, EVEX_V512, EVEX_KZ; -} - -defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>; -defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>, - VEX_W; - -def : Pat <(v16i32 (X86vzext VK16WM:$mask)), - (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>; - -def : Pat <(v8i64 (X86vzext VK8WM:$mask)), - (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>; - -def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))), - (VPBROADCASTDrZrr GR32:$src)>; -def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), - (VPBROADCASTQrZrr GR64:$src)>; - -multiclass avx512_int_broadcast_rm opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, - RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, - RegisterClass KRC> { - def rr : AVX5128I, EVEX; - def krr : AVX5128I, - EVEX, EVEX_KZ; - def rm : AVX5128I, EVEX; - def krm : AVX5128I, EVEX, EVEX_KZ; -} - -defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, - loadi32, VR512, v16i32, v4i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; -defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, - loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VT1>; - -def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))), - (VBROADCASTSSZrr VR128X:$src)>; -def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))), - (VBROADCASTSDZrr VR128X:$src)>; - -// Provide fallback in case the load node that is used in the patterns above -// is used by additional users, which prevents the pattern selection. 
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
-          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
-def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
-          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
-
-
-let Predicates = [HasAVX512] in {
-def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
-          (EXTRACT_SUBREG
-            (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-                       addr:$src)), sub_ymm)>;
-}
-//===----------------------------------------------------------------------===//
-// AVX-512 BROADCAST MASK TO VECTOR REGISTER
-//---
-
-multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
-    RegisterClass DstRC, RegisterClass KRC,
-    ValueType OpVT, ValueType SrcVT> {
-def rr : AVX512XS8I, EVEX;
-}
-
-defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
-    VK16, v16i32, v16i1>, EVEX_V512;
-defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
-    VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - VPERM
-//
-// -- immediate form --
-multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
-    SDNode OpNode, PatFrag mem_frag,
-    X86MemOperand x86memop, ValueType OpVT> {
-  def ri : AVX512AIi8,
-      EVEX;
-  def mi : AVX512AIi8, EVEX;
-}
-
-defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
-    i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
-    f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-// -- VPERM - register form --
-multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
-    PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
-
-  def rr : AVX5128I, EVEX_4V;
-
-  def rm : AVX5128I,
-      EVEX_4V;
-}
-
-defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
-    v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
-    v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedSingle in
-defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
-    v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
-    v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-// -- VPERM2I - 3 source operands form --
-multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
-    PatFrag mem_frag, X86MemOperand x86memop,
-    ValueType OpVT> {
-let Constraints = "$src1 = $dst" in {
-  def rr : AVX5128I,
-      EVEX_4V;
-
-  def rm : AVX5128I, EVEX_4V;
-  }
-}
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
-    v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
-    v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem,
-    v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
-    v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - BLEND using mask
-//
-multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
-    RegisterClass KRC, RegisterClass RC,
-    X86MemOperand x86memop, PatFrag mem_frag,
-    SDNode OpNode, ValueType vt> {
-  def rr : AVX5128I, EVEX_4V, EVEX_K;
-  def rm : AVX5128I,
-      EVEX_4V, EVEX_K;
-}
-
-let ExeDomain = SSEPackedSingle in
-defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem,
-    memopv16f32, vselect, v16f32>,
-    EVEX_CD8<32, CD8VF>, EVEX_V512;
-let ExeDomain = SSEPackedDouble in
-defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem,
-    memopv8f64, vselect, v8f64>,
-    VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
-
-defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512, f512mem,
-    memopv8i64, vselect, v16i32>,
-    EVEX_CD8<32, CD8VF>, EVEX_V512;
-
-defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512, f512mem,
-    memopv8i64, vselect, v8i64>, VEX_W,
-    EVEX_CD8<64, CD8VF>, EVEX_V512;
-
-
-let Predicates = [HasAVX512] in {
-def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
-            (v8f32 VR256X:$src2))),
-          (EXTRACT_SUBREG
-            (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
-            (v8i32 VR256X:$src2))),
-          (EXTRACT_SUBREG
-            (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-}
-
-multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-    RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
-    SDNode OpNode, ValueType vt> {
-  def rr : AVX512BI, EVEX_4V;
-  def rm : AVX512BI, EVEX_4V;
-}
-
-defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
-    memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512;
-defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
-    memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W;
-
-defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
-    memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512;
-defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
-    memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W;
-
-def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
-          (COPY_TO_REGCLASS (VPCMPGTDZrr
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
-
-def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
-          (COPY_TO_REGCLASS (VPCMPEQDZrr
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
-
-multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
-    RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
-    SDNode OpNode, ValueType vt, Operand CC, string asm,
-    string asm_alt> {
-  def rri : AVX512AIi8, EVEX_4V;
-  def rmi : AVX512AIi8, EVEX_4V;
-  // Accept explicit immediate argument form instead of comparison code.
-  let neverHasSideEffects = 1 in {
-    def rri_alt : AVX512AIi8, EVEX_4V;
-    def rmi_alt : AVX512AIi8, EVEX_4V;
-  }
-}
-
-defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32,
-    X86cmpm, v16i32, AVXCC,
-    "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32,
-    X86cmpmu, v16i32, AVXCC,
-    "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
-    X86cmpm, v8i64, AVXCC,
-    "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-    VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
-    X86cmpmu, v8i64, AVXCC,
-    "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
-    VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-// avx512_cmp_packed - sse 1 & 2 compare packed instructions
-multiclass avx512_cmp_packed {
-  def rri : AVX512PIi8<0xC2, MRMSrcReg,
-      (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
-      [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
-  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
-      (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
-      [(set KRC:$dst,
-        (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
-
-  // Accept explicit immediate argument form instead of comparison code.
-  let neverHasSideEffects = 1 in {
-    def rri_alt : PIi8<0xC2, MRMSrcReg,
-        (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
-        asm_alt, [], IIC_SSE_ALU_F32P_RR, d>;
-    def rmi_alt : PIi8<0xC2, MRMSrcMem,
-        (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
-        asm_alt, [], IIC_SSE_ALU_F32P_RM, d>;
-  }
-}
-
-defm VCMPPSZ : avx512_cmp_packed, TB, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VCMPPDZ : avx512_cmp_packed, TB, OpSize, EVEX_4V, VEX_W, EVEX_V512,
-    EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
-          (COPY_TO_REGCLASS (VCMPPSZrri
-            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-            imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
-          (COPY_TO_REGCLASS (VPCMPDZrri
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-            imm:$cc), VK8)>;
-def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
-          (COPY_TO_REGCLASS (VPCMPUDZrri
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
-            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
-            imm:$cc), VK8)>;
-
-// Mask register copy, including
-// - copy between mask registers
-// - load/store mask registers
-// - copy from GPR to mask register and vice versa
-//
-multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
-    string OpcodeStr, RegisterClass KRC,
-    ValueType vt, X86MemOperand x86memop> {
-  let neverHasSideEffects = 1 in {
-    def kk : I;
-    let mayLoad = 1 in
-    def km : I;
-    let mayStore = 1 in
-    def mk : I;
-  }
-}
-
-multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
-    string OpcodeStr,
-    RegisterClass KRC, RegisterClass GRC> {
-  let neverHasSideEffects = 1 in {
-    def kr : I;
-    def rk : I;
-  }
-}
-
-let Predicates = [HasAVX512] in {
-  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
-      VEX, TB;
-  defm KMOVW : avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
-      VEX, TB;
-}
-
-let Predicates = [HasAVX512] in {
-  // GR16 from/to 16-bit mask
-  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
-            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
-  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
-            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
-
-  // Store kreg in memory
-  def : Pat<(store (v16i1 VK16:$src), addr:$dst),
-            (KMOVWmk addr:$dst, VK16:$src)>;
-
-  def : Pat<(store (v8i1 VK8:$src), addr:$dst),
-            (KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>;
-}
-// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
-let Predicates = [HasAVX512] in {
-  // GR from/to 8-bit mask without native support
-  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
-            (COPY_TO_REGCLASS
-              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
-              VK8)>;
-  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
-            (EXTRACT_SUBREG
-              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
-              sub_8bit)>;
-}
-
-// Mask unary operation
-// - KNOT
-multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
-    RegisterClass KRC, SDPatternOperator OpNode> {
-  let Predicates = [HasAVX512] in
-  def rr : I;
-}
-
-multiclass avx512_mask_unop_w<bits<8> opc, string OpcodeStr,
-    SDPatternOperator OpNode> {
-  defm W : avx512_mask_unop,
-      VEX, TB;
-}
-
-defm KNOT : avx512_mask_unop_w<0x44, "knot", not>;
-
-def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
-def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
-          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
-
-// With AVX-512, 8-bit mask is promoted to 16-bit mask.
-def : Pat<(not VK8:$src),
-          (COPY_TO_REGCLASS
-            (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
-
-// Mask binary operation
-// - KADD, KAND, KANDN, KOR, KXNOR, KXOR
-multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
-    RegisterClass KRC, SDPatternOperator OpNode> {
-  let Predicates = [HasAVX512] in
-  def rr : I;
-}
-
-multiclass avx512_mask_binop_w<bits<8> opc, string OpcodeStr,
-    SDPatternOperator OpNode> {
-  defm W : avx512_mask_binop,
-      VEX_4V, VEX_L, TB;
-}
-
-def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
-def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
-
-let isCommutable = 1 in {
-  defm KADD : avx512_mask_binop_w<0x4a, "kadd", add>;
-  defm KAND : avx512_mask_binop_w<0x41, "kand", and>;
-  let isCommutable = 0 in
-  defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>;
-  defm KOR : avx512_mask_binop_w<0x45, "kor", or>;
-  defm KXNOR : avx512_mask_binop_w<0x46, "kxnor", xnor>;
-  defm KXOR : avx512_mask_binop_w<0x47, "kxor", xor>;
-}
-
-multiclass avx512_mask_binop_int {
-  let Predicates = [HasAVX512] in
-  def : Pat<(!cast("int_x86_"##IntName##"_v16i1")
-              VK16:$src1, VK16:$src2),
-            (!cast(InstName##"Wrr") VK16:$src1, VK16:$src2)>;
-}
-
-defm : avx512_mask_binop_int<"kadd", "KADD">;
-defm : avx512_mask_binop_int<"kand", "KAND">;
-defm : avx512_mask_binop_int<"kandn", "KANDN">;
-defm : avx512_mask_binop_int<"kor", "KOR">;
-defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
-defm : avx512_mask_binop_int<"kxor", "KXOR">;
-// With AVX-512, 8-bit mask is promoted to 16-bit mask.
-multiclass avx512_binop_pat {
-  let Predicates = [HasAVX512] in
-  def : Pat<(OpNode VK8:$src1, VK8:$src2),
-            (COPY_TO_REGCLASS
-              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
-                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
-}
-
-defm : avx512_binop_pat;
-defm : avx512_binop_pat;
-defm : avx512_binop_pat;
-defm : avx512_binop_pat;
-defm : avx512_binop_pat;
-
-// Mask unpacking
-multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
-    RegisterClass KRC1, RegisterClass KRC2> {
-  let Predicates = [HasAVX512] in
-  def rr : I;
-}
-
-multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
-  defm BW : avx512_mask_unpck,
-      VEX_4V, VEX_L, OpSize, TB;
-}
-
-defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
-
-multiclass avx512_mask_unpck_int {
-  let Predicates = [HasAVX512] in
-  def : Pat<(!cast("int_x86_"##IntName##"_v16i1")
-              VK8:$src1, VK8:$src2),
-            (!cast(InstName##"BWrr") VK8:$src1, VK8:$src2)>;
-}
-
-defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
-// Mask bit testing
-multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-    SDNode OpNode> {
-  let Predicates = [HasAVX512], Defs = [EFLAGS] in
-  def rr : I;
-}
-
-multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
-  defm W : avx512_mask_testop,
-      VEX, TB;
-}
-
-defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
-defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest>;
-
-// Mask shift
-multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-    SDNode OpNode> {
-  let Predicates = [HasAVX512] in
-  def ri : Ii8;
-}
-
-multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
-    SDNode OpNode> {
-  defm W : avx512_mask_shiftop,
-      VEX, OpSize, TA, VEX_W;
-}
-
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>;
-
-// Mask setting all 0s or 1s
-multiclass avx512_mask_setop {
-  let Predicates = [HasAVX512] in
-  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
-  def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
-                 [(set KRC:$dst, (VT Val))]>;
-}
-
-multiclass avx512_mask_setop_w {
-  defm B : avx512_mask_setop;
-  defm W : avx512_mask_setop;
-}
-
-defm KSET0 : avx512_mask_setop_w;
-defm KSET1 : avx512_mask_setop_w;
-
-// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
-let Predicates = [HasAVX512] in {
-  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
-  def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
-}
-def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
-          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
-
-def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
-          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
-
-def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
-          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - Aligned and unaligned load and store
-//
-
-multiclass avx512_mov_packed<bits<8> opc, RegisterClass RC, RegisterClass KRC,
-    X86MemOperand x86memop, PatFrag ld_frag,
-    string asm, Domain d> {
-let neverHasSideEffects = 1 in
-  def rr : AVX512PI,
-      EVEX;
-let canFoldAsLoad = 1 in
-  def rm : AVX512PI, EVEX;
-let Constraints = "$src1 = $dst" in {
-  def rrk : AVX512PI,
-      EVEX, EVEX_K;
-  def rmk : AVX512PI, EVEX, EVEX_K;
-}
-}
-
-defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
-    "vmovaps", SSEPackedSingle>,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
-    "vmovapd", SSEPackedDouble>,
-    OpSize, EVEX_V512, VEX_W,
-    EVEX_CD8<64, CD8VF>;
-defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32,
-    "vmovups", SSEPackedSingle>,
-    TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64,
-    "vmovupd", SSEPackedDouble>,
-    OpSize, EVEX_V512, VEX_W,
-    EVEX_CD8<64, CD8VF>;
-def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
-    "vmovaps\t{$src, $dst|$dst, $src}",
-    [(alignedstore512 (v16f32 VR512:$src), addr:$dst)],
-    SSEPackedSingle>, EVEX, EVEX_V512, TB,
-    EVEX_CD8<32, CD8VF>;
-def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
-    "vmovapd\t{$src, $dst|$dst, $src}",
-    [(alignedstore512 (v8f64 VR512:$src), addr:$dst)],
-    SSEPackedDouble>, EVEX, EVEX_V512,
-    OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>;
-def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
-    "vmovups\t{$src, $dst|$dst, $src}",
-    [(store (v16f32 VR512:$src), addr:$dst)],
-    SSEPackedSingle>, EVEX, EVEX_V512, TB,
-    EVEX_CD8<32, CD8VF>;
-def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
-    "vmovupd\t{$src, $dst|$dst, $src}",
-    [(store (v8f64 VR512:$src), addr:$dst)],
-    SSEPackedDouble>, EVEX, EVEX_V512,
-    OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>;
-
-// Use vmovaps/vmovups for AVX-512 integer load/store.
-// 512-bit load/store
-def : Pat<(alignedloadv8i64 addr:$src),
-          (VMOVAPSZrm addr:$src)>;
-def : Pat<(loadv8i64 addr:$src),
-          (VMOVUPSZrm addr:$src)>;
-
-def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
-          (VMOVAPSZmr addr:$dst, VR512:$src)>;
-def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
-          (VMOVAPSZmr addr:$dst, VR512:$src)>;
-
-def : Pat<(store (v8i64 VR512:$src), addr:$dst),
-          (VMOVUPDZmr addr:$dst, VR512:$src)>;
-def : Pat<(store (v16i32 VR512:$src), addr:$dst),
-          (VMOVUPSZmr addr:$dst, VR512:$src)>;
-
-let neverHasSideEffects = 1 in {
-  def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
-      (ins VR512:$src),
-      "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
-      EVEX, EVEX_V512;
-  def VMOVDQA64rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
-      (ins VR512:$src),
-      "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
-      EVEX, EVEX_V512, VEX_W;
-let mayStore = 1 in {
-  def VMOVDQA32mr : AVX512BI<0x7F, MRMDestMem, (outs),
-      (ins i512mem:$dst, VR512:$src),
-      "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
-      EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
-  def VMOVDQA64mr : AVX512BI<0x7F, MRMDestMem, (outs),
-      (ins i512mem:$dst, VR512:$src),
-      "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
-      EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-}
-let mayLoad = 1 in {
-def VMOVDQA32rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
-    (ins i512mem:$src),
-    "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
-    EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
-def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
-    (ins i512mem:$src),
-    "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
-    EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-}
-}
-
-multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC,
-    RegisterClass KRC,
-    PatFrag ld_frag, X86MemOperand x86memop> {
-let neverHasSideEffects = 1 in
-  def rr : AVX512XSI,
-      EVEX;
-let canFoldAsLoad = 1 in
-  def rm : AVX512XSI,
-      EVEX;
-let Constraints = "$src1 = $dst" in {
-  def rrk : AVX512XSI,
-      EVEX, EVEX_K;
-  def rmk : AVX512XSI, EVEX, EVEX_K;
-}
-}
-
-defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>,
-    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-let AddedComplexity = 20 in {
-def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
-            (v16f32 VR512:$src2))),
-          (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
-def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1),
-            (v8f64 VR512:$src2))),
-          (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
-def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1),
-            (v16i32 VR512:$src2))),
-          (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
-def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
-            (v8i64 VR512:$src2))),
-          (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
-}
-// Move Int Doubleword to Packed Double Int
-//
-def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
-    "vmovd{z}\t{$src, $dst|$dst, $src}",
-    [(set VR128X:$dst,
-      (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
-    EVEX, VEX_LIG;
-def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
-    "vmovd{z}\t{$src, $dst|$dst, $src}",
-    [(set VR128X:$dst,
-      (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
-    IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(set VR128X:$dst,
-      (v2i64 (scalar_to_vector GR64:$src)))],
-    IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
-def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(set FR64:$dst, (bitconvert GR64:$src))],
-    IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
-def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(set GR64:$dst, (bitconvert FR64:$src))],
-    IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
-def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
-    IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
-    EVEX_CD8<64, CD8VT1>;
-
-// Move Int Doubleword to Single Scalar
-//
-def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
-    "vmovd{z}\t{$src, $dst|$dst, $src}",
-    [(set FR32X:$dst, (bitconvert GR32:$src))],
-    IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
-
-def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
-    "vmovd{z}\t{$src, $dst|$dst, $src}",
-    [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
-    IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-
-// Move Packed Doubleword Int to Packed Double Int
-//
-def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
-    "vmovd{z}\t{$src, $dst|$dst, $src}",
-    [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
-      (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
-    EVEX, VEX_LIG;
-def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
-    (ins i32mem:$dst, VR128X:$src),
-    "vmovd{z}\t{$src, $dst|$dst, $src}",
-    [(store (i32 (vector_extract (v4i32 VR128X:$src),
-      (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
-    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-
-// Move Packed Doubleword Int first element to Doubleword Int
-//
-def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
-      (iPTR 0)))],
-    IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W,
-    Requires<[HasAVX512, In64BitMode]>;
-
-def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs),
-    (ins i64mem:$dst, VR128X:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
-      addr:$dst)], IIC_SSE_MOVDQ>,
-    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
-    Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
-
-// Move Scalar Single to Double Int
-//
-def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst),
-    (ins FR32X:$src),
-    "vmovd{z}\t{$src, $dst|$dst, $src}",
-    [(set GR32:$dst, (bitconvert FR32X:$src))],
-    IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
-def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
-    (ins i32mem:$dst, FR32X:$src),
-    "vmovd{z}\t{$src, $dst|$dst, $src}",
-    [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
-    IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-
-// Move Quadword Int to Packed Quadword Int
-//
-def VMOVQI2PQIZrm : AVX512SI<0x7E, MRMSrcMem, (outs VR128X:$dst),
-    (ins i64mem:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(set VR128X:$dst,
-      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
-    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 MOVSS, MOVSD
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_move_scalar {
-  def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
-      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-      [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
-        (scalar_to_vector RC:$src2))))],
-      IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
-  def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-      !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-      [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
-      EVEX, VEX_LIG;
-  def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
-      !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-      [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
-      EVEX, VEX_LIG;
-}
-
-let ExeDomain = SSEPackedSingle in
-defm VMOVSSZ : avx512_move_scalar<"movss{z}", FR32X, X86Movss, v4f32, f32mem,
-    loadf32>, XS, EVEX_CD8<32, CD8VT1>;
-
-let ExeDomain = SSEPackedDouble in
-defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem,
-    loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-
-
-// For the disassembler
-let isCodeGenOnly = 1 in {
-  def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
-      (ins VR128X:$src1, FR32X:$src2),
-      "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
-      IIC_SSE_MOV_S_RR>,
-      XS, EVEX_4V, VEX_LIG;
-  def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
-      (ins VR128X:$src1, FR64X:$src2),
-      "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
-      IIC_SSE_MOV_S_RR>,
-      XD, EVEX_4V, VEX_LIG, VEX_W;
-}
-
-let Predicates = [HasAVX512] in {
-  let AddedComplexity = 15 in {
-  // Move scalar to XMM zero-extended, zeroing a VR128X then do a
-  // MOVS{S,D} to the lower bits.
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
-            (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
-  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
-            (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
-            (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
-            (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
-
-  // Move low f32 and clear high bits.
-  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSSZrr (v4f32 (V_SET0)),
-                (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
-  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSSZrr (v4i32 (V_SET0)),
-                (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
-  }
-
-  let AddedComplexity = 20 in {
-  // MOVSSrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-
-  // MOVSDrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (X86vzload addr:$src)),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-
-  // Represent the same patterns above but in the form they appear for
-  // 256-bit types
-  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-              (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
-              (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
-              (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-  }
-  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
-              (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
-              FR32X:$src)), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
-              (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
-              FR64X:$src)), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-              (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-
-  // Move low f64 and clear high bits.
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSDZrr (v2f64 (V_SET0)),
-                (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
-
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
-              (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
-
-  // Extract and store.
-  def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
-              addr:$dst),
-            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
-  def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
-              addr:$dst),
-            (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
-
-  // Shuffle with VMOVSS
-  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
-            (VMOVSSZrr (v4i32 VR128X:$src1),
-              (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
-  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
-            (VMOVSSZrr (v4f32 VR128X:$src1),
-              (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
-
-  // 256-bit variants
-  def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
-                (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
-              sub_xmm)>;
-  def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
-                (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
-              sub_xmm)>;
-
-  // Shuffle with VMOVSD
-  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-
-  // 256-bit variants
-  def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
-                (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
-              sub_xmm)>;
-  def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
-            (SUBREG_TO_REG (i32 0),
-              (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
-                (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
-              sub_xmm)>;
-
-  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-}
-
-let AddedComplexity = 15 in
-def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
-    (ins VR128X:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(set VR128X:$dst, (v2i64 (X86vzmovl
-      (v2i64 VR128X:$src))))],
-    IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
-
-let AddedComplexity = 20 in
-def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
-    (ins i128mem:$src),
-    "vmovq{z}\t{$src, $dst|$dst, $src}",
-    [(set VR128X:$dst, (v2i64 (X86vzmovl
-      (loadv2i64 addr:$src))))],
-    IIC_SSE_MOVDQ>, EVEX, VEX_W,
-    EVEX_CD8<8, CD8VT8>;
-
-let Predicates = [HasAVX512] in {
-  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
-  let AddedComplexity = 20 in {
-  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-            (VMOVDI2PDIZrm addr:$src)>;
-
-  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
-            (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
-            (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
-            (VMOVZPQILo2PQIZrm addr:$src)>;
-  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
-            (VMOVZPQILo2PQIZrr VR128X:$src)>;
-  }
-  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
-  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-              (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-              (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
-}
-
-def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
-          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
-          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
-
-def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
-          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
-          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - Integer arithmetic
-//
-multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-    ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
-    X86MemOperand x86memop, PatFrag scalar_mfrag,
-    X86MemOperand x86scalar_mop, string BrdcstStr,
-    OpndItins itins, bit IsCommutable = 0> {
-  let isCommutable = IsCommutable in
-  def rr : AVX512BI, EVEX_4V;
-  def rm : AVX512BI, EVEX_4V;
-  def rmb : AVX512BI, EVEX_4V, EVEX_B;
-}
-multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
-    ValueType DstVT, ValueType SrcVT, RegisterClass RC,
-    PatFrag memop_frag, X86MemOperand x86memop,
-    OpndItins itins,
-    bit IsCommutable = 0> {
-  let isCommutable = IsCommutable in
-  def rr : AVX512BI, EVEX_4V, VEX_W;
-  def rm : AVX512BI, EVEX_4V, VEX_W;
-}
-
-defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32,
-    i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32,
-    i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32,
-    i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
-    T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64,
-    i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>,
-    EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
-
-defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64,
-    i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
-    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32,
-    VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8,
-    EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
-    VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512,
-    EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
-          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - Unpack Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
-    PatFrag mem_frag, RegisterClass RC,
-    X86MemOperand x86memop, string asm,
-    Domain d> {
-  def rr : AVX512PI, EVEX_4V, TB;
-  def rm : AVX512PI, EVEX_4V, TB;
-}
-
-defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
-    VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
-    VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
-    VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
-    VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-    SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
-    ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
-    X86MemOperand x86memop> {
-  def rr : AVX512BI, EVEX_4V;
-  def rm : AVX512BI, EVEX_4V;
-}
-defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
-    VR512, memopv16i32, i512mem>, EVEX_V512,
-    EVEX_CD8<32, CD8VF>;
-defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
-    VR512, memopv8i64, i512mem>, EVEX_V512,
-    VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
-    VR512, memopv16i32, i512mem>, EVEX_V512,
-    EVEX_CD8<32, CD8VF>;
-defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
-    VR512, memopv8i64, i512mem>, EVEX_V512,
-    VEX_W, EVEX_CD8<64, CD8VF>;
-//===----------------------------------------------------------------------===//
-// AVX-512 - PSHUFD
-//
-
-multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
-    SDNode OpNode, PatFrag mem_frag,
-    X86MemOperand x86memop, ValueType OpVT> {
-  def ri : AVX512Ii8,
-      EVEX;
-  def mi : AVX512Ii8, EVEX;
-}
-
-defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
-    i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-let ExeDomain = SSEPackedSingle in
-defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
-    memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512,
-    EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
-    memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512,
-    VEX_W, EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
-          (VPERMILPSZri VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
-          (VPERMILPDZri VR512:$src1, imm:$imm)>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 Logical Instructions
-//===----------------------------------------------------------------------===//
-
-defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32,
-    i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64,
-    i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
-    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32,
-    i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64,
-    i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
-    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32,
-    i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64,
-    i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
-    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512,
-    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
-    SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64,
-    i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>,
-    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 FP arithmetic
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
-    SizeItins itins> {
-  defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG,
-      EVEX_CD8<32, CD8VT1>;
-  defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG,
-      EVEX_CD8<64, CD8VT1>;
-}
-
-let isCommutable = 1 in {
-defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
-defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
-defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
-defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
-}
-let isCommutable = 0 in {
-defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
-defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
-}
-
-multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
-    RegisterClass RC, ValueType vt,
-    X86MemOperand x86memop, PatFrag mem_frag,
-    X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
-    string BrdcstStr,
-    Domain d, OpndItins itins, bit commutable> {
-  let isCommutable = commutable in
-  def rr : PI,
-      EVEX_4V;
-  let mayLoad = 1 in {
-    def rm : PI, EVEX_4V;
-    def rmb : PI, EVEX_4V, EVEX_B;
-  }
-}
-
-defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem,
-    memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
-    SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem,
-    memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
-    SSE_ALU_ITINS_P.d, 1>,
-    EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem,
-    memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
-    SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem,
-    memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
-    SSE_ALU_ITINS_P.d, 1>,
-    EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem,
"{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 1>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 1>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; -defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, - SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 0>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; -defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, - SSE_ALU_ITINS_P.d, 0>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; - -//===----------------------------------------------------------------------===// -// AVX-512 VPTESTM instructions -//===----------------------------------------------------------------------===// - -multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, - SDNode OpNode, ValueType vt> { - def rr : AVX5128I, EVEX_4V; - def rm : AVX5128I, EVEX_4V; -} - -defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem, - memopv16i32, X86testm, v16i32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, - memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - -//===----------------------------------------------------------------------===// -// AVX-512 Shift instructions -//===----------------------------------------------------------------------===// -multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode, RegisterClass RC, - ValueType vt, X86MemOperand x86memop, PatFrag mem_frag, - RegisterClass KRC> { - def ri : AVX512BIi8, EVEX_4V; - def rik : AVX512BIi8, EVEX_4V, EVEX_K; - def mi: AVX512BIi8, EVEX_4V; - def mik: AVX512BIi8, EVEX_4V, EVEX_K; -} - -multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType vt, ValueType SrcVT, - PatFrag bc_frag, RegisterClass KRC> { - // src2 is always 128-bit - def rr : AVX512BI, EVEX_4V; - def rrk : AVX512BI, EVEX_4V, EVEX_K; - def rm : AVX512BI, EVEX_4V; - def rmk : AVX512BI, EVEX_4V, EVEX_K; -} - -defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli, - VR512, v16i32, i512mem, memopv16i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, - VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512, - EVEX_CD8<32, CD8VQ>; - -defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli, - VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512, - EVEX_CD8<64, 
-    EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
-    VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
-    EVEX_CD8<64, CD8VQ>, VEX_W;
-
-defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
-    VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
-    EVEX_CD8<32, CD8VF>;
-defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
-    VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
-    EVEX_CD8<32, CD8VQ>;
-
-defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
-    VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
-    EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
-    VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
-    EVEX_CD8<64, CD8VQ>, VEX_W;
-
-defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
-    VR512, v16i32, i512mem, memopv16i32, VK16WM>,
-    EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
-    VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
-    EVEX_CD8<32, CD8VQ>;
-
-defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
-    VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
-    EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
-    VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
-    EVEX_CD8<64, CD8VQ>, VEX_W;
-
-//===-------------------------------------------------------------------===//
-// Variable Bit Shifts
-//===-------------------------------------------------------------------===//
-multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
-    RegisterClass RC, ValueType vt,
-    X86MemOperand x86memop, PatFrag mem_frag> {
-  def rr : AVX5128I,
-      EVEX_4V;
-  def rm : AVX5128I,
-      EVEX_4V;
-}
-
-defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
-    i512mem, memopv16i32>, EVEX_V512,
-    EVEX_CD8<32, CD8VF>;
-defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
-    i512mem, memopv8i64>, EVEX_V512, VEX_W,
-    EVEX_CD8<64, CD8VF>;
-defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
-    i512mem, memopv16i32>, EVEX_V512,
-    EVEX_CD8<32, CD8VF>;
-defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
-    i512mem, memopv8i64>, EVEX_V512, VEX_W,
-    EVEX_CD8<64, CD8VF>;
-defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
-    i512mem, memopv16i32>, EVEX_V512,
-    EVEX_CD8<32, CD8VF>;
-defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
-    i512mem, memopv8i64>, EVEX_V512, VEX_W,
-    EVEX_CD8<64, CD8VF>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - MOVDDUP
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_movddup {
-def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-    [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
-def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-    [(set RC:$dst,
-      (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
-}
-
-defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
-    VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
-          (VMOVDDUPZrm addr:$src)>;
-
-def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
-    (ins VR128X:$src1, VR128X:$src2),
-    "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(X86Movlhps VR128X:$src1, VR128X:$src2)))], - IIC_SSE_MOV_LH>, EVEX_4V; -def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VR128X:$src2), - "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))], - IIC_SSE_MOV_LH>, EVEX_4V; - -// MOVLHPS patterns -def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)), - (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>; -def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)), - (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>; - -// MOVHLPS patterns -def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)), - (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>; - -//===----------------------------------------------------------------------===// -// FMA - Fused Multiply Operations -// -let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_rm opc, string OpcodeStr, - RegisterClass RC, X86MemOperand x86memop, - PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, - string BrdcstStr, SDNode OpNode, ValueType OpVT> { - def r: AVX512FMA3; - - let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; -} -} // Constraints = "$src1 = $dst" - -let ExeDomain = SSEPackedSingle in { - defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmadd, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsub, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmaddsub, v16f32>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsubadd, v16f32>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmadd, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmsub, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -} -let ExeDomain = SSEPackedDouble in { - defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmadd, v8f64>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmaddsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsubadd, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmadd, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; -} - -let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_m132 opc, string OpcodeStr, - 
RegisterClass RC, X86MemOperand x86memop, - PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, - string BrdcstStr, SDNode OpNode, ValueType OpVT> { - let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; -} -} // Constraints = "$src1 = $dst" - - -let ExeDomain = SSEPackedSingle in { - defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmadd, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsub, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmaddsub, v16f32>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsubadd, v16f32>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmadd, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmsub, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -} -let ExeDomain = SSEPackedDouble in { - defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmadd, v8f64>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmaddsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsubadd, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmadd, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; -} - -// Scalar FMA -let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType OpVT, - X86MemOperand x86memop, Operand memop, - PatFrag mem_frag> { - let isCommutable = 1 in - def r : AVX512FMA3; - let mayLoad = 1 in - def m : AVX512FMA3; -} - -} // Constraints = "$src1 = $dst" - -defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; 
-defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X, - f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X, - f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; - -//===----------------------------------------------------------------------===// -// AVX-512 Scalar convert from sign integer to float/double -//===----------------------------------------------------------------------===// - -multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { -let neverHasSideEffects = 1 in { - def rr : SI, EVEX_4V; - let mayLoad = 1 in - def rm : SI, EVEX_4V; -} // neverHasSideEffects = 1 -} - -defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; - -def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), - (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; -def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), - (VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>; -def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), - (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; -def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), - (VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>; - -def : Pat<(f32 (sint_to_fp GR32:$src)), - (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; -def : Pat<(f32 (sint_to_fp GR64:$src)), - (VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>; -def : Pat<(f64 (sint_to_fp GR32:$src)), - (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; -def : Pat<(f64 (sint_to_fp GR64:$src)), - (VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>; - - -//===----------------------------------------------------------------------===// -// AVX-512 Convert form float to double and back -//===----------------------------------------------------------------------===// -let neverHasSideEffects = 1 in { -def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst), - (ins FR32X:$src1, FR32X:$src2), - "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; -let mayLoad = 1 in -def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst), - (ins FR32X:$src1, f32mem:$src2), - "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, - EVEX_CD8<32, CD8VT1>; - -// Convert scalar double to scalar single -def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst), - (ins FR64X:$src1, FR64X:$src2), - "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>; -let mayLoad = 1 in -def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst), - (ins FR64X:$src1, f64mem:$src2), - "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX_4V, VEX_LIG, VEX_W, - Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>; -} - -def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>, - Requires<[HasAVX512]>; -def : Pat<(fextend (loadf32 
addr:$src)), - (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>; - -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX512, OptForSize]>; - -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, - Requires<[HasAVX512, OptForSpeed]>; - -def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, - Requires<[HasAVX512]>; - -multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { -let neverHasSideEffects = 1 in { - def rr : AVX512PI, EVEX; - let mayLoad = 1 in - def rm : AVX512PI, EVEX; -} // neverHasSideEffects = 1 -} - -defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround, - memopv8f64, f512mem, v8f32, v8f64, - SSEPackedSingle>, EVEX_V512, VEX_W, OpSize, - EVEX_CD8<64, CD8VF>; - -defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, - memopv4f64, f256mem, v8f64, v8f32, - SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>; -def : Pat<(v8f64 (extloadv8f32 addr:$src)), - (VCVTPS2PDZrm addr:$src)>; - -//===----------------------------------------------------------------------===// -// AVX-512 Vector convert from sign integer to float/double -//===----------------------------------------------------------------------===// - -defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, - memopv8i64, i512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, - memopv4i64, i256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; - -defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, - memopv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, XS, - EVEX_CD8<32, CD8VF>; - -defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - memopv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, OpSize, VEX_W, - EVEX_CD8<64, CD8VF>; - -defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, - memopv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - -defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, - memopv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - -defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, - memopv4i64, f256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; - -defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, - memopv16i32, f512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, XD, - EVEX_CD8<32, CD8VF>; - -def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), - (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr - (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; - - -def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src), - (VCVTDQ2PSZrr VR512:$src)>; -def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))), - (VCVTDQ2PSZrm addr:$src)>; - -def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), - "vcvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR512:$dst, - (int_x86_avx512_cvt_ps2dq_512 VR512:$src))], - IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512; -def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins 
f512mem:$src), - "vcvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR512:$dst, - (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; - - -let Predicates = [HasAVX512] in { - def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), - (VCVTPD2PSZrm addr:$src)>; - def : Pat<(v8f64 (extloadv8f32 addr:$src)), - (VCVTPS2PDZrm addr:$src)>; -} - -let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, - "ucomiss{z}">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd{z}">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - let Pattern = [] in { - defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, - "comiss{z}">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, - "comisd{z}">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - } - defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, - load, "ucomiss">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - - defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, - load, "comiss">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, - load, "comisd">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; -} - -/// avx512_unop_p - AVX-512 unops in packed form. -multiclass avx512_fp_unop_p opc, string OpcodeStr, SDNode OpNode> { - def PSZr : AVX5128I, - EVEX, EVEX_V512; - def PSZm : AVX5128I, - EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr : AVX5128I, - EVEX, EVEX_V512, VEX_W; - def PDZm : AVX5128I, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} - -/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms. -multiclass avx512_fp_unop_p_int opc, string OpcodeStr, - Intrinsic V16F32Int, Intrinsic V8F64Int> { - def PSZr_Int : AVX5128I, - EVEX, EVEX_V512; - def PSZm_Int : AVX5128I, EVEX, - EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr_Int : AVX5128I, - EVEX, EVEX_V512, VEX_W; - def PDZm_Int : AVX5128I, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} - -/// avx512_fp_unop_s - AVX-512 unops in scalar form. 
-multiclass avx512_fp_unop_s opc, string OpcodeStr, - Intrinsic F32Int, Intrinsic F64Int> { - let hasSideEffects = 0 in { - def SSZr : AVX5128I, EVEX_4V; - let mayLoad = 1 in { - def SSZm : AVX5128I, EVEX_4V, EVEX_CD8<32, CD8VT1>; - def SSZm_Int : AVX5128I, - EVEX_4V, EVEX_CD8<32, CD8VT1>; - } - def SDZr : AVX5128I, - EVEX_4V, VEX_W; - let mayLoad = 1 in { - def SDZm : AVX5128I, - EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; - def SDZm_Int : AVX5128I, - EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; - } -} -} - -defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14", int_x86_avx512_rcp14_ss, - int_x86_avx512_rcp14_sd>, - avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>, - avx512_fp_unop_p_int<0x4C, "vrcp14", - int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>; - -defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14", int_x86_avx512_rsqrt14_ss, - int_x86_avx512_rsqrt14_sd>, - avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>, - avx512_fp_unop_p_int<0x4E, "vrsqrt14", - int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>; - -multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, - Intrinsic V16F32Int, Intrinsic V8F64Int, - OpndItins itins_s, OpndItins itins_d> { - def PSZrr :AVX512PSI, - EVEX, EVEX_V512; - - let mayLoad = 1 in - def PSZrm : AVX512PSI, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; - - def PDZrr : AVX512PDI, - EVEX, EVEX_V512; - - let mayLoad = 1 in - def PDZrm : AVX512PDI, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; - - def PSZr_Int : AVX512PSI, - EVEX, EVEX_V512; - def PSZm_Int : AVX512PSI, EVEX, - EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr_Int : AVX512PDI, - EVEX, EVEX_V512, VEX_W; - def PDZm_Int : AVX512PDI, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} - -multiclass avx512_sqrt_scalar opc, string OpcodeStr, - Intrinsic F32Int, Intrinsic F64Int, - OpndItins itins_s, OpndItins itins_d> { - def SSZr : SI, XS, EVEX_4V; - def SSZr_Int : SIi8, XS, EVEX_4V; - let mayLoad = 1 in { - def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - } - def SDZr : SI, - XD, EVEX_4V, VEX_W; - def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; - let mayLoad = 1 in { - def SDZm : SI, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - def SDZm_Int : SIi8, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - } -} - - -defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", - int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, - SSE_SQRTSS, SSE_SQRTSD>, - avx512_sqrt_packed<0x51, "vsqrt", fsqrt, - int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512, - SSE_SQRTPS, SSE_SQRTPD>; - -def : Pat<(f32 (fsqrt FR32X:$src)), - (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; -def : Pat<(f32 (fsqrt (load addr:$src))), - (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; -def : Pat<(f64 (fsqrt FR64X:$src)), - (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>; -def : Pat<(f64 (fsqrt (load addr:$src))), - (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - -def : Pat<(f32 (X86frsqrt FR32X:$src)), - (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; -def : Pat<(f32 (X86frsqrt (load addr:$src))), - (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - -def : Pat<(f32 (X86frcp FR32X:$src)), - (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; -def : Pat<(f32 (X86frcp (load addr:$src))), - (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - -multiclass avx512_fp_unop_rm opcps, bits<8> opcpd, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - PatFrag mem_frag32, PatFrag mem_frag64, - Intrinsic V4F32Int, Intrinsic V2F64Int, - 
CD8VForm VForm> { -let ExeDomain = SSEPackedSingle in { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr : AVX512AIi8; - - // Vector intrinsic operation, mem - def PSm : AVX512AIi8, - EVEX_CD8<32, VForm>; -} // ExeDomain = SSEPackedSingle - -let ExeDomain = SSEPackedDouble in { - // Vector intrinsic operation, reg - def PDr : AVX512AIi8; - - // Vector intrinsic operation, mem - def PDm : AVX512AIi8, - EVEX_CD8<64, VForm>; -} // ExeDomain = SSEPackedDouble -} - -multiclass avx512_fp_binop_rm opcss, bits<8> opcsd, - string OpcodeStr, - Intrinsic F32Int, - Intrinsic F64Int> { -let ExeDomain = GenericDomain in { - // Operation, reg. - let hasSideEffects = 0 in - def SSr : AVX512AIi8; - - // Intrinsic operation, reg. - def SSr_Int : AVX512AIi8; - - // Intrinsic operation, mem. - def SSm : AVX512AIi8, - EVEX_CD8<32, CD8VT1>; - - // Operation, reg. - let hasSideEffects = 0 in - def SDr : AVX512AIi8, VEX_W; - - // Intrinsic operation, reg. - def SDr_Int : AVX512AIi8, - VEX_W; - - // Intrinsic operation, mem. - def SDm : AVX512AIi8, - VEX_W, EVEX_CD8<64, CD8VT1>; -} // ExeDomain = GenericDomain -} - -let Predicates = [HasAVX512] in { - defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale", - int_x86_avx512_rndscale_ss, - int_x86_avx512_rndscale_sd>, EVEX_4V; - - defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f256mem, VR512, - memopv16f32, memopv8f64, - int_x86_avx512_rndscale_ps_512, - int_x86_avx512_rndscale_pd_512, CD8VF>, - EVEX, EVEX_V512; -} - -def : Pat<(ffloor FR32X:$src), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>; -def : Pat<(f64 (ffloor FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>; -def : Pat<(f32 (fnearbyint FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>; -def : Pat<(f64 (fnearbyint FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>; -def : Pat<(f32 (fceil FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>; -def : Pat<(f64 (fceil FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>; -def : Pat<(f32 (frint FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>; -def : Pat<(f64 (frint FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>; -def : Pat<(f32 (ftrunc FR32X:$src)), - (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>; -def : Pat<(f64 (ftrunc FR64X:$src)), - (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>; - -def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x1))>; -def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0xC))>; -def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x2))>; -def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x4))>; -def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x3))>; - -def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x1))>; -def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0xC))>; -def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x2))>; -def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x4))>; -def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x3))>; - -//------------------------------------------------- -// Integer truncate and extend operations -//------------------------------------------------- - -multiclass avx512_trunc_sat opc, string 
OpcodeStr, - RegisterClass dstRC, RegisterClass srcRC, - RegisterClass KRC, X86MemOperand x86memop> { - def rr : AVX512XS8I, EVEX; - - def krr : AVX512XS8I, EVEX, EVEX_KZ; - - def mr : AVX512XS8I, EVEX; -} -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; - -def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; -def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; -def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; -def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; -def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; - -def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDBkrr VK16WM:$mask, VR512:$src)>; -def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDWkrr VK16WM:$mask, VR512:$src)>; -def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQWkrr VK8WM:$mask, VR512:$src)>; -def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQDkrr VK8WM:$mask, VR512:$src)>; - - -multiclass avx512_extend opc, string OpcodeStr, RegisterClass DstRC, - RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT> { - - def rr : AVX5128I, EVEX; - def rm : AVX5128I, - EVEX; -} - -defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext, - memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext, - memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext, - memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, 
VR128X, X86vzext, - memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext, - memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; - -defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext, - memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext, - memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext, - memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext, - memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext, - memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; - -//===----------------------------------------------------------------------===// -// GATHER - SCATTER Operations - -multiclass avx512_gather opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand memop> { -let mayLoad = 1, - Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in - def rm : AVX5128I, EVEX, EVEX_K; -} -defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -multiclass avx512_scatter opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand memop> { -let mayStore = 1, Constraints = "$mask = $mask_wb" in - def mr : AVX5128I, EVEX, EVEX_K; -} - -defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; - -//===----------------------------------------------------------------------===// -// VSHUFPS 
- VSHUFPD Operations
-
-multiclass avx512_shufp {
-  def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
-                   (ins RC:$src1, x86memop:$src2, i8imm:$src3),
-                   !strconcat(OpcodeStr,
-                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
-                                       (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
-                   EVEX_4V, TB, Sched<[WriteShuffleLd, ReadAfterLd]>;
-  def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
-                   (ins RC:$src1, RC:$src2, i8imm:$src3),
-                   !strconcat(OpcodeStr,
-                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-                   [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
-                                       (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
-                   EVEX_4V, TB, Sched<[WriteShuffle]>;
-}
-
-defm VSHUFPSZ : avx512_shufp, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VSHUFPDZ : avx512_shufp, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-
-multiclass avx512_alignr {
-  def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
-      (ins RC:$src1, RC:$src2, i8imm:$src3),
-      !strconcat(OpcodeStr,
-          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-      []>, EVEX_4V;
-  def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
-      (ins RC:$src1, x86memop:$src2, i8imm:$src3),
-      !strconcat(OpcodeStr,
-          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-      []>, EVEX_4V;
-}
-defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>,
-               EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>,
-               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
-          (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
-          (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
-          (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
-          (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
-
-multiclass avx512_vpabs opc, string OpcodeStr, RegisterClass RC,
-                        X86MemOperand x86memop> {
-  def rr : AVX5128I,
-           EVEX;
-  def rm : AVX5128I,
-           EVEX;
-}
-
-defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512,
-              EVEX_CD8<32, CD8VF>;
-defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
-              EVEX_CD8<64, CD8VF>;
-
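Everything in the hunk above is stamped out by TableGen multiclasses: one `defm` instantiates every `def` inside the multiclass, appending each inner name to the `defm` name. A minimal, self-contained sketch of that expansion, processable with llvm-tblgen on its own; `Inst` and `two_forms` are stand-ins for illustration, not the real AVX512AIi8 class hierarchy:

class Inst<string asm> {
  string AsmString = asm;           // the only field this sketch models
}
multiclass two_forms<string opcodeStr> {
  def rri : Inst<!strconcat(opcodeStr, "\t$dst, $src1, $src2, $src3")>;
  def rmi : Inst<!strconcat(opcodeStr, "\t$dst, $src1, [$src2], $src3")>;
}
// One defm yields two records, VALIGNDrri and VALIGNDrmi, which is why the
// X86PAlignr patterns in the hunk above can name VALIGNDrri directly.
defm VALIGND : two_forms<"valignd">;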
+// Bitcasts between 512-bit vector types. Return the original type since
+// no instruction is needed for the conversion
+let Predicates = [HasAVX512] in {
+  def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
+  def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
+  def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))),  (v8f64 VR512:$src)>;
+  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
+  def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))),  (v16f32 VR512:$src)>;
+  def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))),  (v16f32 VR512:$src)>;
+  def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
+  def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
+  def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))),  (v8i64 VR512:$src)>;
+  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
+  def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))),  (v16i32 VR512:$src)>;
+  def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))),  (v16i32 VR512:$src)>;
+  def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))),  (v8f64 VR512:$src)>;
+
+  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
+  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
+  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
+  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
+  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
+  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
+  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
+  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
+  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
+  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
+  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
+  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
+  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
+  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
+  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
+  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
+  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
+  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
+  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
+  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
+  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
+  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
+  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
+  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
+  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
+  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
+  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
+  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
+
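These `Pat` records are the whole trick: each one rewrites a `bitconvert` dag node into its own input register, so instruction selection emits nothing at all for a same-width vector bitcast. A minimal stand-alone sketch of one such pattern; `Pat`, the type tags, and `VR512` below are stand-ins kept only so llvm-tblgen can process the snippet by itself, not the real definitions from Target.td/TargetSelectionDAG.td:

class Pat<dag pattern, dag result> {
  dag PatternToMatch = pattern;     // dag shape the selector must see
  dag ResultInstrs   = result;      // dag to emit in its place
}
def bitconvert;                     // stands in for the ISD::BITCAST node
def v16f32;                         // stand-in value-type tags
def v8f64;
def VR512;                          // stand-in 512-bit register class
// The result is just the input register retyped, so no code is generated.
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;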
+// Bitcasts between 256-bit vector types. Return the original type since
+// no instruction is needed for the conversion
+  def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))),  (v4f64 VR256X:$src)>;
+  def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))),  (v4f64 VR256X:$src)>;
+  def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))),  (v4f64 VR256X:$src)>;
+  def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
+  def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))),  (v4f64 VR256X:$src)>;
+  def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))),  (v8f32 VR256X:$src)>;
+  def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))),  (v8f32 VR256X:$src)>;
+  def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))),  (v8f32 VR256X:$src)>;
+  def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))),  (v8f32 VR256X:$src)>;
+  def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
+  def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))),  (v4i64 VR256X:$src)>;
+  def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))),  (v4i64 VR256X:$src)>;
+  def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))),  (v4i64 VR256X:$src)>;
+  def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))),  (v4i64 VR256X:$src)>;
+  def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
+  def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))),  (v32i8 VR256X:$src)>;
+  def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))),  (v32i8 VR256X:$src)>;
+  def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))),  (v32i8 VR256X:$src)>;
+  def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))),  (v32i8 VR256X:$src)>;
+  def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
+  def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))),  (v8i32 VR256X:$src)>;
+  def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
+  def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))),  (v8i32 VR256X:$src)>;
+  def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))),  (v8i32 VR256X:$src)>;
+  def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))),  (v8i32 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
+  def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
+}
+
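The next hunk materializes zero once with a rematerializable pseudo and then maps every 512-bit all-zeros constant onto it. The `let ... in { }` wrapper is what stamps the same flags onto every record inside the braces; a minimal sketch of that mechanism, processable with llvm-tblgen, where `Flags` is a stand-in rather than LLVM's real `Instruction` class:

class Flags {
  bit isReMaterializable = 0;
  bit isAsCheapAsAMove = 0;
}
// The outer let overrides both fields for every def in its body, exactly
// like the let around AVX512_512_SET0 below.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  def ZeroIdiom : Flags;            // the record ends up with both bits set
}

The four `Pat` records that follow then reuse the one pseudo for v8i64, v16i32, v8f64, and v16f32 zeros, so the register allocator can rematerialize a zero vector instead of spilling and reloading it.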
+//
+// AVX-512: The VPXOR instruction writes zero to its upper part, so it is
+// safe to use it to build zero vectors.
+//
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+    isPseudo = 1, Predicates = [HasAVX512] in {
+def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
+                        [(set VR512:$dst, (v16f32 immAllZerosV))]>;
+}
+
+def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - VECTOR INSERT
+//
+// -- 32x8 form --
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
+def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
+          (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
+          "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+          []>, EVEX_4V, EVEX_V512;
+let mayLoad = 1 in
+def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
+          (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
+          "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+          []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
+}
+
+// -- 64x4 fp form --
+let neverHasSideEffects = 1, ExeDomain = SSEPackedDouble in {
+def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
+          (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
+          "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+          []>, EVEX_4V, EVEX_V512, VEX_W;
+let mayLoad = 1 in
+def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
+          (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
+          "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+          []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
+}
+// -- 32x4 integer form --
+let neverHasSideEffects = 1 in {
+def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
+          (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
+          "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+          []>, EVEX_4V, EVEX_V512;
+let mayLoad = 1 in
+def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
+          (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
+          "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+          []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
+
+}
+
+let neverHasSideEffects = 1 in {
+// -- 64x4 form --
+def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
+          (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
+          "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+          []>, EVEX_4V, EVEX_V512, VEX_W;
+let mayLoad = 1 in
+def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
+          (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
+          "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+          []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
+}
+
+def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
+          (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
+          (INSERT_get_vinsert128_imm VR512:$ins))>;
+def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
+          (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
+          (INSERT_get_vinsert128_imm VR512:$ins))>;
+def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
+          (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
+          (INSERT_get_vinsert128_imm VR512:$ins))>;
+def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
+          (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
+          (INSERT_get_vinsert128_imm VR512:$ins))>;
+
+def :
Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2), + (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), + (bc_v4i32 (loadv2i64 addr:$src2)), + (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2), + (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2), + (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert128_imm VR512:$ins))>; + +def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2), + (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2), + (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2), + (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2), + (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; + +def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2), + (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2), + (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2), + (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; +def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), + (bc_v8i32 (loadv4i64 addr:$src2)), + (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2, + (INSERT_get_vinsert256_imm VR512:$ins))>; + +// vinsertps - insert f32 to XMM +def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3), + "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>, + EVEX_4V; +def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), + (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3), + "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR128X:$dst, (X86insrtps VR128X:$src1, + (v4f32 (scalar_to_vector (loadf32 addr:$src2))), + imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +//===----------------------------------------------------------------------===// +// AVX-512 VECTOR EXTRACT +//--- +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { +// -- 32x4 form -- +def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst), + (ins VR512:$src1, i8imm:$src2), + "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512; +def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs), + (ins f128mem:$dst, VR512:$src1, i8imm:$src2), + "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>; + +// -- 64x4 form -- +def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst), + (ins 
VR512:$src1, i8imm:$src2), + "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, VEX_W; +let mayStore = 1 in +def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs), + (ins f256mem:$dst, VR512:$src1, i8imm:$src2), + "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; +} + +let neverHasSideEffects = 1 in { +// -- 32x4 form -- +def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst), + (ins VR512:$src1, i8imm:$src2), + "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512; +def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs), + (ins i128mem:$dst, VR512:$src1, i8imm:$src2), + "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>; + +// -- 64x4 form -- +def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst), + (ins VR512:$src1, i8imm:$src2), + "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, VEX_W; +let mayStore = 1 in +def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs), + (ins i256mem:$dst, VR512:$src1, i8imm:$src2), + "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>; +} + +def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)), + (v4f32 (VEXTRACTF32x4rr VR512:$src1, + (EXTRACT_get_vextract128_imm VR128X:$ext)))>; + +def : Pat<(vextract128_extract:$ext VR512:$src1, (iPTR imm)), + (v4i32 (VEXTRACTF32x4rr VR512:$src1, + (EXTRACT_get_vextract128_imm VR128X:$ext)))>; + +def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)), + (v2f64 (VEXTRACTF32x4rr VR512:$src1, + (EXTRACT_get_vextract128_imm VR128X:$ext)))>; + +def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)), + (v2i64 (VEXTRACTI32x4rr VR512:$src1, + (EXTRACT_get_vextract128_imm VR128X:$ext)))>; + + +def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)), + (v8f32 (VEXTRACTF64x4rr VR512:$src1, + (EXTRACT_get_vextract256_imm VR256X:$ext)))>; + +def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)), + (v8i32 (VEXTRACTI64x4rr VR512:$src1, + (EXTRACT_get_vextract256_imm VR256X:$ext)))>; + +def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)), + (v4f64 (VEXTRACTF64x4rr VR512:$src1, + (EXTRACT_get_vextract256_imm VR256X:$ext)))>; + +def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)), + (v4i64 (VEXTRACTI64x4rr VR512:$src1, + (EXTRACT_get_vextract256_imm VR256X:$ext)))>; + +// A 256-bit subvector extract from the first 512-bit vector position +// is a subregister copy that needs no instruction. 
+def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))), + (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>; +def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))), + (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>; +def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))), + (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>; +def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))), + (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>; + +// zmm -> xmm +def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))), + (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; +def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))), + (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; +def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))), + (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>; +def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))), + (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; + + +// A 128-bit subvector insert to the first 512-bit vector position +// is a subregister copy that needs no instruction. +def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), + sub_ymm)>; +def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)), + (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), + sub_ymm)>; +def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)), + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), + sub_ymm)>; +def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)), + (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), + (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), + sub_ymm)>; + +def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; + +// vextractps - extract 32 bits from XMM +def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), + (ins VR128X:$src1, u32u8imm:$src2), + "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, + EVEX; + +def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), + (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2), + "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), + addr:$dst)]>, EVEX; + +//===---------------------------------------------------------------------===// +// AVX-512 BROADCAST +//--- +multiclass avx512_fp_broadcast opc, string OpcodeStr, + RegisterClass DestRC, + RegisterClass SrcRC, X86MemOperand x86memop> { + def rr : AVX5128I, EVEX; + def rm : AVX5128I, EVEX; +} +let ExeDomain = SSEPackedSingle in { + defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss{z}", VR512, + VR128X, f32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; +} + +let ExeDomain = SSEPackedDouble in { + defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, 
"vbroadcastsd{z}", VR512, + VR128X, f64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +} + +def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSSZrm addr:$src)>; +def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))), + (VBROADCASTSDZrm addr:$src)>; + +multiclass avx512_int_broadcast_reg opc, string OpcodeStr, + RegisterClass SrcRC, RegisterClass KRC> { + def Zrr : AVX5128I, EVEX, EVEX_V512; + def Zkrr : AVX5128I, EVEX, EVEX_V512, EVEX_KZ; +} + +defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>; +defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>, + VEX_W; + +def : Pat <(v16i32 (X86vzext VK16WM:$mask)), + (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>; + +def : Pat <(v8i64 (X86vzext VK8WM:$mask)), + (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>; + +def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))), + (VPBROADCASTDrZrr GR32:$src)>; +def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), + (VPBROADCASTQrZrr GR64:$src)>; + +multiclass avx512_int_broadcast_rm opc, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, + RegisterClass KRC> { + def rr : AVX5128I, EVEX; + def krr : AVX5128I, + EVEX, EVEX_KZ; + def rm : AVX5128I, EVEX; + def krm : AVX5128I, EVEX, EVEX_KZ; +} + +defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, + loadi32, VR512, v16i32, v4i32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; +defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, + loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VT1>; + +def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))), + (VBROADCASTSSZrr VR128X:$src)>; +def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))), + (VBROADCASTSDZrr VR128X:$src)>; + +// Provide fallback in case the load node that is used in the patterns above +// is used by additional users, which prevents the pattern selection. 
+def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
+          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
+          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+
+
+let Predicates = [HasAVX512] in {
+def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
+          (EXTRACT_SUBREG
+            (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+                                      addr:$src)), sub_ymm)>;
+}
+//===----------------------------------------------------------------------===//
+// AVX-512 BROADCAST MASK TO VECTOR REGISTER
+//---
+
+multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
+                                 RegisterClass DstRC, RegisterClass KRC,
+                                 ValueType OpVT, ValueType SrcVT> {
+def rr : AVX512XS8I, EVEX;
+}
+
+defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
+                                             VK16, v16i32, v16i1>, EVEX_V512;
+defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
+                                             VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - VPERM
+//
+// -- immediate form --
+multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                           SDNode OpNode, PatFrag mem_frag,
+                           X86MemOperand x86memop, ValueType OpVT> {
+  def ri : AVX512AIi8,
+           EVEX;
+  def mi : AVX512AIi8, EVEX;
+}
+
+defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
+                               i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
+                                f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// -- VPERM - register form --
+multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                       PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
+
+  def rr : AVX5128I, EVEX_4V;
+
+  def rm : AVX5128I,
+           EVEX_4V;
+}
+
+defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
+                           v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
+                           v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+let ExeDomain = SSEPackedSingle in
+defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
+                            v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
+                            v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// -- VPERM2I - 3 source operands form --
+multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                            PatFrag mem_frag, X86MemOperand x86memop,
+                            ValueType OpVT> {
+let Constraints = "$src1 = $dst" in {
+  def rr : AVX5128I,
+           EVEX_4V;
+
+  def rm : AVX5128I, EVEX_4V;
+  }
+}
+defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
+                                  v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
+                                  v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem,
+                                  v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
+                                  v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - BLEND using mask
+//
+multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
+                            RegisterClass KRC, RegisterClass RC,
+                            X86MemOperand x86memop, PatFrag mem_frag,
+                            SDNode OpNode, ValueType vt> {
+  def rr : AVX5128I, EVEX_4V, EVEX_K;
+
+  def rm : AVX5128I,
+           EVEX_4V, EVEX_K;
+}
+
+let ExeDomain = SSEPackedSingle in
+defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem,
+                                   memopv16f32, vselect, v16f32>,
+                                   EVEX_CD8<32, CD8VF>, EVEX_V512;
+let ExeDomain = SSEPackedDouble in
+defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem,
+                                   memopv8f64, vselect, v8f64>,
+                                   VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
+
+defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512, f512mem,
+                                   memopv8i64, vselect, v16i32>,
+                                   EVEX_CD8<32, CD8VF>, EVEX_V512;
+
+defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512, f512mem,
+                                   memopv8i64, vselect, v8i64>, VEX_W,
+                                   EVEX_CD8<64, CD8VF>, EVEX_V512;
+
+
+let Predicates = [HasAVX512] in {
+def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
+                          (v8f32 VR256X:$src2))),
+          (EXTRACT_SUBREG
+            (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+                     (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+                     (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
+                          (v8i32 VR256X:$src2))),
+          (EXTRACT_SUBREG
+            (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+                     (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+                     (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+}
+
+multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+              SDNode OpNode, ValueType vt> {
+  def rr : AVX512BI, EVEX_4V;
+  def rm : AVX512BI, EVEX_4V;
+}
+
+defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
+                           memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512;
+defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
+                           memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W;
+
+defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
+                           memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512;
+defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
+                           memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W;
+
+def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+          (COPY_TO_REGCLASS (VPCMPGTDZrr
+            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
+
+def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+          (COPY_TO_REGCLASS (VPCMPEQDZrr
+            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
+
+multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
+              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+              SDNode OpNode, ValueType vt, Operand CC, string asm,
+              string asm_alt> {
+  def rri : AVX512AIi8, EVEX_4V;
+  def rmi : AVX512AIi8, EVEX_4V;
+  // Accept explicit immediate argument form instead of comparison code.
+  let neverHasSideEffects = 1 in {
+    def rri_alt : AVX512AIi8, EVEX_4V;
+    def rmi_alt : AVX512AIi8, EVEX_4V;
+  }
+}
+
+defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32,
+                              X86cmpm, v16i32, AVXCC,
+              "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+              "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+              EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32,
+                               X86cmpmu, v16i32, AVXCC,
+              "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+              "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+              EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
+                              X86cmpm, v8i64, AVXCC,
+              "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+              "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+              VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
+                               X86cmpmu, v8i64, AVXCC,
+              "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+              "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+              VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+// avx512_cmp_packed - sse 1 & 2 compare packed instructions
+multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
+              X86MemOperand x86memop, SDNode OpNode, ValueType vt,
+              Operand CC, string asm, string asm_alt, Domain d> {
+  def rri : AVX512PIi8<0xC2, MRMSrcReg,
+             (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+             [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
+  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
+             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+             [(set KRC:$dst,
+               (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
+
+  // Accept explicit immediate argument form instead of comparison code.
+  let neverHasSideEffects = 1 in {
+    def rri_alt : PIi8<0xC2, MRMSrcReg,
+               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+               asm_alt, [], IIC_SSE_ALU_F32P_RR, d>;
+    def rmi_alt : PIi8<0xC2, MRMSrcMem,
+               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+               asm_alt, [], IIC_SSE_ALU_F32P_RM, d>;
+  }
+}
+
+defm VCMPPSZ : avx512_cmp_packed, TB, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VCMPPDZ : avx512_cmp_packed, TB, OpSize, EVEX_4V, VEX_W, EVEX_V512,
+               EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
+          (COPY_TO_REGCLASS (VCMPPSZrri
+            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+            imm:$cc), VK8)>;
+def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
+          (COPY_TO_REGCLASS (VPCMPDZrri
+            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+            imm:$cc), VK8)>;
+def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
+          (COPY_TO_REGCLASS (VPCMPUDZrri
+            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+            imm:$cc), VK8)>;
+
+// Mask register copy, including
+// - copy between mask registers
+// - load/store mask registers
+// - copy from GPR to mask register and vice versa
+//
+multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
+                         string OpcodeStr, RegisterClass KRC,
+                         ValueType vt, X86MemOperand x86memop> {
+  let neverHasSideEffects = 1 in {
+    def kk : I;
+    let mayLoad = 1 in
+    def km : I;
+    let mayStore = 1 in
+    def mk : I;
+  }
+}
+
+multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
+                             string OpcodeStr,
+                             RegisterClass KRC, RegisterClass GRC> {
+  let neverHasSideEffects = 1 in {
+    def kr : I;
+    def rk : I;
+  }
+}
+
+let Predicates = [HasAVX512] in {
+  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
+               VEX, TB;
+  defm KMOVW : avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
+               VEX, TB;
+}
+
+let Predicates = [HasAVX512] in {
+  // GR16 from/to 16-bit mask
+  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
+            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
+  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
+            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
+
+  // Store kreg in memory
+  def : Pat<(store (v16i1 VK16:$src), addr:$dst),
+            (KMOVWmk addr:$dst, VK16:$src)>;
+
+  def : Pat<(store (v8i1 VK8:$src), addr:$dst),
+            (KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>;
+}
+// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
+let Predicates = [HasAVX512] in {
+  // GR from/to 8-bit mask without native support
+  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
+            (COPY_TO_REGCLASS
+              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
+              VK8)>;
+  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
+            (EXTRACT_SUBREG
+              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
+              sub_8bit)>;
+}
+
+// Mask unary operation
+// - KNOT
+multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
+                            RegisterClass KRC, SDPatternOperator OpNode> {
+  let Predicates = [HasAVX512] in
+    def rr : I;
+}
+
+multiclass avx512_mask_unop_w<bits<8> opc, string OpcodeStr,
+                              SDPatternOperator OpNode> {
+  defm W : avx512_mask_unop,
+           VEX, TB;
+}
+
+defm KNOT : avx512_mask_unop_w<0x44, "knot", not>;
+
+def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
+def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
+          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
+
+// With AVX-512, 8-bit mask is promoted to 16-bit mask.
+def : Pat<(not VK8:$src),
+          (COPY_TO_REGCLASS
+            (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
+
+// Mask binary operation
+// - KADD, KAND, KANDN, KOR, KXNOR, KXOR
+multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
+                             RegisterClass KRC, SDPatternOperator OpNode> {
+  let Predicates = [HasAVX512] in
+    def rr : I;
+}
+
+multiclass avx512_mask_binop_w<bits<8> opc, string OpcodeStr,
+                               SDPatternOperator OpNode> {
+  defm W : avx512_mask_binop,
+           VEX_4V, VEX_L, TB;
+}
+
+def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
+def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
+
+let isCommutable = 1 in {
+  defm KADD  : avx512_mask_binop_w<0x4a, "kadd",  add>;
+  defm KAND  : avx512_mask_binop_w<0x41, "kand",  and>;
+  let isCommutable = 0 in
+  defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>;
+  defm KOR   : avx512_mask_binop_w<0x45, "kor",   or>;
+  defm KXNOR : avx512_mask_binop_w<0x46, "kxnor", xnor>;
+  defm KXOR  : avx512_mask_binop_w<0x47, "kxor",  xor>;
+}
+
+multiclass avx512_mask_binop_int<string IntName, string InstName> {
+  let Predicates = [HasAVX512] in
+    def : Pat<(!cast<Intrinsic>("int_x86_"##IntName##"_v16i1")
+                VK16:$src1, VK16:$src2),
+              (!cast<Instruction>(InstName##"Wrr") VK16:$src1, VK16:$src2)>;
+}
+
+defm : avx512_mask_binop_int<"kadd",  "KADD">;
+defm : avx512_mask_binop_int<"kand",  "KAND">;
+defm : avx512_mask_binop_int<"kandn", "KANDN">;
+defm : avx512_mask_binop_int<"kor",   "KOR">;
+defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
+defm : avx512_mask_binop_int<"kxor",  "KXOR">;
+// With AVX-512, 8-bit mask is promoted to 16-bit mask.
+multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
+  let Predicates = [HasAVX512] in
+    def : Pat<(OpNode VK8:$src1, VK8:$src2),
+              (COPY_TO_REGCLASS
+                (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
+                      (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
+}
+
+defm : avx512_binop_pat<and,  KANDWrr>;
+defm : avx512_binop_pat<andn, KANDNWrr>;
+defm : avx512_binop_pat<or,   KORWrr>;
+defm : avx512_binop_pat<xnor, KXNORWrr>;
+defm : avx512_binop_pat<xor,  KXORWrr>;
+
+// Mask unpacking
+multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
+                             RegisterClass KRC1, RegisterClass KRC2> {
+  let Predicates = [HasAVX512] in
+    def rr : I;
+}
+
+multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
+  defm BW : avx512_mask_unpck,
+            VEX_4V, VEX_L, OpSize, TB;
+}
+
+defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
+
+multiclass avx512_mask_unpck_int<string IntName, string InstName> {
+  let Predicates = [HasAVX512] in
+    def : Pat<(!cast<Intrinsic>("int_x86_"##IntName##"_v16i1")
+                VK8:$src1, VK8:$src2),
+              (!cast<Instruction>(InstName##"BWrr") VK8:$src1, VK8:$src2)>;
+}
+
+defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
+// Mask bit testing
+multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+                              SDNode OpNode> {
+  let Predicates = [HasAVX512], Defs = [EFLAGS] in
+    def rr : I;
+}
+
+multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm W : avx512_mask_testop,
+           VEX, TB;
+}
+
+defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
+defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest>;
+
+// Mask shift
+multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+                               SDNode OpNode> {
+  let Predicates = [HasAVX512] in
+    def ri : Ii8;
+}
+
+multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
+                                 SDNode OpNode> {
+  defm W : avx512_mask_shiftop,
+           VEX, OpSize, TA, VEX_W;
+}
+
+defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>;
+defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>;
+
+// Mask setting all 0s or 1s
+multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatLeaf Val> {
+  let Predicates = [HasAVX512] in
+  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
+    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
+                   [(set KRC:$dst, (VT Val))]>;
+}
+
+multiclass avx512_mask_setop_w<PatLeaf Val> {
+  defm B : avx512_mask_setop<VK8,  v8i1,  Val>;
+  defm W : avx512_mask_setop<VK16, v16i1, Val>;
+}
+
+defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
+defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
+
+// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
+let Predicates = [HasAVX512] in {
+  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
+  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
+}
+def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
+          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
+
+def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
+          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
+
+def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
+          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Aligned and unaligned load and store
+//
+
+multiclass avx512_mov_packed<bits<8> opc, RegisterClass RC, RegisterClass KRC,
+                            X86MemOperand x86memop, PatFrag ld_frag,
+                            string asm, Domain d> {
+let neverHasSideEffects = 1 in
+  def rr : AVX512PI,
+           EVEX;
+let canFoldAsLoad = 1 in
+  def rm : AVX512PI, EVEX;
+let Constraints = "$src1 = $dst" in {
+  def rrk : AVX512PI,
+            EVEX, EVEX_K;
+  def rmk : AVX512PI, EVEX, EVEX_K;
+}
+}
+
+defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
+                                  "vmovaps", SSEPackedSingle>,
+                                  EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
+                                  "vmovapd", SSEPackedDouble>,
+                                  OpSize, EVEX_V512, VEX_W,
+                                  EVEX_CD8<64, CD8VF>;
+defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32,
+                                  "vmovups", SSEPackedSingle>,
+                                  TB, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64,
+                                  "vmovupd", SSEPackedDouble>,
+                                  OpSize, EVEX_V512, VEX_W,
+                                  EVEX_CD8<64, CD8VF>;
+def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+                          "vmovaps\t{$src, $dst|$dst, $src}",
+                          [(alignedstore512 (v16f32 VR512:$src), addr:$dst)],
+                          SSEPackedSingle>, EVEX, EVEX_V512, TB,
+                          EVEX_CD8<32, CD8VF>;
+def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+                          "vmovapd\t{$src, $dst|$dst, $src}",
+                          [(alignedstore512 (v8f64 VR512:$src), addr:$dst)],
+                          SSEPackedDouble>, EVEX, EVEX_V512,
+                          OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>;
+def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+                          "vmovups\t{$src, $dst|$dst, $src}",
+                          [(store (v16f32 VR512:$src), addr:$dst)],
+                          SSEPackedSingle>, EVEX, EVEX_V512, TB,
+                          EVEX_CD8<32, CD8VF>;
+def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+                          "vmovupd\t{$src, $dst|$dst, $src}",
+                          [(store (v8f64 VR512:$src), addr:$dst)],
+                          SSEPackedDouble>, EVEX, EVEX_V512,
+                          OpSize, TB, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// Use vmovaps/vmovups for AVX-512 integer load/store.
+// 512-bit load/store
+def : Pat<(alignedloadv8i64 addr:$src),
+          (VMOVAPSZrm addr:$src)>;
+def : Pat<(loadv8i64 addr:$src),
+          (VMOVUPSZrm addr:$src)>;
+
+def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
+          (VMOVAPSZmr addr:$dst, VR512:$src)>;
+def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
+          (VMOVAPSZmr addr:$dst, VR512:$src)>;
+
+def : Pat<(store (v8i64 VR512:$src), addr:$dst),
+          (VMOVUPDZmr addr:$dst, VR512:$src)>;
+def : Pat<(store (v16i32 VR512:$src), addr:$dst),
+          (VMOVUPSZmr addr:$dst, VR512:$src)>;
+
+let neverHasSideEffects = 1 in {
+  def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
+                             (ins VR512:$src),
+                             "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+                             EVEX, EVEX_V512;
+  def VMOVDQA64rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
+                             (ins VR512:$src),
+                             "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+                             EVEX, EVEX_V512, VEX_W;
+let mayStore = 1 in {
+  def VMOVDQA32mr : AVX512BI<0x7F, MRMDestMem, (outs),
+                             (ins i512mem:$dst, VR512:$src),
+                             "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+                             EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+  def VMOVDQA64mr : AVX512BI<0x7F, MRMDestMem, (outs),
+                             (ins i512mem:$dst, VR512:$src),
+                             "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+                             EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+let mayLoad = 1 in {
+def VMOVDQA32rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
+                           (ins i512mem:$src),
+                           "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+                           EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
+                           (ins i512mem:$src),
+                           "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+                           EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+}
+
+multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC,
+                          RegisterClass KRC,
+                          PatFrag ld_frag, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in
+  def rr : AVX512XSI,
+           EVEX;
+let canFoldAsLoad = 1 in
+  def rm : AVX512XSI,
+           EVEX;
+let Constraints = "$src1 = $dst" in {
+  def rrk : AVX512XSI,
+            EVEX, EVEX_K;
+  def rmk : AVX512XSI, EVEX, EVEX_K;
+}
+}
+
+defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>,
+                 EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>,
+                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+let AddedComplexity = 20 in {
+def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
+                           (v16f32 VR512:$src2))),
+          (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
+def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1),
+                          (v8f64 VR512:$src2))),
+          (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
+def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1),
+                           (v16i32 VR512:$src2))),
+          (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
+def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
+                          (v8i64 VR512:$src2))),
+          (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
+}
+// Move Int Doubleword to Packed Double Int
+//
+def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
+                      "vmovd{z}\t{$src, $dst|$dst, $src}",
+                      [(set VR128X:$dst,
+                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+                      EVEX, VEX_LIG;
+def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
+                      "vmovd{z}\t{$src, $dst|$dst, $src}",
+                      [(set VR128X:$dst,
+                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
+                      "vmovq{z}\t{$src, $dst|$dst, $src}",
+                      [(set VR128X:$dst,
+                        (v2i64 (scalar_to_vector GR64:$src)))],
+                      IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
+def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+                      "vmovq{z}\t{$src, $dst|$dst, $src}",
+                      [(set FR64:$dst, (bitconvert GR64:$src))],
+                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
+def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+                      "vmovq{z}\t{$src, $dst|$dst, $src}",
+                      [(set GR64:$dst, (bitconvert FR64:$src))],
+                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
+def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+                      "vmovq{z}\t{$src, $dst|$dst, $src}",
+                      [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
+                      EVEX_CD8<64, CD8VT1>;
+
+// Move Int Doubleword to Single Scalar
+//
+def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+                      "vmovd{z}\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert GR32:$src))],
+                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
+
+def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
+                      "vmovd{z}\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
+                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+
+// Move Packed Doubleword Int to Packed Double Int
+//
+def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
+                      "vmovd{z}\t{$src, $dst|$dst, $src}",
+                      [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
+                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+                      EVEX, VEX_LIG;
+def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
+                      (ins i32mem:$dst, VR128X:$src),
+                      "vmovd{z}\t{$src, $dst|$dst, $src}",
+                      [(store (i32 (vector_extract (v4i32 VR128X:$src),
+                                    (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+                      EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+
+// Move Packed Doubleword Int first element to Doubleword Int
+//
+def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
+                      "vmovq{z}\t{$src, $dst|$dst, $src}",
+                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
+                                        (iPTR 0)))],
+                      IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W,
+                      Requires<[HasAVX512, In64BitMode]>;
+
+def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs),
+                      (ins i64mem:$dst, VR128X:$src),
+                      "vmovq{z}\t{$src, $dst|$dst, $src}",
+                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
+                              addr:$dst)], IIC_SSE_MOVDQ>,
+                      EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
+                      Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
+
+// Move Scalar Single to Double Int
+//
+def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst),
+                      (ins FR32X:$src),
+                      "vmovd{z}\t{$src, $dst|$dst, $src}",
+                      [(set GR32:$dst, (bitconvert FR32X:$src))],
+                      IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
+def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
+                      (ins i32mem:$dst, FR32X:$src),
+                      "vmovd{z}\t{$src, $dst|$dst, $src}",
+                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
+                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+
+// Move Quadword Int to Packed Quadword Int
+//
+def VMOVQI2PQIZrm : AVX512SI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+                      (ins i64mem:$src),
+                      "vmovq{z}\t{$src, $dst|$dst, $src}",
+                      [(set VR128X:$dst,
+                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
+                      EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 MOVSS, MOVSD
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_move_scalar<string asm, RegisterClass RC, SDNode OpNode,
+                              ValueType vt, X86MemOperand x86memop,
+                              PatFrag mem_pat> {
+  def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
+              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+              [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
+                                      (scalar_to_vector RC:$src2))))],
+              IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
+  def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+              [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
+              EVEX, VEX_LIG;
+  def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+             [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+             EVEX, VEX_LIG;
+}
+
+let ExeDomain = SSEPackedSingle in
+defm VMOVSSZ : avx512_move_scalar<"movss{z}", FR32X, X86Movss, v4f32, f32mem,
+                                  loadf32>, XS, EVEX_CD8<32, CD8VT1>;
+
+let ExeDomain = SSEPackedDouble in
+defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem,
+                                  loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+
+// For the disassembler
+let isCodeGenOnly = 1 in {
+  def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
+                        (ins VR128X:$src1, FR32X:$src2),
+                        "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+                        IIC_SSE_MOV_S_RR>,
+                        XS, EVEX_4V, VEX_LIG;
+  def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
+                        (ins VR128X:$src1, FR64X:$src2),
+                        "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+                        IIC_SSE_MOV_S_RR>,
+                        XD, EVEX_4V, VEX_LIG, VEX_W;
+}
+
+let Predicates = [HasAVX512] in {
+  let AddedComplexity = 15 in {
+  // Move scalar to XMM zero-extended, zeroing a VR128X then do a
+  // MOVS{S,D} to the lower bits.
+  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
+            (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
+  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
+            (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
+            (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
+            (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
+
+  // Move low f32 and clear high bits.
+  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (VMOVSSZrr (v4f32 (V_SET0)),
+              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
+  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (VMOVSSZrr (v4i32 (V_SET0)),
+              (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
+  }
+
+  let AddedComplexity = 20 in {
+  // MOVSSrm zeros the high parts of the register; represent this
+  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+
+  // MOVSDrm zeros the high parts of the register; represent this
+  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+  def : Pat<(v2f64 (X86vzload addr:$src)),
+            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+
+  // Represent the same patterns above but in the form they appear for
+  // 256-bit types
+  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
+  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
+            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
+  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
+            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
+  }
+  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+                   (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
+            (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
+                                            FR32X:$src)), sub_xmm)>;
+  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+                   (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
+            (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
+                                            FR64X:$src)), sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
+            (SUBREG_TO_REG (i64 0), (VMOVSDZrm addr:$src), sub_xmm)>;
+
+  // Move low f64 and clear high bits.
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (VMOVSDZrr (v2f64 (V_SET0)),
+              (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
+
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+             (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
+
+  // Extract and store.
+  def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
+                   addr:$dst),
+            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
+  def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
+                   addr:$dst),
+            (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
+
+  // Shuffle with VMOVSS
+  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
+            (VMOVSSZrr (v4i32 VR128X:$src1),
+                       (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
+  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
+            (VMOVSSZrr (v4f32 VR128X:$src1),
+                       (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
+
+  // 256-bit variants
+  def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
+            (SUBREG_TO_REG (i32 0),
+              (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
+                         (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
+              sub_xmm)>;
+  def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
+            (SUBREG_TO_REG (i32 0),
+              (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
+                         (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
+              sub_xmm)>;
+
+  // Shuffle with VMOVSD
+  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+  def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+  def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+
+  // 256-bit variants
+  def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
+            (SUBREG_TO_REG (i32 0),
+              (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
+                         (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
+              sub_xmm)>;
+  def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
+            (SUBREG_TO_REG (i32 0),
+              (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
+                         (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
+              sub_xmm)>;
+
+  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+  def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+}
+
+let AddedComplexity = 15 in
+def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+                                (ins VR128X:$src),
+                                "vmovq{z}\t{$src, $dst|$dst, $src}",
+                                [(set VR128X:$dst, (v2i64 (X86vzmovl
+                                                   (v2i64 VR128X:$src))))],
+                                IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
+
+let AddedComplexity = 20 in
+def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+                                (ins i128mem:$src),
+                                "vmovq{z}\t{$src, $dst|$dst, $src}",
+                                [(set VR128X:$dst, (v2i64 (X86vzmovl
+                                                   (loadv2i64 addr:$src))))],
+                                IIC_SSE_MOVDQ>, EVEX, VEX_W,
+                                EVEX_CD8<8, CD8VT8>;
+
+let Predicates = [HasAVX512] in {
+  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
+  let AddedComplexity = 20 in {
+  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+            (VMOVDI2PDIZrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+            (VMOVDI2PDIZrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+            (VMOVDI2PDIZrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+            (VMOVZPQILo2PQIZrm addr:$src)>;
+  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
+            (VMOVZPQILo2PQIZrr VR128X:$src)>;
+  }
+  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
+  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+                              (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
+            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+                              (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
+}
+
+def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
+          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
+          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
+
+def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
+          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
+          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Integer arithmetic
+//
+multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                   ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+                   X86MemOperand x86memop, PatFrag scalar_mfrag,
+                   X86MemOperand x86scalar_mop, string BrdcstStr,
+                   OpndItins itins, bit IsCommutable = 0> {
+  let isCommutable = IsCommutable in
+  def rr : AVX512BI, EVEX_4V;
+  def rm : AVX512BI, EVEX_4V;
+  def rmb : AVX512BI, EVEX_4V, EVEX_B;
+}
+multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
+                            ValueType DstVT, ValueType SrcVT, RegisterClass RC,
+                            PatFrag memop_frag, X86MemOperand x86memop,
+                            OpndItins itins,
+                            bit IsCommutable = 0> {
+  let isCommutable = IsCommutable in
+  def rr : AVX512BI, EVEX_4V, VEX_W;
+  def rm : AVX512BI, EVEX_4V, VEX_W;
+}
+
+defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32,
+                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+                   EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32,
+                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>,
+                   EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32,
+                   i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+                   T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64,
+                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>,
+                   EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
+
+defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64,
+                   i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32,
+                   VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8,
+                   EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
+                   VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512,
+                   EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
+          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
+                            PatFrag mem_frag, RegisterClass RC,
+                            X86MemOperand x86memop, string asm,
+                            Domain d> {
+  def rr : AVX512PI, EVEX_4V, TB;
+  def rm : AVX512PI, EVEX_4V, TB;
+}
+
+defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
+      VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+      SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
+      VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+      SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
+      VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+      SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
+      VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+      SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                             ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+                             X86MemOperand x86memop> {
+  def rr : AVX512BI, EVEX_4V;
+  def rm : AVX512BI, EVEX_4V;
+}
+defm VPUNPCKLDQZ  : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
+                                VR512, memopv16i32, i512mem>, EVEX_V512,
+                                EVEX_CD8<32, CD8VF>;
+defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
+                                VR512, memopv8i64, i512mem>, EVEX_V512,
+                                VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPUNPCKHDQZ  : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
+                                VR512, memopv16i32, i512mem>, EVEX_V512,
+                                EVEX_CD8<32, CD8VF>;
+defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
+                                VR512, memopv8i64, i512mem>, EVEX_V512,
+                                VEX_W, EVEX_CD8<64, CD8VF>;
+//===----------------------------------------------------------------------===//
+// AVX-512 - PSHUFD
+//
+
+multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                            SDNode OpNode, PatFrag mem_frag,
+                            X86MemOperand x86memop, ValueType OpVT> {
+  def ri : AVX512Ii8,
+           EVEX;
+  def mi : AVX512Ii8, EVEX;
+}
+
+defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
+                      i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+let ExeDomain = SSEPackedSingle in
+defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
+                      memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512,
+                      EVEX_CD8<32, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
+                      memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512,
+                      VEX_W, EVEX_CD8<32, CD8VF>;
+
+def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
+          (VPERMILPSZri VR512:$src1, imm:$imm)>;
+def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
+          (VPERMILPDZri VR512:$src1, imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Logical Instructions
+//===----------------------------------------------------------------------===//
+
+defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32,
+                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+                      EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64,
+                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32,
+                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+                      EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64,
+                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32,
+                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+                      EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64,
+                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512,
+                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
+                      SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64,
+                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>,
+                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512  FP arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                          SizeItins itins> {
+  defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG,
+             EVEX_CD8<32, CD8VT1>;
+  defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG,
+             EVEX_CD8<64, CD8VT1>;
+}
+
+let isCommutable = 1 in {
+defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
+defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
+defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
+defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
+}
+let isCommutable = 0 in {
+defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
+defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
+}
+
+multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            RegisterClass RC, ValueType vt,
+                            X86MemOperand x86memop, PatFrag mem_frag,
+                            X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+                            string BrdcstStr,
+                            Domain d, OpndItins itins, bit commutable> {
+  let isCommutable = commutable in
+  def rr : PI,
+           EVEX_4V;
+  let mayLoad = 1 in {
+    def rm : PI, EVEX_4V;
+    def rmb : PI, EVEX_4V, EVEX_B;
+  }
+}
+
+defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem,
+                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+                   SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem,
+                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+                   SSE_ALU_ITINS_P.d, 1>,
+                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem,
+                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+                   SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem,
+                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+                   SSE_ALU_ITINS_P.d, 1>,
+                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem,
+                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+                   SSE_ALU_ITINS_P.s, 1>,
+                   EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem,
+                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+                   SSE_ALU_ITINS_P.s, 1>,
+                   EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem,
+                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+                   SSE_ALU_ITINS_P.d, 1>,
+                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem,
+                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+                   SSE_ALU_ITINS_P.d, 1>,
+                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem,
+                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+                   SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem,
+                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+                   SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem,
+                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+                   SSE_ALU_ITINS_P.d, 0>,
+                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem,
+                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+                   SSE_ALU_ITINS_P.d, 0>,
+                   EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512  VPTESTM instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+              SDNode OpNode, ValueType vt> {
+  def rr : AVX5128I, EVEX_4V;
+  def rm : AVX5128I, EVEX_4V;
+}
+
+defm VPTESTMDZ  : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
+                              memopv16i32, X86testm, v16i32>, EVEX_V512,
+                              EVEX_CD8<32, CD8VF>;
+defm VPTESTMQZ  : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
+                              memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
+                              EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512  Shift instructions
+//===----------------------------------------------------------------------===//
+multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
+                         string OpcodeStr, SDNode OpNode, RegisterClass RC,
+                         ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
+                         RegisterClass KRC> {
+  def ri : AVX512BIi8, EVEX_4V;
+  def rik : AVX512BIi8, EVEX_4V, EVEX_K;
+  def mi: AVX512BIi8, EVEX_4V;
+  def mik: AVX512BIi8, EVEX_4V, EVEX_K;
+}
+
+multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                          RegisterClass RC, ValueType vt, ValueType SrcVT,
+                          PatFrag bc_frag, RegisterClass KRC> {
+  // src2 is always 128-bit
+  def rr : AVX512BI, EVEX_4V;
+  def rrk : AVX512BI, EVEX_4V, EVEX_K;
+  def rm : AVX512BI, EVEX_4V;
+  def rmk : AVX512BI, EVEX_4V, EVEX_K;
+}
+
+defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
+                           VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+                           EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
+                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+                           EVEX_CD8<32, CD8VQ>;
+
+defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
+                           VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+                           EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
+                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+                           EVEX_CD8<64, CD8VQ>, VEX_W;
+
+defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
+                           VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
+                           EVEX_CD8<32, CD8VF>;
+defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
+                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+                           EVEX_CD8<32, CD8VQ>;
+
+defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
+                           VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+                           EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
+                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+                           EVEX_CD8<64, CD8VQ>, VEX_W;
+
+defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
+                           VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+                           EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
+                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+                           EVEX_CD8<32, CD8VQ>;
+
+defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
+                           VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+                           EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
+                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+                           EVEX_CD8<64, CD8VQ>, VEX_W;
+
+//===-------------------------------------------------------------------===//
+// Variable Bit Shifts
+//===-------------------------------------------------------------------===//
+multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           RegisterClass RC, ValueType vt,
+                           X86MemOperand x86memop, PatFrag mem_frag> {
+  def rr : AVX5128I,
+           EVEX_4V;
+  def rm : AVX5128I,
+           EVEX_4V;
+}
+
+defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
+                               i512mem, memopv16i32>, EVEX_V512,
+                               EVEX_CD8<32, CD8VF>;
+defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
+                               i512mem, memopv8i64>, EVEX_V512, VEX_W,
+                               EVEX_CD8<64, CD8VF>;
+defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
+                               i512mem, memopv16i32>, EVEX_V512,
+                               EVEX_CD8<32, CD8VF>;
+defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
+                               i512mem, memopv8i64>, EVEX_V512, VEX_W,
+                               EVEX_CD8<64, CD8VF>;
+defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
+                               i512mem, memopv16i32>, EVEX_V512,
+                               EVEX_CD8<32, CD8VF>;
+defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
+                               i512mem, memopv8i64>, EVEX_V512, VEX_W,
+                               EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - MOVDDUP
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
+                          X86MemOperand x86memop, PatFrag memop_frag> {
+def rr  : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
+def rm  : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(set RC:$dst,
+                      (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
+}
+
+defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
+                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
+          (VMOVDDUPZrm addr:$src)>;
+
+def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
+                    (ins VR128X:$src1, VR128X:$src2),
+                    "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
+                    IIC_SSE_MOV_LH>, EVEX_4V;
+def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
+                    (ins VR128X:$src1, VR128X:$src2),
+                    "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
+                    IIC_SSE_MOV_LH>, EVEX_4V;
+
+// MOVLHPS patterns
+def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
+          (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
+def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
+          (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
+
+// MOVHLPS patterns
+def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
+          (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
+
+//===----------------------------------------------------------------------===//
+// FMA - Fused Multiply Operations
+//
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
+            RegisterClass RC, X86MemOperand x86memop,
+            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+  def r: AVX512FMA3;
+
+  let mayLoad = 1 in
+  def m: AVX512FMA3;
+  def mb: AVX512FMA3, EVEX_B;
+}
+} // Constraints = "$src1 = $dst"
+
+let ExeDomain = SSEPackedSingle in {
+  defm VFMADD213PSZ    : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
+                                         memopv16f32, f32mem, loadf32, "{1to16}",
+                                         X86Fmadd, v16f32>, EVEX_V512,
+                                         EVEX_CD8<32, CD8VF>;
+  defm VFMSUB213PSZ    : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
+                                         memopv16f32, f32mem, loadf32, "{1to16}",
+                                         X86Fmsub, v16f32>, EVEX_V512,
+                                         EVEX_CD8<32, CD8VF>;
+  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
+                                         memopv16f32, f32mem, loadf32, "{1to16}",
+                                         X86Fmaddsub, v16f32>,
+                                         EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
+                                         memopv16f32, f32mem, loadf32, "{1to16}",
+                                         X86Fmsubadd, v16f32>,
+                                         EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFNMADD213PSZ   : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
+                                         memopv16f32, f32mem, loadf32, "{1to16}",
+                                         X86Fnmadd, v16f32>, EVEX_V512,
+                                         EVEX_CD8<32, CD8VF>;
+  defm VFNMSUB213PSZ   : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
+                                         memopv16f32, f32mem, loadf32, "{1to16}",
+                                         X86Fnmsub, v16f32>, EVEX_V512,
+                                         EVEX_CD8<32, CD8VF>;
+}
+let ExeDomain = SSEPackedDouble in {
+  defm VFMADD213PDZ    : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
+                                         memopv8f64, f64mem, loadf64, "{1to8}",
+                                         X86Fmadd, v8f64>, EVEX_V512,
+                                         VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFMSUB213PDZ    : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
+                                         memopv8f64, f64mem, loadf64, "{1to8}",
+                                         X86Fmsub, v8f64>, EVEX_V512, VEX_W,
+                                         EVEX_CD8<64, CD8VF>;
+  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
+                                         memopv8f64, f64mem, loadf64, "{1to8}",
+                                         X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
+                                         EVEX_CD8<64, CD8VF>;
+  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
+                                         memopv8f64, f64mem, loadf64, "{1to8}",
+                                         X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
+                                         EVEX_CD8<64, CD8VF>;
+  defm VFNMADD213PDZ   : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
+                                         memopv8f64, f64mem, loadf64, "{1to8}",
+                                         X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
+                                         EVEX_CD8<64, CD8VF>;
+  defm VFNMSUB213PDZ   : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
+                                         memopv8f64, f64mem, loadf64, "{1to8}",
+                                         X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
+                                         EVEX_CD8<64, CD8VF>;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
+            RegisterClass RC, X86MemOperand x86memop,
+            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+  let mayLoad = 1 in
+  def m: AVX512FMA3;
+  def mb: AVX512FMA3, EVEX_B;
+}
+} // Constraints = "$src1 = $dst"
+
+
+let ExeDomain = SSEPackedSingle in {
+  defm VFMADD132PSZ    : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
+                                           memopv16f32, f32mem, loadf32, "{1to16}",
+                                           X86Fmadd, v16f32>, EVEX_V512,
+                                           EVEX_CD8<32, CD8VF>;
+  defm VFMSUB132PSZ    : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
+                                           memopv16f32, f32mem, loadf32, "{1to16}",
+                                           X86Fmsub, v16f32>, EVEX_V512,
+                                           EVEX_CD8<32, CD8VF>;
+  defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
+                                           memopv16f32, f32mem, loadf32, "{1to16}",
+                                           X86Fmaddsub, v16f32>,
+                                           EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
+                                           memopv16f32, f32mem, loadf32, "{1to16}",
+                                           X86Fmsubadd, v16f32>,
+                                           EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm VFNMADD132PSZ   : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
+                                           memopv16f32, f32mem, loadf32, "{1to16}",
+                                           X86Fnmadd, v16f32>, EVEX_V512,
+                                           EVEX_CD8<32, CD8VF>;
+  defm VFNMSUB132PSZ   : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
+                                           memopv16f32, f32mem, loadf32, "{1to16}",
+                                           X86Fnmsub, v16f32>, EVEX_V512,
+                                           EVEX_CD8<32, CD8VF>;
+}
+let ExeDomain = SSEPackedDouble in {
+  defm VFMADD132PDZ    : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
+                                           memopv8f64, f64mem, loadf64, "{1to8}",
+                                           X86Fmadd, v8f64>, EVEX_V512,
+                                           VEX_W, EVEX_CD8<64, CD8VF>;
+  defm VFMSUB132PDZ    : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
+                                           memopv8f64, f64mem, loadf64, "{1to8}",
+                                           X86Fmsub, v8f64>, EVEX_V512, VEX_W,
+                                           EVEX_CD8<64, CD8VF>;
+  defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
+                                           memopv8f64, f64mem, loadf64, "{1to8}",
+                                           X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
+                                           EVEX_CD8<64, CD8VF>;
+  defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
+                                           memopv8f64, f64mem, loadf64, "{1to8}",
+                                           X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
+                                           EVEX_CD8<64, CD8VF>;
+  defm VFNMADD132PDZ   : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
+                                           memopv8f64, f64mem, loadf64, "{1to8}",
+                                           X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
+                                           EVEX_CD8<64, CD8VF>;
+  defm VFNMSUB132PDZ   : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
+                                           memopv8f64, f64mem, loadf64, "{1to8}",
+                                           X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
+                                           EVEX_CD8<64, CD8VF>;
+}
+
+// Scalar FMA
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                 RegisterClass RC, ValueType OpVT,
+                 X86MemOperand x86memop, Operand memop,
+                 PatFrag mem_frag> {
+  let isCommutable = 1 in
+  def r     : AVX512FMA3;
+  let mayLoad = 1 in
+  def m     : AVX512FMA3;
+}
+
+} // Constraints = "$src1 = $dst"
+
+defm VFMADDSSZ  : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X,
+                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFMADDSDZ  : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X,
+                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFMSUBSSZ  : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X,
+                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFMSUBSDZ  : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X,
+                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X,
+                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X,
+                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X,
+                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X,
+                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512  Scalar convert from signed integer to float/double
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                          X86MemOperand x86memop, string asm> {
+let neverHasSideEffects = 1 in {
+  def rr : SI, EVEX_4V;
+  let mayLoad = 1 in
+  def rm : SI, EVEX_4V;
+} // neverHasSideEffects = 1
+}
+
+defm VCVTSI2SSZ   : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">,
+                      XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI2SS64Z : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
+                      XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SDZ   : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">,
+                      XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI2SD64Z : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
+                      XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+
+def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+          (VCVTSI2SS64Zrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+          (VCVTSI2SD64Zrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+def : Pat<(f32 (sint_to_fp GR32:$src)),
+          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f32 (sint_to_fp GR64:$src)),
+          (VCVTSI2SS64Zrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+def : Pat<(f64 (sint_to_fp GR32:$src)),
+          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f64 (sint_to_fp GR64:$src)),
+          (VCVTSI2SD64Zrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+
+
+//===----------------------------------------------------------------------===//
+// AVX-512  Convert from float to double and back
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1 in {
+def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
+                    (ins FR32X:$src1, FR32X:$src2),
+                    "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
+def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
+                    (ins FR32X:$src1, f32mem:$src2),
+                    "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
+                    EVEX_CD8<32, CD8VT1>;
+
+// Convert scalar double to scalar single
+def VCVTSD2SSZrr  : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
+                    (ins FR64X:$src1, FR64X:$src2),
+                    "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
+def VCVTSD2SSZrm  : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
+                    (ins FR64X:$src1, f64mem:$src2),
+                    "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    []>, EVEX_4V, VEX_LIG, VEX_W,
+                    Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
+}
+
+def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
+          Requires<[HasAVX512]>;
+def : Pat<(fextend (loadf32 addr:$src)),
+          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(extloadf32 addr:$src),
+          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+          Requires<[HasAVX512, OptForSize]>;
+
+def : Pat<(extloadf32 addr:$src),
+          (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
+          Requires<[HasAVX512, OptForSpeed]>;
+
+def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
+          Requires<[HasAVX512]>;
+
+multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
+               RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
+               X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
+               Domain d> {
+let neverHasSideEffects = 1 in {
+  def rr : AVX512PI, EVEX;
+  let mayLoad = 1 in
+  def rm : AVX512PI, EVEX;
+} // neverHasSideEffects = 1
+}
+
+defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
+                                memopv8f64, f512mem, v8f32, v8f64,
+                                SSEPackedSingle>, EVEX_V512, VEX_W, OpSize,
+                                EVEX_CD8<64, CD8VF>;
+
+defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
+                                memopv4f64, f256mem, v8f64, v8f32,
+                                SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+def : Pat<(v8f64 (extloadv8f32 addr:$src)),
+            (VCVTPS2PDZrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512  Vector convert from signed integer to float/double
+//===----------------------------------------------------------------------===//
+
+defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
+                                memopv8i64, i512mem, v16f32, v16i32,
+                                SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
+                                memopv4i64, i256mem, v8f64, v8i32,
+                                SSEPackedDouble>, EVEX_V512, XS,
+                                EVEX_CD8<32, CD8VH>;
+
+defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
+                                memopv16f32, f512mem, v16i32, v16f32,
+                                SSEPackedSingle>, EVEX_V512, XS,
+                                EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
+                                memopv8f64, f512mem, v8i32, v8f64,
+                                SSEPackedDouble>, EVEX_V512, OpSize, VEX_W,
+                                EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
+                                memopv16f32, f512mem, v16i32, v16f32,
+                                SSEPackedSingle>, EVEX_V512,
+                                EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
+                                memopv8f64, f512mem, v8i32, v8f64,
+                                SSEPackedDouble>, EVEX_V512, VEX_W,
+                                EVEX_CD8<64, CD8VF>;
+
+defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
+                                memopv4i64, f256mem, v8f64, v8i32,
+                                SSEPackedDouble>, EVEX_V512, XS,
+                                EVEX_CD8<32, CD8VH>;
+
+defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
+                                memopv16i32, f512mem, v16f32, v16i32,
+                                SSEPackedSingle>, EVEX_V512, XD,
+                                EVEX_CD8<32, CD8VF>;
+
+def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
+          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
+           (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+
+def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src),
+          (VCVTDQ2PSZrr VR512:$src)>;
+def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))),
+          (VCVTDQ2PSZrm addr:$src)>;
+
+def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+                   "vcvtps2dq\t{$src, $dst|$dst, $src}",
+                   [(set VR512:$dst,
+                     (int_x86_avx512_cvt_ps2dq_512 VR512:$src))],
+                   IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512;
+def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins
f512mem:$src), + "vcvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + + +let Predicates = [HasAVX512] in { + def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), + (VCVTPD2PSZrm addr:$src)>; + def : Pat<(v8f64 (extloadv8f32 addr:$src)), + (VCVTPS2PDZrm addr:$src)>; +} + +let Defs = [EFLAGS], Predicates = [HasAVX512] in { + defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, + "ucomiss{z}">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, + "ucomisd{z}">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + let Pattern = [] in { + defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, + "comiss{z}">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, + "comisd{z}">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + } + defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, + load, "ucomiss">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, + load, "ucomisd">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + + defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, + load, "comiss">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, + load, "comisd">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; +} + +/// avx512_unop_p - AVX-512 unops in packed form. +multiclass avx512_fp_unop_p opc, string OpcodeStr, SDNode OpNode> { + def PSZr : AVX5128I, + EVEX, EVEX_V512; + def PSZm : AVX5128I, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr : AVX5128I, + EVEX, EVEX_V512, VEX_W; + def PDZm : AVX5128I, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms. +multiclass avx512_fp_unop_p_int opc, string OpcodeStr, + Intrinsic V16F32Int, Intrinsic V8F64Int> { + def PSZr_Int : AVX5128I, + EVEX, EVEX_V512; + def PSZm_Int : AVX5128I, EVEX, + EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr_Int : AVX5128I, + EVEX, EVEX_V512, VEX_W; + def PDZm_Int : AVX5128I, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +/// avx512_fp_unop_s - AVX-512 unops in scalar form. 
+multiclass avx512_fp_unop_s opc, string OpcodeStr, + Intrinsic F32Int, Intrinsic F64Int> { + let hasSideEffects = 0 in { + def SSZr : AVX5128I, EVEX_4V; + let mayLoad = 1 in { + def SSZm : AVX5128I, EVEX_4V, EVEX_CD8<32, CD8VT1>; + def SSZm_Int : AVX5128I, + EVEX_4V, EVEX_CD8<32, CD8VT1>; + } + def SDZr : AVX5128I, + EVEX_4V, VEX_W; + let mayLoad = 1 in { + def SDZm : AVX5128I, + EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; + def SDZm_Int : AVX5128I, + EVEX_4V, VEX_W, EVEX_CD8<32, CD8VT1>; + } +} +} + +defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14", int_x86_avx512_rcp14_ss, + int_x86_avx512_rcp14_sd>, + avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>, + avx512_fp_unop_p_int<0x4C, "vrcp14", + int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>; + +defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14", int_x86_avx512_rsqrt14_ss, + int_x86_avx512_rsqrt14_sd>, + avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>, + avx512_fp_unop_p_int<0x4E, "vrsqrt14", + int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>; + +multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, + Intrinsic V16F32Int, Intrinsic V8F64Int, + OpndItins itins_s, OpndItins itins_d> { + def PSZrr :AVX512PSI, + EVEX, EVEX_V512; + + let mayLoad = 1 in + def PSZrm : AVX512PSI, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + + def PDZrr : AVX512PDI, + EVEX, EVEX_V512; + + let mayLoad = 1 in + def PDZrm : AVX512PDI, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; + + def PSZr_Int : AVX512PSI, + EVEX, EVEX_V512; + def PSZm_Int : AVX512PSI, EVEX, + EVEX_V512, EVEX_CD8<32, CD8VF>; + def PDZr_Int : AVX512PDI, + EVEX, EVEX_V512, VEX_W; + def PDZm_Int : AVX512PDI, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} + +multiclass avx512_sqrt_scalar opc, string OpcodeStr, + Intrinsic F32Int, Intrinsic F64Int, + OpndItins itins_s, OpndItins itins_d> { + def SSZr : SI, XS, EVEX_4V; + def SSZr_Int : SIi8, XS, EVEX_4V; + let mayLoad = 1 in { + def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; + def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; + } + def SDZr : SI, + XD, EVEX_4V, VEX_W; + def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; + let mayLoad = 1 in { + def SDZm : SI, + XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + def SDZm_Int : SIi8, + XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + } +} + + +defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", + int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, + SSE_SQRTSS, SSE_SQRTSD>, + avx512_sqrt_packed<0x51, "vsqrt", fsqrt, + int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512, + SSE_SQRTPS, SSE_SQRTPD>; + +def : Pat<(f32 (fsqrt FR32X:$src)), + (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (fsqrt (load addr:$src))), + (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; +def : Pat<(f64 (fsqrt FR64X:$src)), + (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>; +def : Pat<(f64 (fsqrt (load addr:$src))), + (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +def : Pat<(f32 (X86frsqrt FR32X:$src)), + (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (X86frsqrt (load addr:$src))), + (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +def : Pat<(f32 (X86frcp FR32X:$src)), + (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; +def : Pat<(f32 (X86frcp (load addr:$src))), + (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[OptForSize]>; + +multiclass avx512_fp_unop_rm opcps, bits<8> opcpd, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + PatFrag mem_frag32, PatFrag mem_frag64, + Intrinsic V4F32Int, Intrinsic V2F64Int, + 
CD8VForm VForm> { +let ExeDomain = SSEPackedSingle in { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr : AVX512AIi8; + + // Vector intrinsic operation, mem + def PSm : AVX512AIi8, + EVEX_CD8<32, VForm>; +} // ExeDomain = SSEPackedSingle + +let ExeDomain = SSEPackedDouble in { + // Vector intrinsic operation, reg + def PDr : AVX512AIi8; + + // Vector intrinsic operation, mem + def PDm : AVX512AIi8, + EVEX_CD8<64, VForm>; +} // ExeDomain = SSEPackedDouble +} + +multiclass avx512_fp_binop_rm opcss, bits<8> opcsd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic F64Int> { +let ExeDomain = GenericDomain in { + // Operation, reg. + let hasSideEffects = 0 in + def SSr : AVX512AIi8; + + // Intrinsic operation, reg. + def SSr_Int : AVX512AIi8; + + // Intrinsic operation, mem. + def SSm : AVX512AIi8, + EVEX_CD8<32, CD8VT1>; + + // Operation, reg. + let hasSideEffects = 0 in + def SDr : AVX512AIi8, VEX_W; + + // Intrinsic operation, reg. + def SDr_Int : AVX512AIi8, + VEX_W; + + // Intrinsic operation, mem. + def SDm : AVX512AIi8, + VEX_W, EVEX_CD8<64, CD8VT1>; +} // ExeDomain = GenericDomain +} + +let Predicates = [HasAVX512] in { + defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale", + int_x86_avx512_rndscale_ss, + int_x86_avx512_rndscale_sd>, EVEX_4V; + + defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f256mem, VR512, + memopv16f32, memopv8f64, + int_x86_avx512_rndscale_ps_512, + int_x86_avx512_rndscale_pd_512, CD8VF>, + EVEX, EVEX_V512; +} + +def : Pat<(ffloor FR32X:$src), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>; +def : Pat<(f64 (ffloor FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>; +def : Pat<(f32 (fnearbyint FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>; +def : Pat<(f64 (fnearbyint FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>; +def : Pat<(f32 (fceil FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>; +def : Pat<(f64 (fceil FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>; +def : Pat<(f32 (frint FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>; +def : Pat<(f64 (frint FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>; +def : Pat<(f32 (ftrunc FR32X:$src)), + (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>; +def : Pat<(f64 (ftrunc FR64X:$src)), + (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>; + +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEZPSr VR512:$src, (i32 0x3))>; + +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEZPDr VR512:$src, (i32 0x3))>; + +//------------------------------------------------- +// Integer truncate and extend operations +//------------------------------------------------- + +multiclass avx512_trunc_sat opc, string 
OpcodeStr, + RegisterClass dstRC, RegisterClass srcRC, + RegisterClass KRC, X86MemOperand x86memop> { + def rr : AVX512XS8I, EVEX; + + def krr : AVX512XS8I, EVEX, EVEX_KZ; + + def mr : AVX512XS8I, EVEX; +} +defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; +defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; +defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; + +def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; +def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; +def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; +def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; +def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; + +def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), + (VPMOVDBkrr VK16WM:$mask, VR512:$src)>; +def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), + (VPMOVDWkrr VK16WM:$mask, VR512:$src)>; +def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), + (VPMOVQWkrr VK8WM:$mask, VR512:$src)>; +def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), + (VPMOVQDkrr VK8WM:$mask, VR512:$src)>; + + +multiclass avx512_extend opc, string OpcodeStr, RegisterClass DstRC, + RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT, ValueType InVT> { + + def rr : AVX5128I, EVEX; + def rm : AVX5128I, + EVEX; +} + +defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext, + memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, + EVEX_CD8<8, CD8VQ>; +defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext, + memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, + EVEX_CD8<8, CD8VO>; +defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext, + memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, + EVEX_CD8<16, CD8VH>; +defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, 
VR128X, X86vzext, + memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, + EVEX_CD8<16, CD8VQ>; +defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext, + memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, + EVEX_CD8<32, CD8VH>; + +defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext, + memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, + EVEX_CD8<8, CD8VQ>; +defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext, + memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, + EVEX_CD8<8, CD8VO>; +defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext, + memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, + EVEX_CD8<16, CD8VH>; +defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext, + memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, + EVEX_CD8<16, CD8VQ>; +defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext, + memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, + EVEX_CD8<32, CD8VH>; + +//===----------------------------------------------------------------------===// +// GATHER - SCATTER Operations + +multiclass avx512_gather opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayLoad = 1, + Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in + def rm : AVX5128I, EVEX, EVEX_K; +} +defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +multiclass avx512_scatter opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayStore = 1, Constraints = "$mask = $mask_wb" in + def mr : AVX5128I, EVEX, EVEX_K; +} + +defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +//===----------------------------------------------------------------------===// +// VSHUFPS 
- VSHUFPD Operations + +multiclass avx512_shufp { + def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), + (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, + EVEX_4V, TB, Sched<[WriteShuffleLd, ReadAfterLd]>; + def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, + (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, + EVEX_4V, TB, Sched<[WriteShuffle]>; +} + +defm VSHUFPSZ : avx512_shufp, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VSHUFPDZ : avx512_shufp, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + + +multiclass avx512_alignr { + def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, EVEX_4V; + def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, EVEX_4V; +} +defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>; +def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), + (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>; + +multiclass avx512_vpabs opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop> { + def rr : AVX5128I, + EVEX; + def rm : AVX5128I, + EVEX; +} + +defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512, + EVEX_CD8<32, CD8VF>; +defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W, + EVEX_CD8<64, CD8VF>; + -- cgit v1.1 From dc0de80f24a83336cb26dcb9ed1fa030142a504d Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 17 Sep 2013 09:54:57 +0000 Subject: [ARM] Fix the deprecation of MCR encodings that map to CP15{ISB,DSB,DMB}. 
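The encodings in question are easiest to see from the assembly side. Below is a hedged C++ inline-assembly sketch, derived only from the operand tuples that getMCRDeprecationInfo checks in the patch that follows; the function names and the zero source operand are illustrative, not part of the change:

    // The three CP15 writes deprecated since ARMv7, next to the dedicated
    // barrier instructions that replace them. The mcr operand tuples mirror
    // the comments in the patch; the Rt value is conventionally zero.
    void cp15_barriers_deprecated(void) {
      asm volatile("mcr p15, #0, %0, c7, c5, #4"  :: "r"(0) : "memory"); // CP15ISB -> isb
      asm volatile("mcr p15, #0, %0, c7, c10, #4" :: "r"(0) : "memory"); // CP15DSB -> dsb
      asm volatile("mcr p15, #0, %0, c7, c10, #5" :: "r"(0) : "memory"); // CP15DMB -> dmb
    }
    void barriers_preferred(void) {
      asm volatile("isb" ::: "memory");
      asm volatile("dsb" ::: "memory");
      asm volatile("dmb" ::: "memory");
    }

Assembling the first function for a v7 or later target should now produce one deprecation warning per mcr, with the messages added below.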
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190862 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 3 ++- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 33 +++++++++++++++++++------ 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 83edf16..666d980 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4007,7 +4007,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>; + imm:$CRm, imm:$opc2)]>, + ComplexDeprecationPredicate<"MCR">; def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index ea5d7ba..64001b4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -33,15 +33,32 @@ using namespace llvm; static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, std::string &Info) { - // Checks for the deprecated CP15ISB encoding: - // mcr pX, #0, rX, c7, c5, #4 - if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && + if (STI.getFeatureBits() & llvm::ARM::HasV7Ops && + (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) && (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) && - (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7) && - (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) && - (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) { - Info = "deprecated on armv8"; - return true; + // Checks for the deprecated CP15ISB encoding: + // mcr p15, #0, rX, c7, c5, #4 + (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7)) { + if ((MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) { + if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) { + Info = "deprecated since v7, use 'isb'"; + return true; + } + + // Checks for the deprecated CP15DSB encoding: + // mcr p15, #0, rX, c7, c10, #4 + if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10) { + Info = "deprecated since v7, use 'dsb'"; + return true; + } + } + // Checks for the deprecated CP15DMB encoding: + // mcr p15, #0, rX, c7, c10, #5 + if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10 && + (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 5)) { + Info = "deprecated since v7, use 'dmb'"; + return true; + } } return false; } -- cgit v1.1 From 215585920ff264a46cd41a7c5b4aeb8ce17daa90 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Tue, 17 Sep 2013 13:44:39 +0000 Subject: Add llvm.x86.* intrinsics for Intel SHA Extensions Add llvm.x86.* intrinsics for all of the Intel SHA Extensions instructions, as well as tests. Also remove mayLoad and hasSideEffects, which can be inferred from the instruction patterns. 
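As a usage sketch (an assumption about the front end, not part of this patch): Clang's <immintrin.h> SHA wrappers lower to the llvm.x86.sha* intrinsics added here, which in turn select the SHA-NI instruction definitions in the patch that follows. Compiled with something like clang -msha -O2:

    #include <immintrin.h>

    __m128i sha1_rounds(__m128i abcd, __m128i wk) {
      // Lowers to llvm.x86.sha1rnds4; the immediate picks the round function.
      return _mm_sha1rnds4_epu32(abcd, wk, 0);
    }

    __m128i sha256_round_pair(__m128i state0, __m128i state1, __m128i wk) {
      // Lowers to llvm.x86.sha256rnds2; the third operand ends up pinned to
      // %xmm0, matching the "Uses = [XMM0]" constraint on SHA256RNDS2 below.
      return _mm_sha256rnds2_epu32(state0, state1, wk);
    }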
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190864 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5aa5be6..2b271e9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7395,37 +7395,49 @@ let Constraints = "$src1 = $dst" in { // SHA-NI Instructions //===----------------------------------------------------------------------===// -multiclass SHAI_binop Opc, string OpcodeStr> { +multiclass SHAI_binop Opc, string OpcodeStr, Intrinsic IntId, + bit UsesXMM0 = 0> { def rr : I, T8; + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [!if(UsesXMM0, + (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), + (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8; - let mayLoad = 1 in def rm : I, T8; + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [!if(UsesXMM0, + (set VR128:$dst, (IntId VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)), + (set VR128:$dst, (IntId VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8; } -let Constraints = "$src1 = $dst", hasSideEffects = 0, Predicates = [HasSHA] in { +let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, TA; - let mayLoad = 1 in + [(set VR128:$dst, + (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, + (i8 imm:$src3)))]>, TA; def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, TA; + [(set VR128:$dst, + (int_x86_sha1rnds4 VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), + (i8 imm:$src3)))]>, TA; - defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte">; - defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1">; - defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2">; + defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>; + defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>; + defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>; let Uses=[XMM0] in - defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2">; + defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>; - defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1">; - defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2">; + defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>; + defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>; } // Aliases with explicit %xmm0 -- cgit v1.1 From 2ff37c701ebb4e4d593df085fe8bb60f2d6d340a Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Tue, 17 Sep 2013 16:53:36 +0000 Subject: Remove unused code, which had been commented out. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190869 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 5 ----- 1 file changed, 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a045c35..82716f8 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -173,11 +173,6 @@ def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", class Proc Features> : ProcessorModel; -//class AtomProc Features> -// : ProcessorModel; -//class SLMProc Features> -// : ProcessorModel; - def : Proc<"generic", []>; def : Proc<"i386", []>; def : Proc<"i486", []>; -- cgit v1.1 From 5bd1dfa2b5deab10a1a7a5a4ba117c28d878595d Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 17 Sep 2013 20:03:25 +0000 Subject: [PowerPC] Fix problems with large code model (PR17169). Large code model on PPC64 requires creating and referencing TOC entries when using the addis/ld form of addressing. This was not being done in all cases. The changes in this patch to PPCAsmPrinter::EmitInstruction() fix this. Two test cases are also modified to reflect this requirement. Fast-isel was not creating correct code for loading floating-point constants using large code model. This also requires the addis/ld form of addressing. Previously we were using the addis/lfd shortcut which is only applicable to medium code model. One test case is modified to reflect this requirement. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190882 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 13 +++++++++---- lib/Target/PowerPC/PPCFastISel.cpp | 17 +++++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index e4a631a..f4fd138 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -413,7 +413,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); - if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI()) + if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() || + TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = @@ -438,8 +439,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); - else if (MO.isCPI()) + else if (MO.isCPI()) { MOSymbol = GetCPISymbol(MO.getIndex()); + if (TM.getCodeModel() == CodeModel::Large) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + } else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); const GlobalAlias *GAlias = dyn_cast(GValue); @@ -449,7 +453,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalVariable *GVar = dyn_cast(RealGValue); if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage()) + RealGValue->hasAvailableExternallyLinkage() || + TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -486,7 +491,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); - if (IsFunction || IsExternal) + if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = diff --git 
a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 09cc21b..4f8e6c1 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -1821,10 +1821,19 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) - .addReg(TmpReg) - .addMemOperand(MMO); + // But for large code model, we must generate a LDtocL followed + // by the LF[SD]. + if (CModel == CodeModel::Large) { + unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL), + TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addImm(0).addReg(TmpReg2); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) + .addReg(TmpReg) + .addMemOperand(MMO); } return DestReg; -- cgit v1.1 From 3789209b79ba8097cc489630c005898365134f65 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 17 Sep 2013 20:22:05 +0000 Subject: [PowerPC] Add a FIXME. Documenting a design choice to generate only medium model sequences for TLS addresses at this time. Small and large code models could be supported if necessary. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190883 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 83bd1ab..5e12664 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1408,6 +1408,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + // FIXME: TLS addresses currently use medium model code sequences, + // which is the most useful form. Eventually support for small and + // large models could be added if users need it, at the cost of + // additional complexity. GlobalAddressSDNode *GA = cast(Op); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); -- cgit v1.1 From 28860823ad34d41d4f58561dc14a982fb0843fdd Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 17 Sep 2013 23:18:05 +0000 Subject: COFF: Ensure that objects produced by LLVM link with /safeseh Summary: We indicate that the object files are safe by emitting a @feat.00 absolute address symbol. The address is presumably interpreted as a bitfield of features that the compiler would like to enable. Bit 0 is documented in the PE COFF spec to opt in to "registered SEH", which is what /safeseh enables. LLVM's object files are safe by default because LLVM doesn't know how to produce SEH handlers. 
Reviewers: Bigcheese CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D1691 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190898 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86AsmPrinter.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 9e0ab82..7d7a1ad 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -518,6 +518,26 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget->isTargetEnvMacho()) OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); + + if (Subtarget->isTargetCOFF()) { + // Emit an absolute @feat.00 symbol. This appears to be some kind of + // compiler features bitfield read by link.exe. + if (!Subtarget->is64Bit()) { + MCSymbol *S = MMI->getContext().GetOrCreateSymbol(StringRef("@feat.00")); + OutStreamer.BeginCOFFSymbolDef(S); + OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL); + OutStreamer.EndCOFFSymbolDef(); + // According to the PE-COFF spec, the LSB of this value marks the object + // for "registered SEH". This means that all SEH handler entry points + // must be registered in .sxdata. Use of any unregistered handlers will + // cause the process to terminate immediately. LLVM does not know how to + // register any SEH handlers, so its object files should be safe. + S->setAbsolute(); + OutStreamer.EmitAssignment( + S, MCConstantExpr::Create(int64_t(1), MMI->getContext())); + } + } } -- cgit v1.1 From 4acd20a20be9f7d91ed35c1c6a501cec1605e854 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Sep 2013 03:55:53 +0000 Subject: Lift alignment restrictions for load/store folding on VINSERTF128/VEXTRACTF128. Fixes PR17268. 
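The user-visible effect is on folding, not on correctness. A hedged illustration of the PR17268 situation (example code, not from the patch): VEX-encoded vinsertf128/vextractf128 memory operands carry no alignment requirement, so after the pattern change below an unaligned load or store can be folded directly instead of being rejected by the alignment-checking memop/alignedstore fragments:

    #include <immintrin.h>

    // With -mavx -O2, the unaligned load below may now fold into something
    // like "vinsertf128 $1, (%rdi), %ymm0, %ymm0" rather than needing a
    // separate vmovups first.
    __m256 widen_unaligned(__m256 acc, const float *p) {
      __m128 lo = _mm_loadu_ps(p);
      return _mm256_insertf128_ps(acc, lo, 1);
    }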
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190916 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2b271e9..81ea275 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7716,11 +7716,11 @@ def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2), +def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; -def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2), +def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; @@ -7744,22 +7744,22 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; -def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), +def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), - (bc_v4i32 (memopv2i64 addr:$src2)), + (bc_v4i32 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), - (bc_v16i8 (memopv2i64 addr:$src2)), + (bc_v16i8 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), - (bc_v8i16 (memopv2i64 addr:$src2)), + (bc_v8i16 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; @@ -7791,12 +7791,12 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), (v4f64 VR256:$src1), (EXTRACT_get_vextract128_imm VR128:$ext)))>; -def : Pat<(alignedstore (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1), - (iPTR imm))), addr:$dst), +def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1), + (iPTR imm))), addr:$dst), (VEXTRACTF128mr addr:$dst, VR256:$src1, (EXTRACT_get_vextract128_imm VR128:$ext))>; -def : Pat<(alignedstore (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1), - (iPTR imm))), addr:$dst), +def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1), + (iPTR imm))), addr:$dst), (VEXTRACTF128mr addr:$dst, VR256:$src1, (EXTRACT_get_vextract128_imm VR128:$ext))>; } -- cgit v1.1 From 773c07606e61c5090d73ea1317a0d1b0c29ec023 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Sep 2013 05:54:09 +0000 Subject: Fix X86 subtarget to not overwrite the autodetected features by calling InitMCProcessorInfo right after detecting them. Instead add a new function that only updates the scheduling model and call that. 
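A toy model of the hazard (names shortened; this is not the LLVM API, only an illustration of the one-line change below): full processor re-initialization rebuilds the feature bits from the CPU string alone, discarding whatever host detection had just ORed in, while the new entry point swaps only the scheduling model:

    #include <cassert>
    #include <cstdint>

    struct SubtargetToy {
      uint64_t FeatureBits = 0;
      const char *SchedModel = "generic";
      // Old call: resets the feature bits as a side effect.
      void InitMCProcessorInfo(const char *CPU) { FeatureBits = 0; SchedModel = CPU; }
      // New call: touches only the scheduling model.
      void InitCPUSchedModel(const char *CPU) { SchedModel = CPU; }
    };

    int main() {
      SubtargetToy ST;
      ST.FeatureBits |= 0x4;           // stands in for an autodetected feature
      ST.InitCPUSchedModel("corei7");  // the detected bit survives
      assert(ST.FeatureBits == 0x4);
    }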
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190919 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 78c9a1a..ae31bb8 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -453,7 +453,7 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // CPUName may have been set by the CPU detection code. Make sure the // new MCSchedModel is used. - InitMCProcessorInfo(CPUName, FS); + InitCPUSchedModel(CPUName); if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM) PostRAScheduler = true; -- cgit v1.1 From 42848553918540e90f77de3c70b5f1ff1dc2be9f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Sep 2013 06:01:53 +0000 Subject: Prevent extra calls to ToggleFeature for Feature64Bit and FeatureCMOV if they've already been enabled. The extra call ends up clearing the bit in FeatureBits since it's a 'toggle'. Can't prove that anything was broken because of this since I don't think the FeatureBits for these are used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190920 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index ae31bb8..f5b2614 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -440,8 +440,8 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Make sure 64-bit features are available in 64-bit mode. if (In64BitMode) { - HasX86_64 = true; ToggleFeature(X86::Feature64Bit); - HasCMov = true; ToggleFeature(X86::FeatureCMOV); + if (!HasX86_64) { HasX86_64 = true; ToggleFeature(X86::Feature64Bit); } + if (!HasCMov) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); } if (X86SSELevel < SSE2) { X86SSELevel = SSE2; -- cgit v1.1 From a4d46d7fc6431ec3576839f11cb61862b784cb3e Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 18 Sep 2013 09:45:55 +0000 Subject: [ARMv8] Add CRC instructions. Patch by Bradley Smith!
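For context, a small usage sketch (assumed front-end mapping, not part of the patch): the ACLE <arm_acle.h> wrappers map onto the crc32*/crc32c* instructions defined below, with the two families differing only in polynomial, as the comments in the patch note. Built for an ARMv8 target with CRC support, e.g. with -mcrc:

    #include <arm_acle.h>
    #include <stdint.h>

    uint32_t crc32_bytes(uint32_t crc, const uint8_t *p, unsigned n) {
      while (n--)
        crc = __crc32b(crc, *p++);   // CRC32B (t2CRC32B in Thumb-2)
      return crc;
    }

    uint32_t crc32c_word(uint32_t crc, uint32_t w) {
      return __crc32cw(crc, w);      // CRC32CW, Castagnoli polynomial
    }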
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190928 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 39 ++++++++++++++++++++++++++++++ lib/Target/ARM/ARMInstrThumb2.td | 40 +++++++++++++++++++++++++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- 3 files changed, 80 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ef34e26..e432aca 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4047,6 +4047,45 @@ def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>; //===----------------------------------------------------------------------===// +// CRC Instructions +// +// Polynomials: +// + CRC32{B,H,W} 0x04C11DB7 +// + CRC32C{B,H,W} 0x1EDC6F41 +// + +class AI_crc32 sz, string suffix, SDPatternOperator builtin> + : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary, + !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm", + [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>, + Requires<[IsARM, HasV8]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + + let Inst{31-28} = 0b1110; + let Inst{27-23} = 0b00010; + let Inst{22-21} = sz; + let Inst{20} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-10} = 0b00; + let Inst{9} = C; + let Inst{8} = 0; + let Inst{7-4} = 0b0100; + let Inst{3-0} = Rm; + + let Unpredictable{11-8} = 0b1101; +} + +def CRC32B : AI_crc32<0, 0b00, "b", int_arm_crc32b>; +def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>; +def CRC32H : AI_crc32<0, 0b01, "h", int_arm_crc32h>; +def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>; +def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>; +def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>; + +//===----------------------------------------------------------------------===// // Comparison Instructions... // diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 666d980..435b0ea 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -465,6 +465,18 @@ class T2ThreeReg pattern> + : T2XI { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{3-0} = Rm; +} + class T2sThreeReg pattern> : T2sI { @@ -3004,6 +3016,34 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), Requires<[HasT2ExtractPack, IsThumb2]>; //===----------------------------------------------------------------------===// +// CRC32 Instructions +// +// Polynomials: +// + CRC32{B,H,W} 0x04C11DB7 +// + CRC32C{B,H,W} 0x1EDC6F41 +// + +class T2I_crc32 sz, string suffix, SDPatternOperator builtin> + : T2ThreeRegNoP<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary, + !strconcat("crc32", suffix, "\t$Rd, $Rn, $Rm"), + [(set rGPR:$Rd, (builtin rGPR:$Rn, rGPR:$Rm))]>, + Requires<[IsThumb2, HasV8]> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b010110; + let Inst{20} = C; + let Inst{15-12} = 0b1111; + let Inst{7-6} = 0b10; + let Inst{5-4} = sz; +} + +def t2CRC32B : T2I_crc32<0, 0b00, "b", int_arm_crc32b>; +def t2CRC32CB : T2I_crc32<1, 0b00, "cb", int_arm_crc32cb>; +def t2CRC32H : T2I_crc32<0, 0b01, "h", int_arm_crc32h>; +def t2CRC32CH : T2I_crc32<1, 0b01, "ch", int_arm_crc32ch>; +def t2CRC32W : T2I_crc32<0, 0b10, "w", int_arm_crc32w>; +def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>; + +//===----------------------------------------------------------------------===// // Comparison Instructions... 
// defm t2CMP : T2I_cmp_irs<0b1101, "cmp", diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 032e744..ed9c8c4 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4803,7 +4803,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || - Mnemonic == "trap" || Mnemonic == "hlt" || + Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || -- cgit v1.1 From e2d6f91d63a2e8cf77b07794cda7d9ef72504769 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 18 Sep 2013 09:56:40 +0000 Subject: [SystemZ] Add unsigned compare-and-branch instructions For some reason I never got around to adding these at the same time as the signed versions. No idea why. I'm not sure whether this SystemZII::BranchC* stuff is useful, or whether it should just be replaced with an "is normal" flag. I'll leave that for later though. There are some boundary conditions that can be tweaked, such as preferring unsigned comparisons for equality with [128, 256), and "<= 255" over "< 256", but again I'll leave those for a separate patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190930 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 18 ++++++++++++++++++ lib/Target/SystemZ/SystemZInstrInfo.h | 8 ++++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 24 ++++++++++++++++++++++++ lib/Target/SystemZ/SystemZLongBranch.cpp | 23 +++++++++++++++++++++-- lib/Target/SystemZ/SystemZOperands.td | 4 ++++ 5 files changed, 75 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index a84ce28..6f6b2bf 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -794,11 +794,21 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const { return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP, MI->getOperand(2).getImm(), &MI->getOperand(3)); + case SystemZ::CLIJ: + case SystemZ::CLRJ: + return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + case SystemZ::CGIJ: case SystemZ::CGRJ: return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP, MI->getOperand(2).getImm(), &MI->getOperand(3)); + case SystemZ::CLGIJ: + case SystemZ::CLGRJ: + return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); + default: llvm_unreachable("Unrecognized branch opcode"); } @@ -927,6 +937,14 @@ unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode, return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CIJ : 0; case SystemZ::CGHI: return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0; + case SystemZ::CLR: + return SystemZ::CLRJ; + case SystemZ::CLGR: + return SystemZ::CLGRJ; + case SystemZ::CLFI: + return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0; + case SystemZ::CLGFI: + return MI && isUInt<8>(MI->getOperand(1).getImm()) ? 
SystemZ::CLGIJ : 0; default: return 0; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 3c4e8af..40fd1b6 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -70,10 +70,18 @@ namespace SystemZII { // on the result. BranchC, + // An instruction that peforms a 32-bit unsigned comparison and branches + // on the result. + BranchCL, + // An instruction that peforms a 64-bit signed comparison and branches // on the result. BranchCG, + // An instruction that peforms a 64-bit unsigned comparison and branches + // on the result. + BranchCLG, + // An instruction that decrements a 32-bit register and branches if // the result is nonzero. BranchCT, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index d98d75a..d028461 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -94,6 +94,18 @@ multiclass CompareBranches { def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3, brtarget16:$RI4), "cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3, + brtarget16:$RI4), + "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3, + brtarget16:$RI4), + "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>; + def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; + def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3, + brtarget16:$RI4), + "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>; } } let isCodeGenOnly = 1 in @@ -152,6 +164,18 @@ multiclass IntCondExtendedMnemonicA ccmask, string name> { def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, brtarget16:$RI4), "cgij"##name##"\t$R1, $I2, $RI4", []>; + def CLR : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, + brtarget16:$RI4), + "clrj"##name##"\t$R1, $R2, $RI4", []>; + def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, + brtarget16:$RI4), + "clgrj"##name##"\t$R1, $R2, $RI4", []>; + def CLI : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, + brtarget16:$RI4), + "clij"##name##"\t$R1, $I2, $RI4", []>; + def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, + brtarget16:$RI4), + "clgij"##name##"\t$R1, $I2, $RI4", []>; } } multiclass IntCondExtendedMnemonic ccmask, string name1, string name2> diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 114f74e..ba027d4 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -225,11 +225,13 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { Terminator.ExtraRelaxSize = 6; break; case SystemZ::CRJ: - // Relaxes to a CR/BRCL sequence, which is 2 bytes longer. + case SystemZ::CLRJ: + // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer. Terminator.ExtraRelaxSize = 2; break; case SystemZ::CGRJ: - // Relaxes to a CGR/BRCL sequence, which is 4 bytes longer. + case SystemZ::CLGRJ: + // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer. Terminator.ExtraRelaxSize = 4; break; case SystemZ::CIJ: @@ -237,6 +239,11 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer. 
Terminator.ExtraRelaxSize = 4; break; + case SystemZ::CLIJ: + case SystemZ::CLGIJ: + // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer. + Terminator.ExtraRelaxSize = 6; + break; default: llvm_unreachable("Unrecognized branch instruction"); } @@ -401,6 +408,18 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) { case SystemZ::CGIJ: splitCompareBranch(Branch, SystemZ::CGHI); break; + case SystemZ::CLRJ: + splitCompareBranch(Branch, SystemZ::CLR); + break; + case SystemZ::CLGRJ: + splitCompareBranch(Branch, SystemZ::CLGR); + break; + case SystemZ::CLIJ: + splitCompareBranch(Branch, SystemZ::CLFI); + break; + case SystemZ::CLGIJ: + splitCompareBranch(Branch, SystemZ::CLGFI); + break; default: llvm_unreachable("Unrecognized branch"); } diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 421e41f..e1e4541 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -333,6 +333,10 @@ def imm64sx8 : Immediate(N->getSExtValue()); }], SIMM8, "S8Imm">; +def imm64zx8 : Immediate(N->getSExtValue()); +}], UIMM8, "U8Imm">; + def imm64sx16 : Immediate(N->getSExtValue()); }], SIMM16, "S16Imm">; -- cgit v1.1 From d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21a Mon Sep 17 00:00:00 2001 From: Robert Lytton Date: Wed, 18 Sep 2013 12:43:35 +0000 Subject: Prevent LoopVectorizer and SLPVectorizer running if the target has no vector registers. XCore target: Add XCoreTargetTransformInfo This is where getNumberOfRegisters() resides, which in turn returns the number of vector registers (=0). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190936 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCore.h | 2 + lib/Target/XCore/XCoreTargetMachine.cpp | 8 +++ lib/Target/XCore/XCoreTargetMachine.h | 2 + lib/Target/XCore/XCoreTargetTransformInfo.cpp | 85 +++++++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 lib/Target/XCore/XCoreTargetTransformInfo.cpp (limited to 'lib/Target') diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h index 2f375fc..73c310b 100644 --- a/lib/Target/XCore/XCore.h +++ b/lib/Target/XCore/XCore.h @@ -31,6 +31,8 @@ namespace llvm { CodeGenOpt::Level OptLevel); ModulePass *createXCoreLowerThreadLocalPass(); + ImmutablePass *createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM); + } // end namespace llvm; #endif diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 3ef1520..9ae0b86 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -70,3 +70,11 @@ bool XCorePassConfig::addInstSelector() { extern "C" void LLVMInitializeXCoreTarget() { RegisterTargetMachine X(TheXCoreTarget); } + +void XCoreTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our XCore pass. This + // allows the XCore pass to delegate to the target independent layer when + // appropriate. 
+ PM.add(createBasicTargetTransformInfoPass(this)); + PM.add(createXCoreTargetTransformInfoPass(this)); +} diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index eb9a1aa..a19a677 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -57,6 +57,8 @@ public: // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + + virtual void addAnalysisPasses(PassManagerBase &PM); }; } // end namespace llvm diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp new file mode 100644 index 0000000..4862138 --- /dev/null +++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp @@ -0,0 +1,85 @@ +//===-- XCoreTargetTransformInfo.cpp - XCore specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// XCore target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "xcoretti" +#include "XCore.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't havve a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializeXCoreTTIPass(PassRegistry &); +} + +namespace { + +class XCoreTTI : public ImmutablePass, public TargetTransformInfo { + const XCoreTargetMachine *TM; + +public: + XCoreTTI() : ImmutablePass(ID), TM(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + XCoreTTI(const XCoreTargetMachine *TM) + : ImmutablePass(ID), TM(TM) { + initializeXCoreTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + static char ID; + + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + unsigned getNumberOfRegisters(bool Vector) const { + if (Vector) { + return 0; + } + return 12; + } +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(XCoreTTI, TargetTransformInfo, "xcoretti", + "XCore Target Transform Info", true, true, false) +char XCoreTTI::ID = 0; + + +ImmutablePass * +llvm::createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM) { + return new XCoreTTI(TM); +} -- cgit v1.1 From 8c03d6a9e8381f60cd5ae7f14cc119e26b32daa5 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 18 Sep 2013 12:59:41 +0000 Subject: Target/XCore/CMakeLists.txt: Add XCoreTargetTransformInfo.cpp. 
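Taken together, the two XCore changes above hang the whole vectorizer opt-out on a single query. A toy illustration of that mechanism (simplified interface, not the real TargetTransformInfo signatures):

    #include <cstdio>

    struct TTIToy {
      virtual ~TTIToy() {}
      virtual unsigned getNumberOfRegisters(bool Vector) const { return 8; }
    };

    // Mirrors the override added above: no vector registers, 12 scalar ones.
    struct XCoreTTIToy : TTIToy {
      unsigned getNumberOfRegisters(bool Vector) const { return Vector ? 0 : 12; }
    };

    static bool shouldVectorize(const TTIToy &TTI) {
      // The vectorizers bail out when the target reports no vector registers.
      return TTI.getNumberOfRegisters(true) != 0;
    }

    int main() {
      XCoreTTIToy TTI;
      std::printf("vectorize? %d\n", shouldVectorize(TTI)); // prints 0
    }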
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190937 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/XCore/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib/Target')

diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 85d2a1d..3fa3b34 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_target(XCoreCodeGen
   XCoreSubtarget.cpp
   XCoreTargetMachine.cpp
   XCoreTargetObjectFile.cpp
+  XCoreTargetTransformInfo.cpp
   XCoreSelectionDAGInfo.cpp
   )
--
cgit v1.1

From 69f99bf3a391c3fa1b6e49d963dbb914790b428c Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Wed, 18 Sep 2013 14:11:11 +0000
Subject: More XCore TTI cleanup -- remove an unused private field flagged by
 -Wunused-private-field with Clang.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190941 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/XCore/XCoreTargetTransformInfo.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
index 4862138..cc165f7 100644
--- a/lib/Target/XCore/XCoreTargetTransformInfo.cpp
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
@@ -32,15 +32,13 @@ void initializeXCoreTTIPass(PassRegistry &);
 namespace {

 class XCoreTTI : public ImmutablePass, public TargetTransformInfo {
-  const XCoreTargetMachine *TM;
-
 public:
-  XCoreTTI() : ImmutablePass(ID), TM(0) {
+  XCoreTTI() : ImmutablePass(ID) {
     llvm_unreachable("This pass cannot be directly constructed");
   }

-  XCoreTTI(const XCoreTargetMachine *TM)
-    : ImmutablePass(ID), TM(TM) {
+  XCoreTTI(const XCoreTargetMachine *TM)
+    : ImmutablePass(ID) {
     initializeXCoreTTIPass(*PassRegistry::getPassRegistry());
   }

--
cgit v1.1

From c1fe3e3b330d9404530abc2fbbd0f6d1fa64ce01 Mon Sep 17 00:00:00 2001
From: Reed Kotler
Date: Wed, 18 Sep 2013 22:46:09 +0000
Subject: Fix two issues regarding GOT pointer (GP) setup.

1) Make sure that the first two instructions of the sequence cannot be
separated from each other; the linker requires that they be sequential.
If they do get separated the code can still work in some cases, because
the first instruction mostly involves the high part of the PC-relative
offset, which changes slowly; you would have to be at just the wrong
boundary for the split to matter.

2) Make sure that the sequence begins on a longword boundary. There
appears to be a bug in binutils which makes some of these calculations
come out wrong if the instruction sequence does not begin on a longword
boundary. This is being investigated with the appropriate binutils folks.
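Both constraints are enforced by the same mechanism in the patch: the
li/addiu pair is now emitted as a single GotPrologue16 pseudo that is
only expanded to text late, in the asm printer, so no pass can place
anything between the two halves, and the leading .align 2 in the
expansion pins the sequence to a longword boundary.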
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190966 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 9 +++++---- lib/Target/Mips/Mips16InstrInfo.td | 9 +++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 0caa277..4948f40 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -80,10 +80,11 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { V1 = RegInfo.createVirtualRegister(RC); V2 = RegInfo.createVirtualRegister(RC); - BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::GotPrologue16), V0). + addReg(V1, RegState::Define). + addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI). + addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) .addReg(V1).addReg(V2); diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index dd0cea6..c9840b3 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -768,6 +768,10 @@ def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>; // def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; +def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIAlu> { + let isCodeGenOnly = 1; +} + // // Format: LW ry, offset(rx) MIPS16e // Purpose: Load Word (Extended) @@ -1808,3 +1812,8 @@ def : Mips16Pat<(i32 (extloadi16 addr16:$src)), def: Mips16Pat<(trap), (Break16)>; +def GotPrologue16: + MipsPseudo16< + (outs CPU16Regs:$rh, CPU16Regs:$rl), + (ins simm16:$immHi, simm16:$immLo), + ".align 2\n\tli\t$rh, $immHi\n\taddiu\t$rl, $$pc, $immLo\n ",[]> ; -- cgit v1.1 From adadf887cb57f8281ec23f846a946fb59bd0a0d7 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 19 Sep 2013 11:33:53 +0000 Subject: X86: FrameIndex addressing modes do have a base register. When selecting the DAG (add (WrapperRIP ...), (FrameIndex ...)), X86 code had spotted the FrameIndex possibility and was working out whether it could fold the WrapperRIP into this. The test for forming a %rip version is notionally whether we already have a base or index register (%rip precludes both), but we were forgetting to account for the register that would be inserted later to access the frame. 
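Concretely, an address of the form (add (WrapperRIP @global),
(FrameIndex fi)) needs %rip for the global and %rsp or %rbp for the
frame slot, and the encoding only has room for one base register.
Counting a FrameIndexBase as an in-use base register, even though
Base_Reg itself is still null at this point in selection, makes the
fold check fail as it should.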
rdar://problem/15024520 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190995 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 33c80c7..32ad1aa 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -79,7 +79,8 @@ namespace { } bool hasBaseOrIndexReg() const { - return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; + return BaseType == FrameIndexBase || + IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; } /// isRIPRelative - Return true if this addressing mode is already RIP -- cgit v1.1 From 5df37dab763ce377095389c4ea1cff88db369954 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 19 Sep 2013 11:59:01 +0000 Subject: [ARMv8] Add support for the v8 cryptography extensions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 3 + lib/Target/ARM/ARMInstrFormats.td | 4 +- lib/Target/ARM/ARMInstrInfo.td | 2 + lib/Target/ARM/ARMInstrNEON.td | 111 ++++++++++++++++++++++-- lib/Target/ARM/ARMSubtarget.cpp | 1 + lib/Target/ARM/ARMSubtarget.h | 4 + lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 13 +-- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 20 +++++ 8 files changed, 142 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 5752005..c9d71bd 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -67,6 +67,9 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable support for Performance Monitor extensions">; def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", "Enable support for TrustZone security extensions">; +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for Cryptography extensions", + [FeatureNEON]>; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index e505e1a..27cfe96 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -2015,7 +2015,7 @@ class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, } // Same as N2V but not predicated. 
-class N2Vnp op17_16, bits<3> op10_8, bit op7, bit op6, +class N2Vnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, dag oops, dag iops, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, list pattern> : NeonInp op17_16, bits<3> op10_8, bit op7, bit op6, // Encode constant bits let Inst{27-23} = 0b00111; let Inst{21-20} = 0b11; - let Inst{19-18} = 0b10; + let Inst{19-18} = op19_18; let Inst{17-16} = op17_16; let Inst{11} = 0; let Inst{10-8} = op10_8; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e432aca..93ecb16 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -212,6 +212,8 @@ def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; +def HasCrypto : Predicate<"Subtarget->hasCrypto()">, + AssemblerPredicate<"FeatureCrypto", "crypto">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f1bd37e..269c13d 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2355,17 +2355,36 @@ class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, class N2VDIntnp op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2Vnp; class N2VQIntnp op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2Vnp; +// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). +class N2VQIntXnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp; + +// Same as N2VQIntXnp but with Vd as a src register. +class N2VQIntX2np op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp { + let Constraints = "$src = $Vd"; +} + // Narrow 2-register operations. class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -2534,7 +2553,7 @@ class N3VDIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3Vnp; @@ -2592,6 +2611,19 @@ class N3VQIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, ResTy, OpTy, IntOp, Commutable, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; +// Same as N3VQIntnp but with Vd as a src register. 
+class N3VQInt3np op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp { + let Constraints = "$src = $Vd"; +} + class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -2842,6 +2874,7 @@ class N3VL op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + class N3VLSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> @@ -2897,6 +2930,17 @@ class N3VLInt op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + +// Same as above, but not predicated. +class N3VLIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -4078,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", NEONvmulls, 1>; -defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", NEONvmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", - v8i16, v8i8, int_arm_neon_vmullp, 1>; +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "NEONData" in { + defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; + defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; + def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; + def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, + "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, + Requires<[HasV8, HasCrypto]>; +} defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; @@ -5818,6 +5868,49 @@ defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; +// Cryptography instructions +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "v8Crypto" in { + class AES + : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class AES2Op + : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA2Op op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, 
v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N3SHA3Op op27_23, bits<2> op21_20, SDPatternOperator Int> + : N3VQInt3np, + Requires<[HasV8, HasCrypto]>; +} + +def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; +def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; +def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; +def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; + +def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>; +def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; +def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; +def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>; +def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>; +def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>; +def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; +def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; +def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; +def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index a227718..0692fbd 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -110,6 +110,7 @@ void ARMSubtarget::initializeEnvironment() { FPOnlySP = false; HasPerfMon = false; HasTrustZone = false; + HasCrypto = false; AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 65278a5..a3b701a 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -159,6 +159,9 @@ protected: /// HasTrustZone - if true, processor supports TrustZone security extensions bool HasTrustZone; + /// HasCrypto - if true, processor supports Cryptography extensions + bool HasCrypto; + /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). @@ -248,6 +251,7 @@ public: bool hasVFP4() const { return HasVFPv4; } bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } + bool hasCrypto() const { return HasCrypto; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ed9c8c4..edb7ccd 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -141,7 +141,8 @@ class ARMAsmParser : public MCTargetAsmParser { StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, StringRef &ITMask); - void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, + void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); bool isThumb() const { @@ -4784,8 +4785,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, // // FIXME: It would be nice to autogen this. 
void ARMAsmParser:: -getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, - bool &CanAcceptPredicationCode) { +getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || Mnemonic == "add" || Mnemonic == "adc" || @@ -4808,7 +4809,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || - Mnemonic == "vrintm") { + Mnemonic == "vrintm" || Mnemonic.startswith("aes") || + Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || + (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { @@ -5068,7 +5071,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // the matcher deal with finding the right instruction or generating an // appropriate error. bool CanAcceptCarrySet, CanAcceptPredicationCode; - getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode); + getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode); // If we had a carry-set on an instruction that can't do that, issue an // error. diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8a06664..7f53240 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -507,6 +507,14 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); + result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -826,6 +834,18 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); + uint32_t NEONCryptoInsn = insn32; + NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24 + NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 + NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25 + result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, + Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); uint32_t NEONv8Insn = insn32; NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, -- cgit v1.1 From 3917535b1af06f03dc0150fc956fb05218a78292 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Thu, 19 Sep 2013 12:51:46 +0000 Subject: [NVPTX] Support constant vector globals git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190997 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 12f18c4..10f6854 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1864,7 +1864,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::VectorTyID: case Type::StructTyID: { if (isa(CPV) || isa(CPV) || 
- isa(CPV)) { + isa(CPV) || isa(CPV)) { int ElementSize = TD->getTypeAllocSize(CPV->getType()); bufferAggregateConstant(CPV, aggBuffer); if (Bytes > ElementSize) -- cgit v1.1 From cdfb43f0a68274f40340af73218699265466c074 Mon Sep 17 00:00:00 2001 From: Yi Jiang Date: Thu, 19 Sep 2013 17:48:48 +0000 Subject: X86 horizontal vector reduction cost model git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191021 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 84 +++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 935a6da..36bfeb1 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -101,6 +101,9 @@ public: unsigned AddressSpace) const; virtual unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex) const; + + virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) const; /// @} }; @@ -605,3 +608,84 @@ unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return TargetTransformInfo::getAddressComputationCost(Ty, IsComplex); } + +unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy, + bool IsPairwise) const { + + std::pair LT = TLI->getTypeLegalizationCost(ValTy); + + MVT MTy = LT.second; + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput + // and make it as the cost. + + static const CostTblEntry SSE42CostTblPairWise[] = { + { ISD::FADD, MVT::v2f64, 2 }, + { ISD::FADD, MVT::v4f32, 4 }, + { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". + { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". + { ISD::ADD, MVT::v8i16, 5 }, + }; + + static const CostTblEntry AVX1CostTblPairWise[] = { + { ISD::FADD, MVT::v4f32, 4 }, + { ISD::FADD, MVT::v4f64, 5 }, + { ISD::FADD, MVT::v8f32, 7 }, + { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". + { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". + { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8". + { ISD::ADD, MVT::v8i16, 5 }, + { ISD::ADD, MVT::v8i32, 5 }, + }; + + static const CostTblEntry SSE42CostTblNoPairWise[] = { + { ISD::FADD, MVT::v2f64, 2 }, + { ISD::FADD, MVT::v4f32, 4 }, + { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". + { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3". + { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3". + }; + + static const CostTblEntry AVX1CostTblNoPairWise[] = { + { ISD::FADD, MVT::v4f32, 3 }, + { ISD::FADD, MVT::v4f64, 3 }, + { ISD::FADD, MVT::v8f32, 4 }, + { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". + { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8". 
+    { ISD::ADD,  MVT::v4i64,   3 },
+    { ISD::ADD,  MVT::v8i16,   4 },
+    { ISD::ADD,  MVT::v8i32,   5 },
+  };
+
+  if (IsPairwise) {
+    if (ST->hasAVX()) {
+      int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
+      if (Idx != -1)
+        return LT.first * AVX1CostTblPairWise[Idx].Cost;
+    }
+
+    if (ST->hasSSE42()) {
+      int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
+      if (Idx != -1)
+        return LT.first * SSE42CostTblPairWise[Idx].Cost;
+    }
+  } else {
+    if (ST->hasAVX()) {
+      int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
+      if (Idx != -1)
+        return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
+    }
+
+    if (ST->hasSSE42()) {
+      int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
+      if (Idx != -1)
+        return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
+    }
+  }
+
+  return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
+}
+
--
cgit v1.1

From 5cc319a42a914b24b164a94d9a563c728a7a4026 Mon Sep 17 00:00:00 2001
From: Richard Mitton
Date: Thu, 19 Sep 2013 23:21:01 +0000
Subject: Added support for generating DWARF .debug_aranges sections
 automatically.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191052 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 2 +-
 lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 104e4d2..8e2be81 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -129,7 +129,7 @@ private:
     MCELF::SetType(SD, ELF::STT_NOTYPE);
     MCELF::SetBinding(SD, ELF::STB_LOCAL);
     SD.setExternal(false);
-    Symbol->setSection(*getCurrentSection().first);
+    AssignSection(Symbol, getCurrentSection().first);

     const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
     Symbol->setVariableValue(Value);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6b98205..37a80d5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -183,7 +183,7 @@ private:
     MCELF::SetType(SD, ELF::STT_NOTYPE);
     MCELF::SetBinding(SD, ELF::STB_LOCAL);
     SD.setExternal(false);
-    Symbol->setSection(*getCurrentSection().first);
+    AssignSection(Symbol, getCurrentSection().first);

     const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
     Symbol->setVariableValue(Value);
--
cgit v1.1

From f45edcc3818757234c20d4d5975c0b992bf1f95e Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Fri, 20 Sep 2013 05:14:41 +0000
Subject: Allow subtarget selection of the default MachineScheduler and
 document the interface.

The global registry is used to allow command-line override of the
scheduler selection, but it does not work well as the normal selection
API. For example, the same LLVM process should be able to target
multiple targets or subtargets.
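As a sketch of the new interface: the Foo names and createFooMachineSched
below are invented placeholders, while TargetPassConfig, ScheduleDAGInstrs,
and the createMachineScheduler(MachineSchedContext *) hook are the actual
pieces this patch adds (compare the Hexagon and R600 changes below).

    #include "llvm/CodeGen/MachineScheduler.h"
    #include "llvm/CodeGen/Passes.h"
    using namespace llvm;

    // Hypothetical factory for a target-specific scheduler.
    ScheduleDAGInstrs *createFooMachineSched(MachineSchedContext *C);

    namespace {
    class FooPassConfig : public TargetPassConfig {
    public:
      FooPassConfig(TargetMachine *TM, PassManagerBase &PM)
        : TargetPassConfig(TM, PM) {}

      // Returning a scheduler here replaces the old global
      // MachineSchedRegistry::setDefault() call; returning 0 selects
      // the default scheduler, and -misched=... on the command line
      // still takes precedence.
      virtual ScheduleDAGInstrs *
      createMachineScheduler(MachineSchedContext *C) const {
        return createFooMachineSched(C);
      }
    };
    } // end anonymous namespace

This keeps the choice per TargetMachine rather than per process, which is
what allows one process to host several targets or subtargets.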
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191071 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonTargetMachine.cpp | 16 ++++++++++++---- lib/Target/R600/AMDGPUSubtarget.h | 4 ++++ lib/Target/R600/AMDGPUTargetMachine.cpp | 18 ++++++++++-------- 3 files changed, 26 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index cd96b58..bb950a0 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -102,17 +102,25 @@ class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) { - // Enable MI scheduler. - if (!DisableHexagonMISched) { + // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define + // HexagonSubtarget::enableMachineScheduler() { return true; }. + // That will bypass the SelectionDAG VLIW scheduler, which is probably just + // hurting compile time and will be removed eventually anyway. + if (DisableHexagonMISched) + disablePass(&MachineSchedulerID); + else enablePass(&MachineSchedulerID); - MachineSchedRegistry::setDefault(createVLIWMachineSched); - } } HexagonTargetMachine &getHexagonTargetMachine() const { return getTM(); } + virtual ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const { + return createVLIWMachineSched(C); + } + virtual bool addInstSelector(); virtual bool addPreRegAlloc(); virtual bool addPostRegAlloc(); diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 8c65096..0e8b58a 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -64,6 +64,10 @@ public: bool hasHWFP64() const; bool hasCaymanISA() const; + virtual bool enableMachineScheduler() const { + return getGeneration() <= NORTHERN_ISLANDS; + } + // Helper functions to simplify if statements bool isTargetELF() const; std::string getDataLayout() const; diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index d77cddd..2119ed3 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -80,17 +80,20 @@ namespace { class AMDGPUPassConfig : public TargetPassConfig { public: AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) { - const AMDGPUSubtarget &ST = TM->getSubtarget(); - if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - enablePass(&MachineSchedulerID); - MachineSchedRegistry::setDefault(createR600MachineScheduler); - } - } + : TargetPassConfig(TM, PM) {} AMDGPUTargetMachine &getAMDGPUTargetMachine() const { return getTM(); } + + virtual ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const { + const AMDGPUSubtarget &ST = TM->getSubtarget(); + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) + return createR600MachineScheduler(C); + return 0; + } + virtual bool addPreISel(); virtual bool addInstSelector(); virtual bool addPreRegAlloc(); @@ -186,4 +189,3 @@ bool AMDGPUPassConfig::addPreEmitPass() { return false; } - -- cgit v1.1 From 333983d0c2711b193042a4a484363a2b2e32bc5f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 20 Sep 2013 05:37:49 +0000 Subject: Lift alignment restrictions on load/store folding of VEXTRACTI128/VINSERTI128. 
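This should be safe because, unlike the legacy SSE forms, VEX-encoded
memory accesses do not fault on misaligned addresses (only the
explicitly aligned vmovdqa/vmovaps family does), so the folded patterns
can use loadv2i64/store in place of memopv2i64/alignedstore.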
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191073 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 81ea275..be0f6c5 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8329,22 +8329,22 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; -def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), +def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), (iPTR imm)), (VINSERTI128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), - (bc_v4i32 (memopv2i64 addr:$src2)), + (bc_v4i32 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTI128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), - (bc_v16i8 (memopv2i64 addr:$src2)), + (bc_v16i8 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTI128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), - (bc_v8i16 (memopv2i64 addr:$src2)), + (bc_v8i16 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTI128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; @@ -8383,20 +8383,20 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), (v32i8 VR256:$src1), (EXTRACT_get_vextract128_imm VR128:$ext)))>; -def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1), - (iPTR imm))), addr:$dst), +def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1), + (iPTR imm))), addr:$dst), (VEXTRACTI128mr addr:$dst, VR256:$src1, (EXTRACT_get_vextract128_imm VR128:$ext))>; -def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1), - (iPTR imm))), addr:$dst), +def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1), + (iPTR imm))), addr:$dst), (VEXTRACTI128mr addr:$dst, VR256:$src1, (EXTRACT_get_vextract128_imm VR128:$ext))>; -def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1), - (iPTR imm))), addr:$dst), +def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1), + (iPTR imm))), addr:$dst), (VEXTRACTI128mr addr:$dst, VR256:$src1, (EXTRACT_get_vextract128_imm VR128:$ext))>; -def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1), - (iPTR imm))), addr:$dst), +def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1), + (iPTR imm))), addr:$dst), (VEXTRACTI128mr addr:$dst, VR256:$src1, (EXTRACT_get_vextract128_imm VR128:$ext))>; } -- cgit v1.1 From 6b2448326f8faa9149616a541d4c0d72b9d526e6 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 20 Sep 2013 21:22:28 +0000 Subject: [mips] MUL should clobber HI0 and LO0. I cannot think of a test case that reliably triggers this bug. 
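For the record, the MIPS32 manual specifies that the three-operand MUL
leaves HI and LO UNPREDICTABLE afterwards, so without the modeled
clobber an mflo/mfhi consuming an earlier mult could legally be
scheduled past a MUL and read garbage.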
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191109 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsInstrInfo.td | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 4795969..be6d753 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -878,6 +878,7 @@ def ADDu : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, IIArith, add>,
            ADD_FM<0, 0x21>;
 def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, IIArith, sub>,
            ADD_FM<0, 0x23>;
+let Defs = [HI0, LO0] in
 def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, IIImul, mul>,
           ADD_FM<0x1c, 2>;
 def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>;
--
cgit v1.1

From ed7fd711a1917a31714d3f9a41210916be450079 Mon Sep 17 00:00:00 2001
From: Reed Kotler
Date: Sat, 21 Sep 2013 01:37:52 +0000
Subject: Set .reorder for the stub so that gas takes care of delay slot
 processing.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191125 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/Mips16HardFloat.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index a73d162..14a9424 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -441,8 +441,10 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
     IAH.Out(".reloc 0,R_MIPS_NONE," + Name);
     IAH.Out("la $$25," + LocalName);
   }
-  else
+  else {
+    IAH.Out(".set reorder");
     IAH.Out("la $$25, " + Name);
+  }
   swapFPIntParams(PV, M, IAH, LE, false);
   IAH.Out("jr $$25");
   IAH.Out(LocalName + " = " + Name);
--
cgit v1.1

From 1941431f8a0f61fb5d5e3175cc49efd5dd19033c Mon Sep 17 00:00:00 2001
From: Juergen Ributzka
Date: Sat, 21 Sep 2013 04:55:18 +0000
Subject: SelectionDAG: Teach the legalizer to split SETCC if VSELECT needs
 splitting too.

The Type Legalizer recognizes that VSELECT needs to be split, because
the type is too wide for the given target. The same does not always
apply to SETCC, because less space is required to encode the result of
a comparison. As a result, VSELECT is split and SETCC is unrolled into
scalar comparisons.

This commit fixes the issue by checking for VSELECT-SETCC patterns in
the DAG Combiner. If a matching pattern is found, then the result mask
of SETCC is promoted to the expected vector mask for the given target.
This mask usually has the same size as the VSELECT return type (except
for Intel KNL). Now the type legalizer will split both VSELECT and
SETCC.

This allows the following X86 DAG Combine code to successfully detect
the MIN/MAX pattern. This fixes PR16695, PR17002, and .
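For illustration (this example is not a test case from the patch), the
kind of source that exercises the combined fix is a simple elementwise
clamp, which the vectorizer lowers to vselect(setcc(...)) on wide
vectors:

    // With, e.g., 8 x i32 under AVX, the loop body becomes
    // vselect(setcc(slt, a, b), a, b); once SETCC is split alongside
    // VSELECT rather than scalarized, the X86 combine can match this
    // as a vector signed-min.
    void clamp_to(int *a, const int *b, int n) {
      for (int i = 0; i < n; ++i)
        a[i] = (a[i] < b[i]) ? a[i] : b[i];
    }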
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b3cac05..974a1ee 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1535,7 +1535,16 @@ void X86TargetLowering::resetOperationActions() { } EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) return MVT::i8; + if (!VT.isVector()) + return MVT::i8; + + const TargetMachine &TM = getTargetMachine(); + if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) + switch(VT.getVectorNumElements()) { + case 8: return MVT::v8i1; + case 16: return MVT::v16i1; + } + return VT.changeVectorElementTypeToInteger(); } -- cgit v1.1 From fcfc234130e9b065cda0363f6945c9ce5a930c31 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Sat, 21 Sep 2013 04:55:22 +0000 Subject: [X86] Emulate AVX 256bit MIN/MAX support by splitting the vector. In AVX 256bit vectors are valid vectors and therefore the Type Legalizer doesn't split the VSELECT and SETCC nodes. AVX only supports MIN/MAX on 128bit vectors and this fix enables vector splitting for this special case in the X86 DAG Combiner. This fix is related to PR16695, PR17002, and . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191131 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 64 +++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 19 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 974a1ee..0e9381d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16371,24 +16371,28 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, } /// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match. -static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, - SDValue RHS, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { +static std::pair +matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, + SelectionDAG &DAG, const X86Subtarget *Subtarget) { if (!VT.isVector()) - return 0; + return std::make_pair(0, false); + bool NeedSplit = false; switch (VT.getSimpleVT().SimpleTy) { - default: return 0; + default: return std::make_pair(0, false); case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: if (!Subtarget->hasAVX2()) - return 0; + NeedSplit = true; + if (!Subtarget->hasAVX()) + return std::make_pair(0, false); + break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: if (!Subtarget->hasSSE2()) - return 0; + return std::make_pair(0, false); } // SSE2 has only a small subset of the operations. @@ -16399,6 +16403,7 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + unsigned Opc = 0; // Check for x CC y ? x : y. if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && DAG.isEqualTo(RHS, Cond.getOperand(1))) { @@ -16406,16 +16411,16 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, default: break; case ISD::SETULT: case ISD::SETULE: - return hasUnsigned ? X86ISD::UMIN : 0; + Opc = hasUnsigned ? X86ISD::UMIN : 0; break; case ISD::SETUGT: case ISD::SETUGE: - return hasUnsigned ? X86ISD::UMAX : 0; + Opc = hasUnsigned ? 
X86ISD::UMAX : 0; break; case ISD::SETLT: case ISD::SETLE: - return hasSigned ? X86ISD::SMIN : 0; + Opc = hasSigned ? X86ISD::SMIN : 0; break; case ISD::SETGT: case ISD::SETGE: - return hasSigned ? X86ISD::SMAX : 0; + Opc = hasSigned ? X86ISD::SMAX : 0; break; } // Check for x CC y ? y : x -- a min/max with reversed arms. } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && @@ -16424,20 +16429,20 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, default: break; case ISD::SETULT: case ISD::SETULE: - return hasUnsigned ? X86ISD::UMAX : 0; + Opc = hasUnsigned ? X86ISD::UMAX : 0; break; case ISD::SETUGT: case ISD::SETUGE: - return hasUnsigned ? X86ISD::UMIN : 0; + Opc = hasUnsigned ? X86ISD::UMIN : 0; break; case ISD::SETLT: case ISD::SETLE: - return hasSigned ? X86ISD::SMAX : 0; + Opc = hasSigned ? X86ISD::SMAX : 0; break; case ISD::SETGT: case ISD::SETGE: - return hasSigned ? X86ISD::SMIN : 0; + Opc = hasSigned ? X86ISD::SMIN : 0; break; } } - return 0; + return std::make_pair(Opc, NeedSplit); } /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT @@ -16795,9 +16800,30 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } // Try to match a min/max vector operation. - if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) - if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget)) - return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS); + if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) { + unsigned Opc; + bool NeedSplit; + std::tie(Opc, NeedSplit) = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget); + + if (Opc && NeedSplit) { + unsigned NumElems = VT.getVectorNumElements(); + // Extract the LHS vectors + SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL); + SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL); + + // Extract the RHS vectors + SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL); + SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL); + + // Create min/max for each subvector + LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1); + RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2); + + // Merge the result + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS); + } else if (Opc) + return DAG.getNode(Opc, DL, VT, LHS, RHS); + } // Simplify vector selection if the selector will be produced by CMPP*/PCMP*. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && -- cgit v1.1 From 7cdc37064116f8688bb695fc9f30f128c25dcd84 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Sat, 21 Sep 2013 05:15:01 +0000 Subject: Fix the buildbot git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191133 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0e9381d..c5cc2cb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16801,9 +16801,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Try to match a min/max vector operation. 
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) { - unsigned Opc; - bool NeedSplit; - std::tie(Opc, NeedSplit) = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget); + std::pair ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget); + unsigned Opc = ret.first; + bool NeedSplit = ret.second; if (Opc && NeedSplit) { unsigned NumElems = VT.getVectorNumElements(); -- cgit v1.1 From 95838d5caf2d234156d5862d54f8e78d649a7d1b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 21 Sep 2013 05:58:59 +0000 Subject: Remove alignment restrictions from FMA load folding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191136 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFMA.td | 62 +++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 7759a8a..79fa7a0 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -74,43 +74,43 @@ let neverHasSideEffects = 1 in { // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { - defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32, - memopv8f32, X86Fmadd, v4f32, v8f32>; - defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, - memopv8f32, X86Fmsub, v4f32, v8f32>; + defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", loadv4f32, + loadv8f32, X86Fmadd, v4f32, v8f32>; + defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", loadv4f32, + loadv8f32, X86Fmsub, v4f32, v8f32>; defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", - memopv4f32, memopv8f32, X86Fmaddsub, + loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32>; defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", - memopv4f32, memopv8f32, X86Fmsubadd, + loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { - defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64, - memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W; - defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64, - memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W; + defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", loadv2f64, + loadv4f64, X86Fmadd, v2f64, v4f64>, VEX_W; + defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", loadv2f64, + loadv4f64, X86Fmsub, v2f64, v4f64>, VEX_W; defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", - memopv2f64, memopv4f64, X86Fmaddsub, + loadv2f64, loadv4f64, X86Fmaddsub, v2f64, v4f64>, VEX_W; defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", - memopv2f64, memopv4f64, X86Fmsubadd, + loadv2f64, loadv4f64, X86Fmsubadd, v2f64, v4f64>, VEX_W; } // Fused Negative Multiply-Add let ExeDomain = SSEPackedSingle in { - defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32, - memopv8f32, X86Fnmadd, v4f32, v8f32>; - defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32, - memopv8f32, X86Fnmsub, v4f32, v8f32>; + defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", loadv4f32, + loadv8f32, X86Fnmadd, v4f32, v8f32>; + defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", loadv4f32, + loadv8f32, X86Fnmsub, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { - defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64, - memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; + defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", loadv2f64, 
+ loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", - memopv2f64, memopv4f64, X86Fnmsub, v2f64, + loadv2f64, loadv4f64, X86Fnmsub, v2f64, v4f64>, VEX_W; } @@ -338,31 +338,31 @@ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, let ExeDomain = SSEPackedSingle in { defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, - memopv4f32, memopv8f32>; + loadv4f32, loadv8f32>; defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, - memopv4f32, memopv8f32>; + loadv4f32, loadv8f32>; defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32, - memopv4f32, memopv8f32>; + loadv4f32, loadv8f32>; defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32, - memopv4f32, memopv8f32>; + loadv4f32, loadv8f32>; defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32, - memopv4f32, memopv8f32>; + loadv4f32, loadv8f32>; defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32, - memopv4f32, memopv8f32>; + loadv4f32, loadv8f32>; } let ExeDomain = SSEPackedDouble in { defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, - memopv2f64, memopv4f64>; + loadv2f64, loadv4f64>; defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, - memopv2f64, memopv4f64>; + loadv2f64, loadv4f64>; defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64, - memopv2f64, memopv4f64>; + loadv2f64, loadv4f64>; defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64, - memopv2f64, memopv4f64>; + loadv2f64, loadv4f64>; defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64, - memopv2f64, memopv4f64>; + loadv2f64, loadv4f64>; defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64, - memopv2f64, memopv4f64>; + loadv2f64, loadv4f64>; } -- cgit v1.1 From 189c6235e7d783928c94cbfe4bccb39e4bd0b84f Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Sat, 21 Sep 2013 15:09:46 +0000 Subject: Revert "SelectionDAG: Teach the legalizer to split SETCC if VSELECT needs splitting too." This reverts commit r191130. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191138 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c5cc2cb..f1034eb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1535,16 +1535,7 @@ void X86TargetLowering::resetOperationActions() { } EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) - return MVT::i8; - - const TargetMachine &TM = getTargetMachine(); - if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) - switch(VT.getVectorNumElements()) { - case 8: return MVT::v8i1; - case 16: return MVT::v16i1; - } - + if (!VT.isVector()) return MVT::i8; return VT.changeVectorElementTypeToInteger(); } -- cgit v1.1 From 20b5879e0ec5c926c3b636ad36d5b6cfb278f736 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sat, 21 Sep 2013 23:51:08 +0000 Subject: [Sparc] Fix lowering FABS on fp128 (long double) on pre-v9 targets. 
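The pre-V9 path was re-lowering the original fp128 node instead of the
f64 high half extracted from it. LowerF64Op therefore now takes the
opcode to apply as an explicit parameter (the extracted half does not
carry it) and is invoked on Hi64.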
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191154 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcISelLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 641ab6c..2260fe4 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -2160,12 +2160,12 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG, return RetAddr; } -static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG) +static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG, unsigned opcode) { SDLoc dl(Op); assert(Op.getValueType() == MVT::f64 && "LowerF64Op called on non-double!"); - assert(Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS); + assert(opcode == ISD::FNEG || opcode == ISD::FABS); // Lower fneg/fabs on f64 to fneg/fabs on f32. // fneg f64 => fneg f32:sub_even, fmov f32:sub_odd. @@ -2177,7 +2177,7 @@ static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG) SDValue Lo32 = DAG.getTargetExtractSubreg(SP::sub_odd, dl, MVT::f32, SrcReg64); - Hi32 = DAG.getNode(Op.getOpcode(), dl, MVT::f32, Hi32); + Hi32 = DAG.getNode(opcode, dl, MVT::f32, Hi32); SDValue DstReg64 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); @@ -2280,7 +2280,7 @@ static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG, const SparcTargetLowering &TLI, bool is64Bit) { if (Op.getValueType() == MVT::f64) - return LowerF64Op(Op, DAG); + return LowerF64Op(Op, DAG, ISD::FNEG); if (Op.getValueType() == MVT::f128) return TLI.LowerF128Op(Op, DAG, ((is64Bit) ? "_Qp_neg" : "_Q_neg"), 1); return Op; @@ -2288,7 +2288,7 @@ static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG, static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG, bool isV9) { if (Op.getValueType() == MVT::f64) - return LowerF64Op(Op, DAG); + return LowerF64Op(Op, DAG, ISD::FABS); if (Op.getValueType() != MVT::f128) return Op; @@ -2304,7 +2304,7 @@ static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG, bool isV9) { if (isV9) Hi64 = DAG.getNode(Op.getOpcode(), dl, MVT::f64, Hi64); else - Hi64 = LowerF64Op(Op, DAG); + Hi64 = LowerF64Op(Hi64, DAG, ISD::FABS); SDValue DstReg128 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f128), 0); -- cgit v1.1 From 50019d8f7e1af96b85098ba501acbb9845682e4a Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 22 Sep 2013 00:08:14 +0000 Subject: Correct the pre-increment load latencies in the PPC A2 itinerary Pre-increment loads are microcoded on the A2, and the address increment occurs only after the load completes. As a result, the latency of the GPR address update is an additional 2 cycles on top of the load latency. 
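As arithmetic: the loaded value is still available after the usual 6 or
7 cycles, depending on the load class, but the updated base register
only becomes ready 2 cycles after the load completes, i.e. after
6 + 2 = 8 or 7 + 2 = 9 cycles, which is what the [6, 8, 1, 1] and
[7, 9, 1, 1] operand latencies below encode.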
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191156 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCScheduleA2.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 2e41edf..1612cd2 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -70,7 +70,7 @@ def PPCA2Itineraries : ProcessorItineraries< InstrItinData], [6, 1, 1]>, InstrItinData], - [6, 2, 1, 1]>, + [6, 8, 1, 1]>, InstrItinData], [6, 1, 1]>, InstrItinData], @@ -86,11 +86,11 @@ def PPCA2Itineraries : ProcessorItineraries< InstrItinData], [7, 1, 1]>, InstrItinData], - [7, 2, 1, 1]>, + [7, 9, 1, 1]>, InstrItinData], [6, 1, 1]>, InstrItinData], - [6, 2, 1, 1]>, + [6, 8, 1, 1]>, InstrItinData], [82, 1, 1]>, // L2 latency InstrItinData], -- cgit v1.1 From c12c8d754dcf7793d924c01517c9f6f297fdf6b4 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 22 Sep 2013 00:42:30 +0000 Subject: [Sparc] Emit .register directive to declare the use of global registers %g2, %g4, %g6 and %g7. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191158 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcAsmPrinter.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 3fe2b44..b695dd8 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -43,6 +44,7 @@ namespace { const char *Modifier = 0); void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); + virtual void EmitFunctionBodyStart(); virtual void EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); @@ -63,11 +65,35 @@ namespace { virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + void EmitGlobalRegisterDecl(unsigned reg) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "\t.register " + << "%" << StringRef(getRegisterName(reg)).lower() + << ", " + << ((reg == SP::G6 || reg == SP::G7)? "#ignore" : "#scratch"); + OutStreamer.EmitRawText(OS.str()); + } + }; } // end of anonymous namespace #include "SparcGenAsmWriter.inc" +void SparcAsmPrinter::EmitFunctionBodyStart() { + if (!TM.getSubtarget().is64Bit()) + return; + + const MachineRegisterInfo &MRI = MF->getRegInfo(); + const unsigned globalRegs[] = { SP::G2, SP::G3, SP::G6, SP::G7, 0 }; + for (unsigned i = 0; globalRegs[i] != 0; ++i) { + unsigned reg = globalRegs[i]; + if (!MRI.isPhysRegUsed(reg)) + continue; + EmitGlobalRegisterDecl(reg); + } +} + void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand (opNum); -- cgit v1.1 From ecd4965c133d16fc9e5a6ac393c5194b67cd53ab Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 22 Sep 2013 01:40:24 +0000 Subject: [SPARC] Make functions with GLOBAL_OFFSET_TABLE access as non-leaf functions. 
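Materializing the GOT base (GLOBAL_BASE_REG) on SPARC uses a call-based
PC-reading sequence, so a function that touches the GOT cannot be
treated as a leaf; setting MFI->setHasCalls(true) when the address is
formed ensures it gets a proper stack frame.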
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191160 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcISelLowering.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 2260fe4..eb9896c 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1651,6 +1651,10 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT); SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo); + // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this + // function has calls. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setHasCalls(true); return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr, MachinePointerInfo::getGOT(), false, false, false, 0); } -- cgit v1.1 From 7d7db75a55319f1d21f0d8336744f90a81b87ac7 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 22 Sep 2013 05:30:16 +0000 Subject: X86: Use R_X86_64_TPOFF64 for FK_Data_8 Summary: LLVM would crash when trying to come up with a relocation type for assembly like: movabsq $V@TPOFF, %rax Instead, we say the relocation type is R_X86_64_TPOFF64. Fixes PR17274. Reviewers: dblaikie, nrieck, rafael CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D1717 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191163 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index b400b87..6a4bdeb 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -108,6 +108,9 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_X86_64_64; break; + case MCSymbolRefExpr::VK_TPOFF: + Type = ELF::R_X86_64_TPOFF64; + break; case MCSymbolRefExpr::VK_DTPOFF: Type = ELF::R_X86_64_DTPOFF64; break; -- cgit v1.1 From 7d052f272d3f9ad0acdebf6811e29d529f70c1e1 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 22 Sep 2013 06:48:52 +0000 Subject: [Sparc] Add support for TLS in sparc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191164 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h | 22 +++++- lib/Target/Sparc/SparcAsmPrinter.cpp | 50 ++++++++++++- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 6 +- lib/Target/Sparc/SparcISelLowering.cpp | 103 +++++++++++++++++++++++++- lib/Target/Sparc/SparcISelLowering.h | 9 ++- lib/Target/Sparc/SparcInstr64Bit.td | 9 +++ lib/Target/Sparc/SparcInstrInfo.td | 49 ++++++++++++ 7 files changed, 238 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h index aac0e8d..f3caeaa 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h @@ -53,7 +53,27 @@ enum TOF { // Extract bits 41-32 of an address. // Assembler: %hm(addr) - MO_HM + MO_HM, + + // TargetFlags for Thread Local Storage. 
+ MO_TLS_GD_HI22, + MO_TLS_GD_LO10, + MO_TLS_GD_ADD, + MO_TLS_GD_CALL, + MO_TLS_LDM_HI22, + MO_TLS_LDM_LO10, + MO_TLS_LDM_ADD, + MO_TLS_LDM_CALL, + MO_TLS_LDO_HIX22, + MO_TLS_LDO_LOX10, + MO_TLS_LDO_ADD, + MO_TLS_IE_HI22, + MO_TLS_IE_LO10, + MO_TLS_IE_LD, + MO_TLS_IE_LDX, + MO_TLS_IE_ADD, + MO_TLS_LE_HIX22, + MO_TLS_LE_LOX10 }; } // end namespace SPII diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index b695dd8..d561093 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -105,11 +105,37 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, assert(TF == SPII::MO_NO_FLAG && "Cannot handle target flags on call address"); else if (MI->getOpcode() == SP::SETHIi) - assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) && + assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH + || TF == SPII::MO_TLS_GD_HI22 + || TF == SPII::MO_TLS_LDM_HI22 + || TF == SPII::MO_TLS_LDO_HIX22 + || TF == SPII::MO_TLS_IE_HI22 + || TF == SPII::MO_TLS_LE_HIX22) && "Invalid target flags for address operand on sethi"); + else if (MI->getOpcode() == SP::TLS_CALL) + assert((TF == SPII::MO_NO_FLAG + || TF == SPII::MO_TLS_GD_CALL + || TF == SPII::MO_TLS_LDM_CALL) && + "Cannot handle target flags on tls call address"); + else if (MI->getOpcode() == SP::TLS_ADDrr) + assert((TF == SPII::MO_TLS_GD_ADD || TF == SPII::MO_TLS_LDM_ADD + || TF == SPII::MO_TLS_LDO_ADD || TF == SPII::MO_TLS_IE_ADD) && + "Cannot handle target flags on add for TLS"); + else if (MI->getOpcode() == SP::TLS_LDrr) + assert(TF == SPII::MO_TLS_IE_LD && + "Cannot handle target flags on ld for TLS"); + else if (MI->getOpcode() == SP::TLS_LDXrr) + assert(TF == SPII::MO_TLS_IE_LDX && + "Cannot handle target flags on ldx for TLS"); + else if (MI->getOpcode() == SP::XORri) + assert((TF == SPII::MO_TLS_LDO_LOX10 || TF == SPII::MO_TLS_LE_LOX10) && + "Cannot handle target flags on xor for TLS"); else - assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 || - TF == SPII::MO_HM) && + assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 + || TF == SPII::MO_HM + || TF == SPII::MO_TLS_GD_LO10 + || TF == SPII::MO_TLS_LDM_LO10 + || TF == SPII::MO_TLS_IE_LO10 ) && "Invalid target flags for small address operand"); } #endif @@ -128,6 +154,24 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, case SPII::MO_L44: O << "%l44("; break; case SPII::MO_HH: O << "%hh("; break; case SPII::MO_HM: O << "%hm("; break; + case SPII::MO_TLS_GD_HI22: O << "%tgd_hi22("; break; + case SPII::MO_TLS_GD_LO10: O << "%tgd_lo10("; break; + case SPII::MO_TLS_GD_ADD: O << "%tgd_add("; break; + case SPII::MO_TLS_GD_CALL: O << "%tgd_call("; break; + case SPII::MO_TLS_LDM_HI22: O << "%tldm_hi22("; break; + case SPII::MO_TLS_LDM_LO10: O << "%tldm_lo10("; break; + case SPII::MO_TLS_LDM_ADD: O << "%tldm_add("; break; + case SPII::MO_TLS_LDM_CALL: O << "%tldm_call("; break; + case SPII::MO_TLS_LDO_HIX22: O << "%tldo_hix22("; break; + case SPII::MO_TLS_LDO_LOX10: O << "%tldo_lox10("; break; + case SPII::MO_TLS_LDO_ADD: O << "%tldo_add("; break; + case SPII::MO_TLS_IE_HI22: O << "%tie_hi22("; break; + case SPII::MO_TLS_IE_LO10: O << "%tie_lo10("; break; + case SPII::MO_TLS_IE_LD: O << "%tie_ld("; break; + case SPII::MO_TLS_IE_LDX: O << "%tie_ldx("; break; + case SPII::MO_TLS_IE_ADD: O << "%tie_add("; break; + case SPII::MO_TLS_LE_HIX22: O << "%tle_hix22("; break; + case SPII::MO_TLS_LE_LOX10: O << "%tle_lox10("; break; } 
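  // Every "%...(" operator opened above is closed with a matching ")"
  // after the operand value itself has been printed, further down in
  // this function.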
switch (MO.getType()) { diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index db62151..e17a187 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -80,7 +80,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, return true; } if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) return false; // direct calls. if (Addr.getOpcode() == ISD::ADD) { @@ -117,7 +118,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { if (Addr.getOpcode() == ISD::FrameIndex) return false; if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) return false; // direct calls. if (Addr.getOpcode() == ISD::ADD) { diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index eb9896c..86bac7e 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1544,6 +1544,9 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { case SPISD::RET_FLAG: return "SPISD::RET_FLAG"; case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG"; case SPISD::FLUSHW: return "SPISD::FLUSHW"; + case SPISD::TLS_ADD: return "SPISD::TLS_ADD"; + case SPISD::TLS_LD: return "SPISD::TLS_LD"; + case SPISD::TLS_CALL: return "SPISD::TLS_CALL"; } } @@ -1699,6 +1702,103 @@ SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op, return makeAddress(Op, DAG); } +SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + SDLoc DL(GA); + const GlobalValue *GV = GA->getGlobal(); + EVT PtrVT = getPointerTy(); + + TLSModel::Model model = getTargetMachine().getTLSModel(GV); + + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { + unsigned HiTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_HI22 + : SPII::MO_TLS_LDM_HI22); + unsigned LoTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_LO10 + : SPII::MO_TLS_LDM_LO10); + unsigned addTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_ADD + : SPII::MO_TLS_LDM_ADD); + unsigned callTF = ((model == TLSModel::GeneralDynamic)?
SPII::MO_TLS_GD_CALL + : SPII::MO_TLS_LDM_CALL); + + SDValue HiLo = makeHiLoPair(Op, HiTF, LoTF, DAG); + SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT); + SDValue Argument = DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Base, HiLo, + withTargetFlags(Op, addTF, DAG)); + + SDValue Chain = DAG.getEntryNode(); + SDValue InFlag; + + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(1, true), DL); + Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InFlag); + InFlag = Chain.getValue(1); + SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT); + SDValue Symbol = withTargetFlags(Op, callTF, DAG); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + Ops.push_back(Symbol); + Ops.push_back(DAG.getRegister(SP::O0, PtrVT)); + const uint32_t *Mask = getTargetMachine() + .getRegisterInfo()->getCallPreservedMask(CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + Ops.push_back(InFlag); + Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, true), + DAG.getIntPtrConstant(0, true), InFlag, DL); + InFlag = Chain.getValue(1); + SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InFlag); + + if (model != TLSModel::LocalDynamic) + return Ret; + + SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT, + withTargetFlags(Op, SPII::MO_TLS_LDO_HIX22, DAG)); + SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT, + withTargetFlags(Op, SPII::MO_TLS_LDO_LOX10, DAG)); + HiLo = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo); + return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Ret, HiLo, + withTargetFlags(Op, SPII::MO_TLS_LDO_ADD, DAG)); + } + + if (model == TLSModel::InitialExec) { + unsigned ldTF = ((PtrVT == MVT::i64)? SPII::MO_TLS_IE_LDX + : SPII::MO_TLS_IE_LD); + + SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT); + + // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this + // function has calls. 
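+ // (Same situation as in makeAddress(): the base-register sequence
+ // contains a call, so the leaf-frame assumptions no longer hold.)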
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setHasCalls(true); + + SDValue TGA = makeHiLoPair(Op, + SPII::MO_TLS_IE_HI22, SPII::MO_TLS_IE_LO10, DAG); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, TGA); + SDValue Offset = DAG.getNode(SPISD::TLS_LD, + DL, PtrVT, Ptr, + withTargetFlags(Op, ldTF, DAG)); + return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, + DAG.getRegister(SP::G7, PtrVT), Offset, + withTargetFlags(Op, SPII::MO_TLS_IE_ADD, DAG)); + } + + assert(model == TLSModel::LocalExec); + SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT, + withTargetFlags(Op, SPII::MO_TLS_LE_HIX22, DAG)); + SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT, + withTargetFlags(Op, SPII::MO_TLS_LE_LOX10, DAG)); + SDValue Offset = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo); + + return DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getRegister(SP::G7, PtrVT), Offset); +} + SDValue SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args, SDValue Arg, SDLoc DL, @@ -2333,8 +2433,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG, *this); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::GlobalTLSAddress: - llvm_unreachable("TLS not implemented for Sparc."); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 64c688d..57ef099 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -40,8 +40,12 @@ namespace llvm { CALL, // A call instruction. RET_FLAG, // Return with a flag operand. - GLOBAL_BASE_REG, // Global base reg for PIC - FLUSHW // FLUSH register windows to stack + GLOBAL_BASE_REG, // Global base reg for PIC. + FLUSHW, // FLUSH register windows to stack. + + TLS_ADD, // For Thread Local Storage (TLS). + TLS_LD, + TLS_CALL }; } @@ -119,6 +123,7 @@ namespace llvm { SDLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td index 47658ee..212bd1c 100644 --- a/lib/Target/Sparc/SparcInstr64Bit.td +++ b/lib/Target/Sparc/SparcInstr64Bit.td @@ -162,6 +162,9 @@ def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>; def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>; +def : Pat<(tlsadd i64:$a, i64:$b, tglobaltlsaddr:$sym), + (TLS_ADDrr $a, $b, $sym)>; + // Register-immediate instructions. def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>; @@ -237,6 +240,12 @@ def LDXri : F3_2<3, 0b001011, (outs I64Regs:$dst), (ins MEMri:$addr), "ldx [$addr], $dst", [(set i64:$dst, (load ADDRri:$addr))]>; +let mayLoad = 1 in + def TLS_LDXrr : F3_1<3, 0b001011, + (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym), + "ldx [$addr], $dst, $sym", + [(set i64:$dst, + (tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>; // Extending loads to i64. 
def : Pat<(i64 (zextloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 695be33..7dc17cc 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -85,6 +85,8 @@ def MEMri : Operand { let MIOperandInfo = (ops ptr_rc, i32imm); } +def TLSSym : Operand; + // Branch targets have OtherVT type. def brtarget : Operand; def calltarget : Operand; @@ -106,6 +108,11 @@ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; def SDTSPITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; +def SDTSPtlsadd : +SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; +def SDTSPtlsld : +SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; + def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>; def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>; def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; @@ -144,6 +151,12 @@ def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, def flushw : SDNode<"SPISD::FLUSHW", SDTNone, [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>; +def tlsadd : SDNode<"SPISD::TLS_ADD", SDTSPtlsadd>; +def tlsld : SDNode<"SPISD::TLS_LD", SDTSPtlsld>; +def tlscall : SDNode<"SPISD::TLS_CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + def getPCX : Operand { let PrintMethod = "printGetPCX"; } @@ -807,6 +820,34 @@ let Defs = [FCC] in { } //===----------------------------------------------------------------------===// +// Instructions for Thread Local Storage(TLS). +//===----------------------------------------------------------------------===// + +def TLS_ADDrr : F3_1<2, 0b000000, + (outs IntRegs:$rd), + (ins IntRegs:$rs1, IntRegs:$rs2, TLSSym:$sym), + "add $rs1, $rs2, $rd, $sym", + [(set i32:$rd, + (tlsadd i32:$rs1, i32:$rs2, tglobaltlsaddr:$sym))]>; + +let mayLoad = 1 in + def TLS_LDrr : F3_1<3, 0b000000, + (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym), + "ld [$addr], $dst, $sym", + [(set i32:$dst, + (tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>; + +let Uses = [O6], isCall = 1 in + def TLS_CALL : InstSP<(outs), + (ins calltarget:$disp, TLSSym:$sym, variable_ops), + "call $disp, $sym\n\tnop", + [(tlscall texternalsym:$disp, tglobaltlsaddr:$sym)]> { + bits<30> disp; + let op = 1; + let Inst{29-0} = disp; +} + +//===----------------------------------------------------------------------===// // V9 Instructions //===----------------------------------------------------------------------===// @@ -915,6 +956,14 @@ def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>; def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>; +// GlobalTLS addresses +def : Pat<(SPhi tglobaltlsaddr:$in), (SETHIi tglobaltlsaddr:$in)>; +def : Pat<(SPlo tglobaltlsaddr:$in), (ORri (i32 G0), tglobaltlsaddr:$in)>; +def : Pat<(add (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)), + (ADDri (SETHIi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>; +def : Pat<(xor (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)), + (XORri (SETHIi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>; + // Blockaddress def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>; def : Pat<(SPlo tblockaddress:$in), (ORri (i32 G0), tblockaddress:$in)>; -- cgit v1.1 From 3e84ad28d4d3ceee25771b1e30315c20b7608c39 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sun, 22 Sep 2013 08:21:56 +0000 Subject: ISelDAG: spot chain cycles involving 
MachineNodes Previously, the DAGISel function WalkChainUsers was spotting that it had entered already-selected territory by whether a node was a MachineNode (amongst other things). Since it's fairly common practice to insert MachineNodes during ISelLowering, this was not the correct check. Looking around, it seems that other nodes get their NodeId set to -1 upon selection, so this makes sure the same thing happens to all MachineNodes and uses that characteristic to determine whether we should stop looking for a loop during selection. This should fix PR15840. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191165 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 1 + lib/Target/ARM/ARMISelDAGToDAG.cpp | 4 +++- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 4 +++- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 1 + lib/Target/Mips/MipsISelDAGToDAG.cpp | 1 + lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 4 +++- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4 +++- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 1 + lib/Target/Sparc/SparcISelDAGToDAG.cpp | 4 +++- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 1 + lib/Target/X86/X86ISelDAGToDAG.cpp | 1 + 11 files changed, 21 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index ee819e0..a865564 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -396,6 +396,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { if (Node->isMachineOpcode()) { DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); + Node->setNodeId(-1); return NULL; } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c2dbcb9..f6b3827 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2383,8 +2383,10 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); - if (N->isMachineOpcode()) + if (N->isMachineOpcode()) { + N->setNodeId(-1); return NULL; // Already selected. + } switch (N->getOpcode()) { default: break; diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 9e78e51..5ae9328 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1344,8 +1344,10 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) + if (N->isMachineOpcode()) { + N->setNodeId(-1); return NULL; // Already selected. + } switch (N->getOpcode()) { diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 543f54c..4152829 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -395,6 +395,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); return NULL; } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 725d9b4..b428589 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -110,6 +110,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we already have selected! 
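  // Resetting NodeId to -1 records that the node has been selected; this
  // is what WalkChainUsers now keys off, since being a MachineNode alone
  // does not prove selection (lowering can create MachineNodes too).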
if (Node->isMachineOpcode()) { DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); return NULL; } diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 01fbdb3..68fa955 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -118,8 +118,10 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const { /// expanded, promoted and normal instructions. SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) + if (N->isMachineOpcode()) { + N->setNodeId(-1); return NULL; // Already selected. + } SDNode *ResNode = NULL; switch (N->getOpcode()) { diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 475bde1..6ba6af6 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -876,8 +876,10 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // target-specific node if it hasn't already been changed. SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); - if (N->isMachineOpcode()) + if (N->isMachineOpcode()) { + N->setNodeId(-1); return NULL; // Already selected. + } switch (N->getOpcode()) { default: break; diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index a008c96..88b375b 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -195,6 +195,7 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { + N->setNodeId(-1); return NULL; // Already selected. } switch (Opc) { diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index e17a187..b012bfd 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -141,8 +141,10 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); - if (N->isMachineOpcode()) + if (N->isMachineOpcode()) { + N->setNodeId(-1); return NULL; // Already selected. + } switch (N->getOpcode()) { default: break; diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 01f008a..d717408 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -999,6 +999,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); return 0; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 32ad1aa..36d1690 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2057,6 +2057,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Node->isMachineOpcode()) { DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); + Node->setNodeId(-1); return NULL; // Already selected. } -- cgit v1.1 From a432a97b62617b8b74219ae60c6c6db5cc5ec7ab Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 22 Sep 2013 08:51:55 +0000 Subject: [Sparc] Clean up branch instructions, so that TableGen can encode branch conditions as well. No functionality change intended. 
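For reference, a rough sketch (hand-written here, not the TableGen output) of how a Format 2 Bicc word is put together once the condition is a real encoded field rather than a hard-wired class parameter; the layout follows the SPARC V8 manual: op=00 | a | cond | op2 | disp22.

    #include <cassert>
    #include <cstdint>

    uint32_t encodeBicc(unsigned Cond, bool Annul, int32_t Disp22) {
      assert(Cond < 16 && "cond is a 4-bit field");
      return (uint32_t(Annul) << 29) | (uint32_t(Cond) << 25) |
             (2u << 22) |                   // op2 = 0b010 for Bicc
             (uint32_t(Disp22) & 0x3FFFFF); // word displacement
    }

    // encodeBicc(8, false, 2) == 0x10800002, i.e. "ba .+8"; cond = 8
    // matches the "let cond = 8" on the BA definition below.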
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191166 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcInstr64Bit.td | 6 +++--- lib/Target/Sparc/SparcInstrFormats.td | 3 +-- lib/Target/Sparc/SparcInstrInfo.td | 40 +++++++++++++++++++---------------- 3 files changed, 26 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td index 212bd1c..e31ba55 100644 --- a/lib/Target/Sparc/SparcInstr64Bit.td +++ b/lib/Target/Sparc/SparcInstr64Bit.td @@ -321,9 +321,9 @@ def : Pat<(store (i64 0), ADDRri:$dst), (STXri ADDRri:$dst, (i64 G0))>; let Predicates = [Is64Bit] in { let Uses = [ICC] in -def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc), - "b$cc %xcc, $dst", - [(SPbrxcc bb:$dst, imm:$cc)]>; +def BPXCC : BranchSP<(ins brtarget:$imm22, CCOp:$cond), + "b$cond %xcc, $imm22", + [(SPbrxcc bb:$imm22, imm:$cond)]>; // Conditional moves on %xcc. let Uses = [ICC], Constraints = "$f = $rd" in { diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td index 6cdf6bc..f68728a 100644 --- a/lib/Target/Sparc/SparcInstrFormats.td +++ b/lib/Target/Sparc/SparcInstrFormats.td @@ -47,12 +47,11 @@ class F2_1 op2Val, dag outs, dag ins, string asmstr, list pattern> let Inst{29-25} = rd; } -class F2_2 condVal, bits<3> op2Val, dag outs, dag ins, string asmstr, +class F2_2 op2Val, dag outs, dag ins, string asmstr, list pattern> : F2 { bits<4> cond; bit annul = 0; // currently unused - let cond = condVal; let op2 = op2Val; let Inst{29} = annul; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 7dc17cc..a656e85 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -548,19 +548,26 @@ defm RESTORE : F3_12np<"restore", 0b111101>; // Section B.21 - Branch on Integer Condition Codes Instructions, p. 119 +// unconditional branch class. +class BranchAlways pattern> + : F2_2<0b010, (outs), ins, asmstr, pattern> { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; + let isBarrier = 1; +} + +let cond = 8 in + def BA : BranchAlways<(ins brtarget:$imm22), "ba $imm22", [(br bb:$imm22)]>; + // conditional branch class: -class BranchSP cc, dag ins, string asmstr, list pattern> - : F2_2 { +class BranchSP pattern> + : F2_2<0b010, (outs), ins, asmstr, pattern> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; } -let isBarrier = 1 in - def BA : BranchSP<0b1000, (ins brtarget:$dst), - "ba $dst", - [(br bb:$dst)]>; - // Indirect branch instructions. let isTerminator = 1, isBarrier = 1, hasDelaySlot = 1, isBranch =1, @@ -575,28 +582,25 @@ let isTerminator = 1, isBarrier = 1, [(brind ADDRri:$ptr)]>; } -// FIXME: the encoding for the JIT should look at the condition field. let Uses = [ICC] in - def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc), - "b$cc $dst", - [(SPbricc bb:$dst, imm:$cc)]>; - + def BCOND : BranchSP<(ins brtarget:$imm22, CCOp:$cond), + "b$cond $imm22", + [(SPbricc bb:$imm22, imm:$cond)]>; // Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121 // floating-point conditional branch class: -class FPBranchSP cc, dag ins, string asmstr, list pattern> - : F2_2 { +class FPBranchSP pattern> + : F2_2<0b110, (outs), ins, asmstr, pattern> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; } -// FIXME: the encoding for the JIT should look at the condition field. 
let Uses = [FCC] in - def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc), - "fb$cc $dst", - [(SPbrfcc bb:$dst, imm:$cc)]>; + def FBCOND : FPBranchSP<(ins brtarget:$imm22, CCOp:$cond), + "fb$cond $imm22", + [(SPbrfcc bb:$imm22, imm:$cond)]>; // Section B.24 - Call and Link Instruction, p. 125 -- cgit v1.1 From 69ae8f1abda2cfcbbb2ef895bbe23936d1beddf8 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 22 Sep 2013 09:18:26 +0000 Subject: [Sparc] Clean up MOVcc instructions so that TableGen can encode them correctly. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191167 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcInstrFormats.td | 56 +++++++++++++++++++++++++ lib/Target/Sparc/SparcInstrInfo.td | 78 ++++++++++++++++++++++------------- 2 files changed, 105 insertions(+), 29 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td index f68728a..c270ffe 100644 --- a/lib/Target/Sparc/SparcInstrFormats.td +++ b/lib/Target/Sparc/SparcInstrFormats.td @@ -149,3 +149,59 @@ multiclass F3_S Op3Val, bit XVal, SDNode OpNode, !strconcat(OpcStr, " $rs, $shcnt, $rd"), [(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>; } + +class F4 op3, dag outs, dag ins, string asmstr, list pattern> + : InstSP { + bits<5> rd; + + let op = 2; + let Inst{29-25} = rd; + let Inst{24-19} = op3; +} + + +class F4_1 op3, dag outs, dag ins, + string asmstr, list pattern> + : F4 { + + bits<3> cc; + bits<4> cond; + bits<5> rs2; + + let Inst{4-0} = rs2; + let Inst{11} = cc{0}; + let Inst{12} = cc{1}; + let Inst{13} = 0; + let Inst{17-14} = cond; + let Inst{18} = cc{2}; + +} + +class F4_2 op3, dag outs, dag ins, + string asmstr, list pattern> + : F4 { + bits<3> cc; + bits<4> cond; + bits<11> simm11; + + let Inst{10-0} = simm11; + let Inst{11} = cc{0}; + let Inst{12} = cc{1}; + let Inst{13} = 1; + let Inst{17-14} = cond; + let Inst{18} = cc{2}; +} + +class F4_3 op3, bits<6> opf_low, dag outs, dag ins, + string asmstr, list pattern> + : F4 { + bits<4> cond; + bits<3> opf_cc; + bits<5> rs2; + + let Inst{18} = 0; + let Inst{17-14} = cond; + let Inst{13-11} = opf_cc; + let Inst{10-5} = opf_low; + let Inst{4-0} = rs2; +} diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index a656e85..f6409e7 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -858,49 +858,69 @@ let Uses = [O6], isCall = 1 in // V9 Conditional Moves. let Predicates = [HasV9], Constraints = "$f = $rd" in { // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual. - // FIXME: Add instruction encodings for the JIT some day. 
- let Uses = [ICC] in { + let Uses = [ICC], cc = 0b100 in { def MOVICCrr - : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc), - "mov$cc %icc, $rs2, $rd", - [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cc))]>; + : F4_1<0b101100, (outs IntRegs:$rd), + (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond), + "mov$cond %icc, $rs2, $rd", + [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cond))]>; + def MOVICCri - : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc), - "mov$cc %icc, $i, $rd", - [(set i32:$rd, (SPselecticc simm11:$i, i32:$f, imm:$cc))]>; + : F4_2<0b101100, (outs IntRegs:$rd), + (ins i32imm:$simm11, IntRegs:$f, CCOp:$cond), + "mov$cond %icc, $simm11, $rd", + [(set i32:$rd, + (SPselecticc simm11:$simm11, i32:$f, imm:$cond))]>; } - let Uses = [FCC] in { + let Uses = [FCC], cc = 0b000 in { def MOVFCCrr - : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc), - "mov$cc %fcc0, $rs2, $rd", - [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, imm:$cc))]>; + : F4_1<0b101100, (outs IntRegs:$rd), + (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond), + "mov$cond %fcc0, $rs2, $rd", + [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, imm:$cond))]>; def MOVFCCri - : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc), - "mov$cc %fcc0, $i, $rd", - [(set i32:$rd, (SPselectfcc simm11:$i, i32:$f, imm:$cc))]>; + : F4_2<0b101100, (outs IntRegs:$rd), + (ins i32imm:$simm11, IntRegs:$f, CCOp:$cond), + "mov$cond %fcc0, $simm11, $rd", + [(set i32:$rd, + (SPselectfcc simm11:$simm11, i32:$f, imm:$cond))]>; } - let Uses = [ICC] in { + let Uses = [ICC], opf_cc = 0b100 in { def FMOVS_ICC - : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc), - "fmovs$cc %icc, $rs2, $rd", - [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cc))]>; + : F4_3<0b110101, 0b000001, (outs FPRegs:$rd), + (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond), + "fmovs$cond %icc, $rs2, $rd", + [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cond))]>; def FMOVD_ICC - : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc), - "fmovd$cc %icc, $rs2, $rd", - [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cc))]>; + : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd), + (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond), + "fmovd$cond %icc, $rs2, $rd", + [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cond))]>; + def FMOVQ_ICC + : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd), + (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond), + "fmovd$cond %icc, $rs2, $rd", + [(set f128:$rd, (SPselecticc f128:$rs2, f128:$f, imm:$cond))]>; } - let Uses = [FCC] in { + let Uses = [FCC], opf_cc = 0b000 in { def FMOVS_FCC - : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc), - "fmovs$cc %fcc0, $rs2, $rd", - [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cc))]>; + : F4_3<0b110101, 0b000001, (outs FPRegs:$rd), + (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond), + "fmovs$cond %fcc0, $rs2, $rd", + [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cond))]>; def FMOVD_FCC - : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc), - "fmovd$cc %fcc0, $rs2, $rd", - [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cc))]>; + : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd), + (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond), + "fmovd$cond %fcc0, $rs2, $rd", + [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cond))]>; + def FMOVQ_FCC + : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd), + (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond), + "fmovd$cond %fcc0, $rs2, $rd", + [(set f128:$rd, (SPselectfcc f128:$rs2, 
f128:$f, imm:$cond))]>; } } -- cgit v1.1 From 0821c72f11659964f76f4326874dd4037900ce14 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 22 Sep 2013 09:54:42 +0000 Subject: [Sparc] Make SPARC instructions' encoding well defined such that TableGen can automatically generate code emitter. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191168 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/CMakeLists.txt | 1 + lib/Target/Sparc/Makefile | 3 +- lib/Target/Sparc/SparcInstrFormats.td | 26 ++++++++++++ lib/Target/Sparc/SparcInstrInfo.td | 79 ++++++++++++++++++----------------- 4 files changed, 70 insertions(+), 39 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index acf7496..5ac1d30 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td) tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info) tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM SparcGenCodeEmitter.inc -gen-emitter) tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel) tablegen(LLVM SparcGenSubtargetInfo.inc -gen-subtarget) diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index 4b81ada..c171db7 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -14,7 +14,8 @@ TARGET = Sparc # Make sure that tblgen is run, first thing. BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \ SparcGenAsmWriter.inc SparcGenDAGISel.inc \ - SparcGenSubtargetInfo.inc SparcGenCallingConv.inc + SparcGenSubtargetInfo.inc SparcGenCallingConv.inc \ + SparcGenCodeEmitter.inc DIRS = TargetInfo MCTargetDesc diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td index c270ffe..afa2874 100644 --- a/lib/Target/Sparc/SparcInstrFormats.td +++ b/lib/Target/Sparc/SparcInstrFormats.td @@ -111,6 +111,32 @@ class F3_3 opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins, let Inst{4-0} = rs2; } +// floating-point unary operations. +class F3_3u opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins, + string asmstr, list pattern> : F3 { + bits<5> rs2; + + let op = opVal; + let op3 = op3val; + let rs1 = 0; + + let Inst{13-5} = opfval; // fp opcode + let Inst{4-0} = rs2; +} + +// floating-point compares. +class F3_3c opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins, + string asmstr, list pattern> : F3 { + bits<5> rs2; + + let op = opVal; + let op3 = op3val; + let rd = 0; + + let Inst{13-5} = opfval; // fp opcode + let Inst{4-0} = rs2; +} + // Shift by register rs2. class F3_Sr opVal, bits<6> op3val, bit xVal, dag outs, dag ins, string asmstr, list pattern> : F3 { diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index f6409e7..b6b850e 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -259,8 +259,9 @@ let hasSideEffects = 1, mayStore = 1 in { [(flushw)]>; } -def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val), - "unimp $val", []>; +let rd = 0 in + def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val), + "unimp $val", []>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. 
This has to handle all @@ -511,9 +512,10 @@ defm SUB : F3_12 <"sub" , 0b000100, sub>; let Uses = [ICC] in defm SUBX : F3_12 <"subx" , 0b001100, sube>; -let Defs = [ICC] in { +let Defs = [ICC] in defm SUBCC : F3_12 <"subcc", 0b010100, subc>; +let Defs = [ICC], rd = 0 in { def CMPrr : F3_1<2, 0b010100, (outs), (ins IntRegs:$b, IntRegs:$c), "cmp $b, $c", @@ -571,7 +573,7 @@ class BranchSP pattern> // Indirect branch instructions. let isTerminator = 1, isBarrier = 1, hasDelaySlot = 1, isBranch =1, - isIndirectBranch = 1 in { + isIndirectBranch = 1, rd = 0 in { def BINDrr : F3_1<2, 0b111000, (outs), (ins MEMrr:$ptr), "jmp $ptr", @@ -618,21 +620,21 @@ let Uses = [O6], def JMPLrr : F3_1<2, 0b111000, (outs), (ins MEMrr:$ptr, variable_ops), "call $ptr", - [(call ADDRrr:$ptr)]>; + [(call ADDRrr:$ptr)]> { let rd = 15; } def JMPLri : F3_2<2, 0b111000, (outs), (ins MEMri:$ptr, variable_ops), "call $ptr", - [(call ADDRri:$ptr)]>; + [(call ADDRri:$ptr)]> { let rd = 15; } } // Section B.28 - Read State Register Instructions -let Uses = [Y] in +let Uses = [Y], rs1 = 0, rs2 = 0 in def RDY : F3_1<2, 0b101000, (outs IntRegs:$dst), (ins), "rd %y, $dst", []>; // Section B.29 - Write State Register Instructions -let Defs = [Y] in { +let Defs = [Y], rd = 0 in { def WRYrr : F3_1<2, 0b110000, (outs), (ins IntRegs:$b, IntRegs:$c), "wr $b, $c, %y", []>; @@ -641,89 +643,89 @@ let Defs = [Y] in { "wr $b, $c, %y", []>; } // Convert Integer to Floating-point Instructions, p. 141 -def FITOS : F3_3<2, 0b110100, 0b011000100, +def FITOS : F3_3u<2, 0b110100, 0b011000100, (outs FPRegs:$dst), (ins FPRegs:$src), "fitos $src, $dst", [(set FPRegs:$dst, (SPitof FPRegs:$src))]>; -def FITOD : F3_3<2, 0b110100, 0b011001000, +def FITOD : F3_3u<2, 0b110100, 0b011001000, (outs DFPRegs:$dst), (ins FPRegs:$src), "fitod $src, $dst", [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>; -def FITOQ : F3_3<2, 0b110100, 0b011001100, +def FITOQ : F3_3u<2, 0b110100, 0b011001100, (outs QFPRegs:$dst), (ins FPRegs:$src), "fitoq $src, $dst", [(set QFPRegs:$dst, (SPitof FPRegs:$src))]>, Requires<[HasHardQuad]>; // Convert Floating-point to Integer Instructions, p. 142 -def FSTOI : F3_3<2, 0b110100, 0b011010001, +def FSTOI : F3_3u<2, 0b110100, 0b011010001, (outs FPRegs:$dst), (ins FPRegs:$src), "fstoi $src, $dst", [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>; -def FDTOI : F3_3<2, 0b110100, 0b011010010, +def FDTOI : F3_3u<2, 0b110100, 0b011010010, (outs FPRegs:$dst), (ins DFPRegs:$src), "fdtoi $src, $dst", [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>; -def FQTOI : F3_3<2, 0b110100, 0b011010011, +def FQTOI : F3_3u<2, 0b110100, 0b011010011, (outs FPRegs:$dst), (ins QFPRegs:$src), "fqtoi $src, $dst", [(set FPRegs:$dst, (SPftoi QFPRegs:$src))]>, Requires<[HasHardQuad]>; // Convert between Floating-point Formats Instructions, p. 
143 -def FSTOD : F3_3<2, 0b110100, 0b011001001, +def FSTOD : F3_3u<2, 0b110100, 0b011001001, (outs DFPRegs:$dst), (ins FPRegs:$src), "fstod $src, $dst", [(set f64:$dst, (fextend f32:$src))]>; -def FSTOQ : F3_3<2, 0b110100, 0b011001101, +def FSTOQ : F3_3u<2, 0b110100, 0b011001101, (outs QFPRegs:$dst), (ins FPRegs:$src), "fstoq $src, $dst", [(set f128:$dst, (fextend f32:$src))]>, Requires<[HasHardQuad]>; -def FDTOS : F3_3<2, 0b110100, 0b011000110, +def FDTOS : F3_3u<2, 0b110100, 0b011000110, (outs FPRegs:$dst), (ins DFPRegs:$src), "fdtos $src, $dst", [(set f32:$dst, (fround f64:$src))]>; -def FDTOQ : F3_3<2, 0b110100, 0b01101110, +def FDTOQ : F3_3u<2, 0b110100, 0b01101110, (outs QFPRegs:$dst), (ins DFPRegs:$src), "fdtoq $src, $dst", [(set f128:$dst, (fextend f64:$src))]>, Requires<[HasHardQuad]>; -def FQTOS : F3_3<2, 0b110100, 0b011000111, +def FQTOS : F3_3u<2, 0b110100, 0b011000111, (outs FPRegs:$dst), (ins QFPRegs:$src), "fqtos $src, $dst", [(set f32:$dst, (fround f128:$src))]>, Requires<[HasHardQuad]>; -def FQTOD : F3_3<2, 0b110100, 0b011001011, +def FQTOD : F3_3u<2, 0b110100, 0b011001011, (outs DFPRegs:$dst), (ins QFPRegs:$src), "fqtod $src, $dst", [(set f64:$dst, (fround f128:$src))]>, Requires<[HasHardQuad]>; // Floating-point Move Instructions, p. 144 -def FMOVS : F3_3<2, 0b110100, 0b000000001, +def FMOVS : F3_3u<2, 0b110100, 0b000000001, (outs FPRegs:$dst), (ins FPRegs:$src), "fmovs $src, $dst", []>; -def FNEGS : F3_3<2, 0b110100, 0b000000101, +def FNEGS : F3_3u<2, 0b110100, 0b000000101, (outs FPRegs:$dst), (ins FPRegs:$src), "fnegs $src, $dst", [(set f32:$dst, (fneg f32:$src))]>; -def FABSS : F3_3<2, 0b110100, 0b000001001, +def FABSS : F3_3u<2, 0b110100, 0b000001001, (outs FPRegs:$dst), (ins FPRegs:$src), "fabss $src, $dst", [(set f32:$dst, (fabs f32:$src))]>; // Floating-point Square Root Instructions, p.145 -def FSQRTS : F3_3<2, 0b110100, 0b000101001, +def FSQRTS : F3_3u<2, 0b110100, 0b000101001, (outs FPRegs:$dst), (ins FPRegs:$src), "fsqrts $src, $dst", [(set f32:$dst, (fsqrt f32:$src))]>; -def FSQRTD : F3_3<2, 0b110100, 0b000101010, +def FSQRTD : F3_3u<2, 0b110100, 0b000101010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fsqrtd $src, $dst", [(set f64:$dst, (fsqrt f64:$src))]>; -def FSQRTQ : F3_3<2, 0b110100, 0b000101011, +def FSQRTQ : F3_3u<2, 0b110100, 0b000101011, (outs QFPRegs:$dst), (ins QFPRegs:$src), "fsqrtq $src, $dst", [(set f128:$dst, (fsqrt f128:$src))]>, @@ -808,15 +810,15 @@ def FDIVQ : F3_3<2, 0b110100, 0b001001111, // after the instr is retired, but there is no interlock. This behavior // is modelled with a forced noop after the instruction. let Defs = [FCC] in { - def FCMPS : F3_3<2, 0b110101, 0b001010001, + def FCMPS : F3_3c<2, 0b110101, 0b001010001, (outs), (ins FPRegs:$src1, FPRegs:$src2), "fcmps $src1, $src2\n\tnop", [(SPcmpfcc f32:$src1, f32:$src2)]>; - def FCMPD : F3_3<2, 0b110101, 0b001010010, + def FCMPD : F3_3c<2, 0b110101, 0b001010010, (outs), (ins DFPRegs:$src1, DFPRegs:$src2), "fcmpd $src1, $src2\n\tnop", [(SPcmpfcc f64:$src1, f64:$src2)]>; - def FCMPQ : F3_3<2, 0b110101, 0b001010011, + def FCMPQ : F3_3c<2, 0b110101, 0b001010011, (outs), (ins QFPRegs:$src1, QFPRegs:$src2), "fcmpq $src1, $src2\n\tnop", [(SPcmpfcc f128:$src1, f128:$src2)]>, @@ -927,27 +929,27 @@ let Predicates = [HasV9], Constraints = "$f = $rd" in { // Floating-Point Move Instructions, p. 164 of the V9 manual. 
let Predicates = [HasV9] in { - def FMOVD : F3_3<2, 0b110100, 0b000000010, + def FMOVD : F3_3u<2, 0b110100, 0b000000010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fmovd $src, $dst", []>; - def FMOVQ : F3_3<2, 0b110100, 0b000000011, + def FMOVQ : F3_3u<2, 0b110100, 0b000000011, (outs QFPRegs:$dst), (ins QFPRegs:$src), "fmovq $src, $dst", []>, Requires<[HasHardQuad]>; - def FNEGD : F3_3<2, 0b110100, 0b000000110, + def FNEGD : F3_3u<2, 0b110100, 0b000000110, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fnegd $src, $dst", [(set f64:$dst, (fneg f64:$src))]>; - def FNEGQ : F3_3<2, 0b110100, 0b000000111, + def FNEGQ : F3_3u<2, 0b110100, 0b000000111, (outs QFPRegs:$dst), (ins QFPRegs:$src), "fnegq $src, $dst", [(set f128:$dst, (fneg f128:$src))]>, Requires<[HasHardQuad]>; - def FABSD : F3_3<2, 0b110100, 0b000001010, + def FABSD : F3_3u<2, 0b110100, 0b000001010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fabsd $src, $dst", [(set f64:$dst, (fabs f64:$src))]>; - def FABSQ : F3_3<2, 0b110100, 0b000001011, + def FABSQ : F3_3u<2, 0b110100, 0b000001011, (outs QFPRegs:$dst), (ins QFPRegs:$src), "fabsq $src, $dst", [(set f128:$dst, (fabs f128:$src))]>, @@ -956,9 +958,10 @@ let Predicates = [HasV9] in { // POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear // the top 32-bits before using it. To do this clearing, we use a SLLri X,0. -def POPCrr : F3_1<2, 0b101110, - (outs IntRegs:$dst), (ins IntRegs:$src), - "popc $src, $dst", []>, Requires<[HasV9]>; +let rs1 = 0 in + def POPCrr : F3_1<2, 0b101110, + (outs IntRegs:$dst), (ins IntRegs:$src), + "popc $src, $dst", []>, Requires<[HasV9]>; def : Pat<(ctpop i32:$src), (POPCrr (SLLri $src, 0))>; -- cgit v1.1 From ff96efee98f8a06e8fdf29d34843659932a83e95 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 22 Sep 2013 18:54:54 +0000 Subject: [Sparc] Use correct instruction pattern for CMPri. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191180 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcInstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index b6b850e..fcab5b3 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -520,7 +520,7 @@ let Defs = [ICC], rd = 0 in { (outs), (ins IntRegs:$b, IntRegs:$c), "cmp $b, $c", [(SPcmpicc i32:$b, i32:$c)]>; - def CMPri : F3_1<2, 0b010100, + def CMPri : F3_2<2, 0b010100, (outs), (ins IntRegs:$b, i32imm:$c), "cmp $b, $c", [(SPcmpicc i32:$b, (i32 simm13:$c))]>; -- cgit v1.1 From da521cc1cc733ee1c27b00e4c0e365c8b702e2e0 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 12:02:46 +0000 Subject: [mips][msa] Implemented build_vector using ldi, fill, and custom SelectionDAG nodes (VSPLAT and VSPLATD) Note: There's a later patch on my branch that re-implements this to select build_vector without the custom SelectionDAG nodes. The future patch avoids the constant-folding problems stemming from the custom node (i.e. it doesn't need to re-implement all the DAG combines related to BUILD_VECTOR). Changes to MIPS specific SelectionDAG nodes: * Added VSPLAT This is a special case of BUILD_VECTOR that covers the case the BUILD_VECTOR is a splat operation. 
* Added VSPLATD This is a special case of VSPLAT that handles the cases when v2i64 is legal git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191191 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 2 + lib/Target/Mips/MipsISelLowering.h | 8 ++- lib/Target/Mips/MipsMSAInstrInfo.td | 26 +++++--- lib/Target/Mips/MipsSEISelLowering.cpp | 109 +++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsSEISelLowering.h | 5 ++ 5 files changed, 139 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 220955e..21c5edb 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -212,6 +212,8 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO"; case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO"; case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; + case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; + case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 85aa162..57b5603 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -152,12 +152,18 @@ namespace llvm { SETCC_DSP, SELECT_CC_DSP, - // Vector comparisons + // Vector comparisons. VALL_ZERO, VANY_ZERO, VALL_NONZERO, VANY_NONZERO, + // Special case of BUILD_VECTOR where all elements are the same. + VSPLAT, + // Special case of VSPLAT where the result is v2i64, the operand is + // constant, and the operand fits in a signed 10-bits value. + VSPLATD, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index d4dcbd1..68b835e 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -11,12 +11,20 @@ // //===----------------------------------------------------------------------===// +def SDT_MipsSplat : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>]>; def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>; def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>; +def MipsVSplat : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>; +def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; + +def vsplati8 : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>; +def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>; +def vsplati32 : PatFrag<(ops node:$in), (v4i32 (MipsVSplat (i32 node:$in)))>; +def vsplati64 : PatFrag<(ops node:$in), (v2i64 (MipsVSplatD (i32 node:$in)))>; // Immediates def immSExt5 : ImmLeaf(Imm);}]>; @@ -1383,12 +1391,9 @@ class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w, class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d, MSA128D, MSA128W>; -class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", int_mips_fill_b, - MSA128B, GPR32>; -class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", int_mips_fill_h, - MSA128H, GPR32>; -class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", int_mips_fill_w, - MSA128W, GPR32>; +class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", vsplati8, MSA128B, GPR32>; +class FILL_H_DESC : 
MSA_2R_DESC_BASE<"fill.h", vsplati16, MSA128H, GPR32>; +class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", vsplati32, MSA128W, GPR32>; class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128W>; class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128D>; @@ -1573,10 +1578,10 @@ class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128H>; class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128W>; class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128D>; -class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", int_mips_ldi_b, MSA128B>; -class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", int_mips_ldi_h, MSA128H>; -class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w, MSA128W>; -class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d, MSA128D>; +class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", vsplati8, MSA128B>; +class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", vsplati16, MSA128H>; +class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", vsplati32, MSA128W>; +class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", vsplati64, MSA128D>; class LDX_DESC_BASEgetValueType(0); + assert(VT.isVector() && "Expected a vector type"); + + unsigned int nOps = N->getNumOperands(); + assert(nOps > 1 && "isSplat has 0 or 1 sized build vector"); + + SDValue Operand0 = N->getOperand(0); + + for (unsigned int i = 1; i < nOps; ++i) { + if (N->getOperand(i) != Operand0) + return false; + } + + return true; +} + +// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the +// backend. +// +// Lowers according to the following rules: +// - Vectors of 128-bits may be legal subject to the other rules. Other sizes +// are not legal. +// - Non-constant splats are legal and are lowered to MipsISD::VSPLAT. +// - Constant splats with an element size of 32-bits or less are legal and are +// lowered to MipsISD::VSPLAT. +// - Constant splats with an element size of 64-bits but whose value would fit +// within a 10 bit immediate are legal and are lowered to MipsISD::VSPLATD. 
+// - All other ISD::BUILD_VECTORS are not legal +SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Subtarget->hasMSA() || !ResTy.is128BitVector()) + return SDValue(); + + if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, + !Subtarget->isLittle())) { + SDValue Result; + EVT TmpVecTy; + EVT ConstTy = MVT::i32; + unsigned SplatOp = MipsISD::VSPLAT; + + switch (SplatBitSize) { + default: + return SDValue(); + case 64: + TmpVecTy = MVT::v2i64; + + // i64 is an illegal type on Mips32, but if the constant fits into a + // signed 10-bit value then we can still handle it using VSPLATD and an + // i32 constant + if (HasMips64) + ConstTy = MVT::i64; + else if (isInt<10>(SplatValue.getSExtValue())) { + SplatValue = SplatValue.trunc(32); + SplatOp = MipsISD::VSPLATD; + } else + return SDValue(); + break; + case 32: + TmpVecTy = MVT::v4i32; + break; + case 16: + TmpVecTy = MVT::v8i16; + SplatValue = SplatValue.sext(32); + break; + case 8: + TmpVecTy = MVT::v16i8; + SplatValue = SplatValue.sext(32); + break; + } + + Result = DAG.getNode(SplatOp, DL, TmpVecTy, + DAG.getConstant(SplatValue, ConstTy)); + if (ResTy != Result.getValueType()) + Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); + + return Result; + } + else if (isSplatVector(Node)) + return DAG.getNode(MipsISD::VSPLAT, DL, ResTy, Op->getOperand(0)); + + return SDValue(); +} + MachineBasicBlock * MipsSETargetLowering:: emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ // $bb: diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 016d4ad..909ab7d 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -22,7 +22,11 @@ namespace llvm { public: explicit MipsSETargetLowering(MipsTargetMachine &TM); + /// \brief Enable MSA support for the given integer type and Register + /// class. void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + /// \brief Enable MSA support for the given floating-point type and + /// Register class. void addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); @@ -69,6 +73,7 @@ namespace llvm { SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const; -- cgit v1.1 From 912fde24089094a953dfb802c69f4b0d83c7925c Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 12:26:55 +0000 Subject: Fix -Wunused-variable error when assertions are disabled and -Werror is in use.
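The failure mode, in miniature (a generic sketch, not the tree code): a variable consumed only by assert() becomes unused once -DNDEBUG compiles the assertion away, and -Werror turns the resulting -Wunused-variable warning into a build break.

    #include <cassert>

    int checkedValue(); // stand-in for any side-effect-free computation

    void f() {
      // Under -DNDEBUG, 'v' here would be set but never read:
      //   int v = checkedValue();
      //   assert(v > 0);
      // Either fold the expression into the assert (it vanishes with
      // NDEBUG, which is fine only because checkedValue() is pure)...
      assert(checkedValue() > 0);
      // ...or keep the variable and mark it deliberately used:
      int v = checkedValue();
      (void)v;
      assert(v > 0);
    }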
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191192 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEISelLowering.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 3b446c5..9532c9d 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -162,14 +162,17 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); setOperationAction(ISD::CTLZ, Ty, Legal); setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); setOperationAction(ISD::SDIV, Ty, Legal); setOperationAction(ISD::SHL, Ty, Legal); setOperationAction(ISD::SRA, Ty, Legal); setOperationAction(ISD::SRL, Ty, Legal); setOperationAction(ISD::SUB, Ty, Legal); setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::XOR, Ty, Legal); } // Enable MSA support for the given floating-point type and Register class. @@ -895,6 +898,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_addv_w: case Intrinsic::mips_addv_d: return lowerMSABinaryIntr(Op, DAG, ISD::ADD); + case Intrinsic::mips_and_v: + return lowerMSABinaryIntr(Op, DAG, ISD::AND); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: @@ -959,6 +964,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_nlzc_w: case Intrinsic::mips_nlzc_d: return lowerMSAUnaryIntr(Op, DAG, ISD::CTLZ); + case Intrinsic::mips_or_v: + return lowerMSABinaryIntr(Op, DAG, ISD::OR); case Intrinsic::mips_sll_b: case Intrinsic::mips_sll_h: case Intrinsic::mips_sll_w: @@ -979,6 +986,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_subv_w: case Intrinsic::mips_subv_d: return lowerMSABinaryIntr(Op, DAG, ISD::SUB); + case Intrinsic::mips_xor_v: + return lowerMSABinaryIntr(Op, DAG, ISD::XOR); } } @@ -1091,9 +1100,6 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, /// so it's possible for this to return false even when isConstantSplat returns /// true. static bool isSplatVector(const BuildVectorSDNode *N) { - EVT VT = N->getValueType(0); - assert(VT.isVector() && "Expected a vector type"); - unsigned int nOps = N->getNumOperands(); assert(nOps > 1 && "isSplat has 0 or 1 sized build vector"); -- cgit v1.1 From 57ebcb28a63d8646fd8fd69cfd9e6766066e342f Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 12:33:38 +0000 Subject: Partially revert r191192: Fix -Wunused-variable error when assertions are disabled and -Werror is in use. An unrelated change crept in because 'svn revert' isn't recursive by default. The unrelated changes have been reverted. 
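For future reference: plain "svn revert" only acts on the paths named on the command line, so reverting a whole subtree takes an explicit "svn revert -R <dir>" (or --depth=infinity).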
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191193 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEISelLowering.cpp | 9 --------- 1 file changed, 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 9532c9d..41bf017 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -162,17 +162,14 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); setOperationAction(ISD::ADD, Ty, Legal); - setOperationAction(ISD::AND, Ty, Legal); setOperationAction(ISD::CTLZ, Ty, Legal); setOperationAction(ISD::MUL, Ty, Legal); - setOperationAction(ISD::OR, Ty, Legal); setOperationAction(ISD::SDIV, Ty, Legal); setOperationAction(ISD::SHL, Ty, Legal); setOperationAction(ISD::SRA, Ty, Legal); setOperationAction(ISD::SRL, Ty, Legal); setOperationAction(ISD::SUB, Ty, Legal); setOperationAction(ISD::UDIV, Ty, Legal); - setOperationAction(ISD::XOR, Ty, Legal); } // Enable MSA support for the given floating-point type and Register class. @@ -898,8 +895,6 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_addv_w: case Intrinsic::mips_addv_d: return lowerMSABinaryIntr(Op, DAG, ISD::ADD); - case Intrinsic::mips_and_v: - return lowerMSABinaryIntr(Op, DAG, ISD::AND); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: @@ -964,8 +959,6 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_nlzc_w: case Intrinsic::mips_nlzc_d: return lowerMSAUnaryIntr(Op, DAG, ISD::CTLZ); - case Intrinsic::mips_or_v: - return lowerMSABinaryIntr(Op, DAG, ISD::OR); case Intrinsic::mips_sll_b: case Intrinsic::mips_sll_h: case Intrinsic::mips_sll_w: @@ -986,8 +979,6 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_subv_w: case Intrinsic::mips_subv_d: return lowerMSABinaryIntr(Op, DAG, ISD::SUB); - case Intrinsic::mips_xor_v: - return lowerMSABinaryIntr(Op, DAG, ISD::XOR); } } -- cgit v1.1 From 4e812c1f4a723f0fa0e8714610e08be593c759b8 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 12:57:42 +0000 Subject: [mips][msa] Added support for matching and, or, and xor from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191194 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 48 +++++++++++++++++++++++++++++++--- lib/Target/Mips/MipsSEISelLowering.cpp | 9 +++++++ 2 files changed, 54 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 68b835e..b7bc357 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -953,6 +953,12 @@ class MSA_VEC_DESC_BASE : + MipsPseudo<(outs RCWD:$wd), (ins RCWS:$ws, RCWT:$wt), + [(set RCWD:$wd, (OpNode RCWS:$ws, RCWT:$wt))]>; + class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, MSA128B>, IsCommutable; class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, MSA128H>, @@ -999,7 +1005,10 @@ class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H>; class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, MSA128W>; class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, MSA128D>; -class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", int_mips_and_v, MSA128B>; +class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", and, MSA128B>; +class AND_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; +class AND_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; +class AND_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, MSA128B>; @@ -1724,7 +1733,10 @@ class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, MSA128B>; class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, MSA128B>; -class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", int_mips_or_v, MSA128B>; +class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", or, MSA128B>; +class OR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; +class OR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; +class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, MSA128B>; @@ -1910,7 +1922,10 @@ class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, MSA128H>; class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, MSA128W>; class VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, MSA128D>; -class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", int_mips_xor_v, MSA128B>; +class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", xor, MSA128B>; +class XOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; +class XOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; +class XOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, MSA128B>; @@ -1946,6 +1961,15 @@ def ADDVI_W : ADDVI_W_ENC, ADDVI_W_DESC; def ADDVI_D : ADDVI_D_ENC, ADDVI_D_DESC; def AND_V : AND_V_ENC, AND_V_DESC; +def AND_V_H_PSEUDO : AND_V_H_PSEUDO_DESC, + PseudoInstExpansion<(AND_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; +def AND_V_W_PSEUDO : AND_V_W_PSEUDO_DESC, + PseudoInstExpansion<(AND_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; +def AND_V_D_PSEUDO : AND_V_D_PSEUDO_DESC, + PseudoInstExpansion<(AND_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; def ANDI_B : ANDI_B_ENC, ANDI_B_DESC; @@ -2478,6 +2502,15 @@ def NOR_V : NOR_V_ENC, NOR_V_DESC; def NORI_B : NORI_B_ENC, NORI_B_DESC; def OR_V : OR_V_ENC, OR_V_DESC; +def OR_V_H_PSEUDO : OR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(OR_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; +def OR_V_W_PSEUDO : OR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(OR_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; +def OR_V_D_PSEUDO : OR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(OR_V MSA128B:$wd, + MSA128B:$ws, 
MSA128B:$wt)>; def ORI_B : ORI_B_ENC, ORI_B_DESC; @@ -2626,6 +2659,15 @@ def VSHF_W : VSHF_W_ENC, VSHF_W_DESC; def VSHF_D : VSHF_D_ENC, VSHF_D_DESC; def XOR_V : XOR_V_ENC, XOR_V_DESC; +def XOR_V_H_PSEUDO : XOR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(XOR_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; +def XOR_V_W_PSEUDO : XOR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(XOR_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; +def XOR_V_D_PSEUDO : XOR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(XOR_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; def XORI_B : XORI_B_ENC, XORI_B_DESC; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 41bf017..9532c9d 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -162,14 +162,17 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); setOperationAction(ISD::CTLZ, Ty, Legal); setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); setOperationAction(ISD::SDIV, Ty, Legal); setOperationAction(ISD::SHL, Ty, Legal); setOperationAction(ISD::SRA, Ty, Legal); setOperationAction(ISD::SRL, Ty, Legal); setOperationAction(ISD::SUB, Ty, Legal); setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::XOR, Ty, Legal); } // Enable MSA support for the given floating-point type and Register class. @@ -895,6 +898,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_addv_w: case Intrinsic::mips_addv_d: return lowerMSABinaryIntr(Op, DAG, ISD::ADD); + case Intrinsic::mips_and_v: + return lowerMSABinaryIntr(Op, DAG, ISD::AND); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: @@ -959,6 +964,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_nlzc_w: case Intrinsic::mips_nlzc_d: return lowerMSAUnaryIntr(Op, DAG, ISD::CTLZ); + case Intrinsic::mips_or_v: + return lowerMSABinaryIntr(Op, DAG, ISD::OR); case Intrinsic::mips_sll_b: case Intrinsic::mips_sll_h: case Intrinsic::mips_sll_w: @@ -979,6 +986,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_subv_w: case Intrinsic::mips_subv_d: return lowerMSABinaryIntr(Op, DAG, ISD::SUB); + case Intrinsic::mips_xor_v: + return lowerMSABinaryIntr(Op, DAG, ISD::XOR); } } -- cgit v1.1 From 915432ca1306d10453c9eb523cbc4b257642f62a Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 13:22:24 +0000 Subject: [mips][msa] Added support for matching nor from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191195 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 1 + lib/Target/Mips/MipsISelLowering.h | 3 ++ lib/Target/Mips/MipsMSAInstrInfo.td | 16 ++++++++++- lib/Target/Mips/MipsSEISelLowering.cpp | 50 ++++++++++++++++++++++++++++++++-- 4 files changed, 67 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 21c5edb..443653f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -214,6 +214,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; + case MipsISD::VNOR: return "MipsISD::VNOR"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 57b5603..b56ac41 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -164,6 +164,9 @@ namespace llvm { // constant, and the operand fits in a signed 10-bits value. VSPLATD, + // Combined (XOR (OR $a, $b), -1) + VNOR, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index b7bc357..9783256 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -20,6 +20,8 @@ def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>; def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>; def MipsVSplat : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>; def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; +def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; def vsplati8 : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>; def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>; @@ -1729,7 +1731,10 @@ class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", ctlz, MSA128H>; class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", ctlz, MSA128W>; class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", ctlz, MSA128D>; -class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", int_mips_nor_v, MSA128B>; +class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", MipsVNOR, MSA128B>; +class NOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; +class NOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; +class NOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, MSA128B>; @@ -2498,6 +2503,15 @@ def NLZC_W : NLZC_W_ENC, NLZC_W_DESC; def NLZC_D : NLZC_D_ENC, NLZC_D_DESC; def NOR_V : NOR_V_ENC, NOR_V_DESC; +def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(NOR_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; +def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(NOR_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; +def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(NOR_V MSA128B:$wd, + MSA128B:$ws, MSA128B:$wt)>; def NORI_B : NORI_B_ENC, NORI_B_DESC; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 9532c9d..ca3e06c 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -90,6 +90,8 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); 
addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); + + setTargetDAGCombine(ISD::XOR); } if (!Subtarget->mipsSEUsesSoftFloat()) { @@ -567,6 +569,44 @@ static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { N->getOperand(2), SetCC.getOperand(2)); } +static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { + // Try the following combines: + // (xor (or $a, $b), (build_vector allones)) + // (xor (or $a, $b), (bitcast (build_vector allones))) + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue NotOp; + ConstantSDNode *Const; + + if (ISD::isBuildVectorAllOnes(Op0.getNode())) + NotOp = Op1; + else if (ISD::isBuildVectorAllOnes(Op1.getNode())) + NotOp = Op0; + else if ((Op0->getOpcode() == MipsISD::VSPLAT || + Op0->getOpcode() == MipsISD::VSPLATD) && + (Const = dyn_cast(Op0->getOperand(0))) && + Const->isAllOnesValue()) + NotOp = Op1; + else if ((Op1->getOpcode() == MipsISD::VSPLAT || + Op1->getOpcode() == MipsISD::VSPLATD) && + (Const = dyn_cast(Op1->getOperand(0))) && + Const->isAllOnesValue()) + NotOp = Op0; + else + return SDValue(); + + if (NotOp->getOpcode() == ISD::OR) + return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), + NotOp->getOperand(1)); + } + + return SDValue(); +} + SDValue MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -587,11 +627,13 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { return performSRLCombine(N, DAG, DCI, Subtarget); case ISD::VSELECT: return performVSELECTCombine(N, DAG); - case ISD::SETCC: { + case ISD::XOR: + Val = performXORCombine(N, DAG, Subtarget); + break; + case ISD::SETCC: Val = performSETCCCombine(N, DAG); break; } - } if (Val.getNode()) return Val; @@ -964,6 +1006,10 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_nlzc_w: case Intrinsic::mips_nlzc_d: return lowerMSAUnaryIntr(Op, DAG, ISD::CTLZ); + case Intrinsic::mips_nor_v: { + SDValue Res = lowerMSABinaryIntr(Op, DAG, ISD::OR); + return DAG.getNOT(SDLoc(Op), Res, Res->getValueType(0)); + } case Intrinsic::mips_or_v: return lowerMSABinaryIntr(Op, DAG, ISD::OR); case Intrinsic::mips_sll_b: -- cgit v1.1 From a399d698a84ffd22c7d1f121c24cbc147c6f4e06 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 13:40:21 +0000 Subject: [mips][msa] Added support for matching pcnt from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191198 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 8 ++++---- lib/Target/Mips/MipsSEISelLowering.cpp | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 9783256..4f875ea 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1755,10 +1755,10 @@ class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, MSA128H>; class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, MSA128W>; class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, MSA128D>; -class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", int_mips_pcnt_b, MSA128B>; -class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", int_mips_pcnt_h, MSA128H>; -class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", int_mips_pcnt_w, MSA128W>; -class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", int_mips_pcnt_d, MSA128D>; +class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", ctpop, MSA128B>; +class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128H>; +class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128W>; +class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128D>; class SAT_S_B_DESC : MSA_BIT_B_DESC_BASE<"sat_s.b", int_mips_sat_s_b, MSA128B>; class SAT_S_H_DESC : MSA_BIT_H_DESC_BASE<"sat_s.h", int_mips_sat_s_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index ca3e06c..610b8bf 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -166,6 +166,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::ADD, Ty, Legal); setOperationAction(ISD::AND, Ty, Legal); setOperationAction(ISD::CTLZ, Ty, Legal); + setOperationAction(ISD::CTPOP, Ty, Legal); setOperationAction(ISD::MUL, Ty, Legal); setOperationAction(ISD::OR, Ty, Legal); setOperationAction(ISD::SDIV, Ty, Legal); @@ -1012,6 +1013,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::mips_or_v: return lowerMSABinaryIntr(Op, DAG, ISD::OR); + case Intrinsic::mips_pcnt_b: + case Intrinsic::mips_pcnt_h: + case Intrinsic::mips_pcnt_w: + case Intrinsic::mips_pcnt_d: + return lowerMSAUnaryIntr(Op, DAG, ISD::CTPOP); case Intrinsic::mips_sll_b: case Intrinsic::mips_sll_h: case Intrinsic::mips_sll_w: -- cgit v1.1 From 9a1aaeb012e593fba977015c5d8b6b1aa41a908c Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 14:03:12 +0000 Subject: [mips][msa] Added support for matching insert and copy from normal IR (i.e. not intrinsics) Changes to MIPS SelectionDAG: * Added nodes VEXTRACT_[SZ]EXT_ELT to represent extract and extend in a single operation and implemented the DAG combines necessary to fold sign/zero extends into the extract. 
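Concretely, the new nodes carry the element's pre-extension width as an extra VTSDNode operand. A rough sketch of how such a node is built, essentially what the lowerEXTRACT_VECTOR_ELT added below does; the helper name and the i32 result type are illustrative assumptions:

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "MipsISelLowering.h" // for MipsISD; assumes the in-tree build context

  using namespace llvm;

  // Extract element Idx of Vec into an i32 whose upper bits are copies of
  // the element's sign bit. DAG.getValueType(EltTy) records the original
  // element width; the AND/SRA combines added below compare later extensions
  // against it and, when they are redundant, morph this node between the
  // sign- and zero-extending flavors instead of emitting a second extend.
  static SDValue emitExtractSExt(SelectionDAG &DAG, SDLoc DL, SDValue Vec,
                                 SDValue Idx) {
    EVT EltTy = Vec.getValueType().getVectorElementType();
    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, MVT::i32, Vec, Idx,
                       DAG.getValueType(EltTy));
  }

Sign extension is an arbitrary but safe default, since ISD::EXTRACT_VECTOR_ELT leaves the non-value bits undefined; a later combine can flip the node to VEXTRACT_ZEXT_ELT if a zero-extending user turns up.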
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191199 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 2 + lib/Target/Mips/MipsISelLowering.h | 4 + lib/Target/Mips/MipsMSAInstrInfo.td | 66 ++++++++++---- lib/Target/Mips/MipsSEISelLowering.cpp | 158 +++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsSEISelLowering.h | 1 + 5 files changed, 213 insertions(+), 18 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 443653f..443ce19 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -214,6 +214,8 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; + case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; + case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; default: return NULL; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index b56ac41..71a2ddc 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -167,6 +167,10 @@ namespace llvm { // Combined (XOR (OR $a, $b), -1) VNOR, + // Extended vector element extraction + VEXTRACT_SEXT_ELT, + VEXTRACT_ZEXT_ELT, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 4f875ea..e30cfc5 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -23,6 +23,33 @@ def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; +def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; + +// Pattern fragments +def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractSExt node:$vec, node:$idx, i8)>; +def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractSExt node:$vec, node:$idx, i16)>; +def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractSExt node:$vec, node:$idx, i32)>; + +def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractZExt node:$vec, node:$idx, i8)>; +def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractZExt node:$vec, node:$idx, i16)>; +def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), + (MipsVExtractZExt node:$vec, node:$idx, i32)>; + +def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>; +def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>; +def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>; + def vsplati8 : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>; def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>; def vsplati32 : PatFrag<(ops node:$in), (v4i32 (MipsVSplat (i32 node:$in)))>; @@ -805,12 +832,12 @@ class MSA_BIT_D_DESC_BASE { dag OutOperandList = (outs RCD:$rd); - dag InOperandList = (ins 
RCWS:$ws, uimm6:$n); + dag InOperandList = (ins RCWS:$ws, uimm4:$n); string AsmString = !strconcat(instr_asm, "\t$rd, $ws[$n]"); - list Pattern = [(set RCD:$rd, (OpNode RCWS:$ws, immZExt6:$n))]; + list Pattern = [(set RCD:$rd, (OpNode (VecTy RCWS:$ws), immZExt4:$n))]; InstrItinClass Itinerary = itin; } @@ -922,11 +949,11 @@ class MSA_INSERT_DESC_BASE { dag OutOperandList = (outs RCD:$wd); - dag InOperandList = (ins RCD:$wd_in, uimm6:$n, RCWS:$rs); + dag InOperandList = (ins RCD:$wd_in, RCWS:$rs, uimm6:$n); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $rs"); list Pattern = [(set RCD:$wd, (OpNode RCD:$wd_in, - immZExt6:$n, - RCWS:$rs))]; + RCWS:$rs, + immZExt6:$n))]; InstrItinClass Itinerary = itin; string Constraints = "$wd = $wd_in"; } @@ -1222,18 +1249,18 @@ class CLTI_U_W_DESC : MSA_SI5_DESC_BASE<"clti_u.w", int_mips_clti_u_w, class CLTI_U_D_DESC : MSA_SI5_DESC_BASE<"clti_u.d", int_mips_clti_u_d, MSA128D>; -class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", int_mips_copy_s_b, +class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", vextract_sext_i8, v16i8, GPR32, MSA128B>; -class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", int_mips_copy_s_h, +class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", vextract_sext_i16, v8i16, GPR32, MSA128H>; -class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", int_mips_copy_s_w, +class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", vextract_sext_i32, v4i32, GPR32, MSA128W>; -class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", int_mips_copy_u_b, +class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", vextract_zext_i8, v16i8, GPR32, MSA128B>; -class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", int_mips_copy_u_h, +class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16, GPR32, MSA128H>; -class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", int_mips_copy_u_w, +class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32, GPR32, MSA128W>; class CTCMSA_DESC { @@ -1561,12 +1588,12 @@ class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, MSA128H>; class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, MSA128W>; class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, MSA128D>; -class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", int_mips_insert_b, - MSA128B, GPR32>; -class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", int_mips_insert_h, - MSA128H, GPR32>; -class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", int_mips_insert_w, - MSA128W, GPR32>; +class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", vinsert_v16i8, MSA128B, + GPR32>; +class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", vinsert_v8i16, MSA128H, + GPR32>; +class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", vinsert_v4i32, MSA128W, + GPR32>; class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b, MSA128B>; class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h, MSA128H>; @@ -2689,6 +2716,9 @@ def XORI_B : XORI_B_ENC, XORI_B_DESC; class MSAPat pred = [HasMSA]> : Pat, Requires; +def : MSAPat<(extractelt (v4i32 MSA128W:$ws), immZExt4:$idx), + (COPY_S_W MSA128W:$ws, immZExt4:$idx)>; + def : MSAPat<(v16i8 (load addr:$addr)), (LD_B addr:$addr)>; def : MSAPat<(v8i16 (load addr:$addr)), (LD_H addr:$addr)>; def : MSAPat<(v4i32 (load addr:$addr)), (LD_W addr:$addr)>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 610b8bf..732b68b 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -91,6 +91,8 @@ 
MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::XOR); } @@ -161,6 +163,8 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::BITCAST, Ty, Legal); setOperationAction(ISD::LOAD, Ty, Legal); setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); setOperationAction(ISD::ADD, Ty, Legal); @@ -190,6 +194,7 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::LOAD, Ty, Legal); setOperationAction(ISD::STORE, Ty, Legal); setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); if (Ty != MVT::v8f16) { setOperationAction(ISD::FADD, Ty, Legal); @@ -233,6 +238,7 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); } @@ -404,6 +410,56 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT +// +// Performs the following transformations: +// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its +// sign/zero-extension is completely overwritten by the new one performed by +// the ISD::AND. +// - Removes redundant zero extensions performed by an ISD::AND. 
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (!Subtarget->hasMSA()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + unsigned Op0Opcode = Op0->getOpcode(); + + // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) + // where $d + 1 == 2^n and n == 32 + // or $d + 1 == 2^n and n <= 32 and ZExt + // -> (MipsVExtractZExt $a, $b, $c) + if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || + Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { + ConstantSDNode *Mask = dyn_cast(Op1); + + if (!Mask) + return SDValue(); + + int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); + + if (Log2IfPositive <= 0) + return SDValue(); // Mask+1 is not a power of 2 + + SDValue Op0Op2 = Op0->getOperand(2); + EVT ExtendTy = cast(Op0Op2)->getVT(); + unsigned ExtendTySize = ExtendTy.getSizeInBits(); + unsigned Log2 = Log2IfPositive; + + if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || + Log2 == ExtendTySize) { + SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; + DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT, + Op0->getVTList(), Ops, Op0->getNumOperands()); + return Op0; + } + } + + return SDValue(); +} + static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { @@ -500,11 +556,53 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); } +// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold +// constant splats into MipsISD::SHRA_DSP for DSPr2. +// +// Performs the following transformations: +// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its +// sign/zero-extension is completely overwritten by the new one performed by +// the ISD::SRA and ISD::SHL nodes. +// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL +// sequence. +// +// See performDSPShiftCombine for more information about the transformation +// used for DSPr2. 
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { EVT Ty = N->getValueType(0); + if (Subtarget->hasMSA()) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) + // where $d + sizeof($c) == 32 + // or $d + sizeof($c) <= 32 and SExt + // -> (MipsVExtractSExt $a, $b, $c) + if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { + SDValue Op0Op0 = Op0->getOperand(0); + ConstantSDNode *ShAmount = dyn_cast(Op1); + + if (!ShAmount) + return SDValue(); + + EVT ExtendTy = cast(Op0Op0->getOperand(2))->getVT(); + unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); + + if (TotalBits == 32 || + (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && + TotalBits <= 32)) { + SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), + Op0Op0->getOperand(2) }; + DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT, + Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands()); + return Op0Op0; + } + } + } + if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) return SDValue(); @@ -616,6 +714,9 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { case ISD::ADDE: return performADDECombine(N, DAG, DCI, Subtarget); + case ISD::AND: + Val = performANDCombine(N, DAG, DCI, Subtarget); + break; case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: @@ -889,6 +990,33 @@ static SDValue lowerMSABranchIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } +// Lower an MSA copy intrinsic into the specified SelectionDAG node +static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + SDLoc DL(Op); + SDValue Vec = Op->getOperand(1); + SDValue Idx = Op->getOperand(2); + EVT ResTy = Op->getValueType(0); + EVT EltTy = Vec->getValueType(0).getVectorElementType(); + + SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, + DAG.getValueType(EltTy)); + + return Result; +} + +// Lower an MSA insert intrinsic into the specified SelectionDAG node +static SDValue lowerMSAInsertIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + SDLoc DL(Op); + SDValue Op0 = Op->getOperand(1); + SDValue Op1 = Op->getOperand(2); + SDValue Op2 = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + + SDValue Result = DAG.getNode(Opc, DL, ResTy, Op0, Op2, Op1); + + return Result; +} + static SDValue lowerMSAUnaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { SDLoc DL(Op); SDValue Value = Op->getOperand(1); @@ -957,6 +1085,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_ZERO); case Intrinsic::mips_bz_v: return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_ZERO); + case Intrinsic::mips_copy_s_b: + case Intrinsic::mips_copy_s_h: + case Intrinsic::mips_copy_s_w: + return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); + case Intrinsic::mips_copy_u_b: + case Intrinsic::mips_copy_u_h: + case Intrinsic::mips_copy_u_w: + return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); case Intrinsic::mips_div_s_b: case Intrinsic::mips_div_s_h: case Intrinsic::mips_div_s_w: @@ -992,6 +1128,10 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fsub_w: case Intrinsic::mips_fsub_d: return lowerMSABinaryIntr(Op, DAG, ISD::FSUB); + case Intrinsic::mips_insert_b: + case Intrinsic::mips_insert_h: + case 
Intrinsic::mips_insert_w: + return lowerMSAInsertIntr(Op, DAG, ISD::INSERT_VECTOR_ELT); case Intrinsic::mips_ldi_b: case Intrinsic::mips_ldi_h: case Intrinsic::mips_ldi_w: @@ -1165,6 +1305,24 @@ static bool isSplatVector(const BuildVectorSDNode *N) { return true; } +// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. +// +// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We +// choose to sign-extend but we could have equally chosen zero-extend. The +// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT +// result into this node later (possibly changing it to a zero-extend in the +// process). +SDValue MipsSETargetLowering:: +lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + SDValue Op1 = Op->getOperand(1); + EVT EltTy = Op0->getValueType(0).getVectorElementType(); + return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, + DAG.getValueType(EltTy)); +} + // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the // backend. // diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 909ab7d..644fe02 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -73,6 +73,7 @@ namespace llvm { SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, -- cgit v1.1 From 0f22c134be40a337b30e30bdafb9e8b6880dea1e Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Mon, 23 Sep 2013 14:26:15 +0000 Subject: [ARM] Split A/R class into separate subtarget features. Patch by Bradley Smith. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191202 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 31 +++++++++++++++++++++---------- lib/Target/ARM/ARMInstrInfo.td | 6 +++--- lib/Target/ARM/ARMInstrThumb2.td | 6 +++--- lib/Target/ARM/ARMSubtarget.cpp | 2 +- lib/Target/ARM/ARMSubtarget.h | 15 +++++++++------ 5 files changed, 37 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index c9d71bd..21d1c5b 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -117,10 +117,18 @@ def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; -// M-series ISA? -def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true", +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", "Is microcontroller profile ('M' series)">; +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; + +// A-series ISA +def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", + "Is application profile ('A' series)">; + // Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. // See ARMInstrInfo.td for details. 
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", @@ -261,26 +269,29 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, def : ProcessorModel<"cortex-a5", CortexA8Model, [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, FeatureVFP4, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; def : ProcessorModel<"cortex-a9", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; // FIXME: A15 has currently the same ProcessorModel as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; // FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, FeatureVFP3, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureRClass]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, @@ -298,10 +309,10 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, def : ProcessorModel<"swift", SwiftModel, [ProcSwift, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; // V8 Processors -def : ProcNoItin<"cortex-a53", [HasV8Ops]>; +def : ProcNoItin<"cortex-a53", [HasV8Ops, FeatureAClass]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 93ecb16..80226ac 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -244,10 +244,10 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate<"ModeThumb,FeatureThumb2", "thumb2">; def IsMClass : Predicate<"Subtarget->isMClass()">, - AssemblerPredicate<"FeatureMClass", "armv7m">; -def IsARClass : Predicate<"!Subtarget->isMClass()">, + AssemblerPredicate<"FeatureMClass", "armv*m">; +def IsNotMClass : Predicate<"!Subtarget->isMClass()">, AssemblerPredicate<"!FeatureMClass", - "armv7a/r">; + "!armv*m">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb", "arm-mode">; def IsIOS : Predicate<"Subtarget->isTargetIOS()">; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 435b0ea..fcc8f86 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3922,7 +3922,7 @@ defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">; // // A/R class can only move from CPSR or SPSR. 
def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", - []>, Requires<[IsThumb2,IsARClass]> { + []>, Requires<[IsThumb2,IsNotMClass]> { bits<4> Rd; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; @@ -3932,7 +3932,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>; def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", - []>, Requires<[IsThumb2,IsARClass]> { + []>, Requires<[IsThumb2,IsNotMClass]> { bits<4> Rd; let Inst{31-12} = 0b11110011111111111000; let Inst{11-8} = Rd; @@ -3965,7 +3965,7 @@ def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary, // the mask with the fields to be accessed in the special register. def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), NoItinerary, "msr", "\t$mask, $Rn", []>, - Requires<[IsThumb2,IsARClass]> { + Requires<[IsThumb2,IsNotMClass]> { bits<5> mask; bits<4> Rn; let Inst{31-21} = 0b11110011100; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 0692fbd..4589a43 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -61,6 +61,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, const TargetOptions &Options) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) + , ARMProcClass(None) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) @@ -90,7 +91,6 @@ void ARMSubtarget::initializeEnvironment() { SlowFPBrcc = false; InThumbMode = false; HasThumb2 = false; - IsMClass = false; NoARM = false; PostRAScheduler = false; IsR9Reserved = ReserveR9; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index a3b701a..11d8e8e 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -33,10 +33,16 @@ protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift }; + enum ARMProcClassEnum { + None, AClass, RClass, MClass + }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily; + /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass. + ARMProcClassEnum ARMProcClass; + /// HasV4TOps, HasV5TOps, HasV5TEOps, /// HasV6Ops, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. @@ -82,10 +88,6 @@ protected: /// HasThumb2 - True if Thumb2 instructions are supported. bool HasThumb2; - /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - - /// v6m, v7m for example. - bool IsMClass; - /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; @@ -300,8 +302,9 @@ public: bool isThumb1Only() const { return InThumbMode && !HasThumb2; } bool isThumb2() const { return InThumbMode && HasThumb2; } bool hasThumb2() const { return HasThumb2; } - bool isMClass() const { return IsMClass; } - bool isARClass() const { return !IsMClass; } + bool isMClass() const { return ARMProcClass == MClass; } + bool isRClass() const { return ARMProcClass == RClass; } + bool isAClass() const { return ARMProcClass == AClass; } bool isR9Reserved() const { return IsR9Reserved; } -- cgit v1.1 From e0187e51a17f2081d6a72a57e0fbba8ce38d9410 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 14:29:55 +0000 Subject: [mips][msa] Added support for matching addvi, and subvi from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191203 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 71 ++++++++++++++++++++++------------ lib/Target/Mips/MipsSEISelLowering.cpp | 30 ++++++++++++++ 2 files changed, 76 insertions(+), 25 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index e30cfc5..b1e2e57 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -842,11 +842,24 @@ class MSA_COPY_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm5:$u5); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (SplatNode immZExt5:$u5)))]; + InstrItinClass Itinerary = itin; +} + +// This class is deprecated and will be removed in the next few changes +class MSA_I5_X_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm5:$u5); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt5:$u5))]; InstrItinClass Itinerary = itin; } @@ -1029,10 +1042,10 @@ class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", add, MSA128H>, IsCommutable; class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", add, MSA128W>, IsCommutable; class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", add, MSA128D>, IsCommutable; -class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", int_mips_addvi_b, MSA128B>; -class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", int_mips_addvi_h, MSA128H>; -class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", int_mips_addvi_w, MSA128W>; -class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", int_mips_addvi_d, MSA128D>; +class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", add, vsplati8, MSA128B>; +class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", add, vsplati16, MSA128H>; +class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", add, vsplati32, MSA128W>; +class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", add, vsplati64, MSA128D>; class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", and, MSA128B>; class AND_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; @@ -1667,15 +1680,23 @@ class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, MSA128H>; class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, MSA128W>; class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, MSA128D>; -class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, MSA128B>; -class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, MSA128H>; -class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, MSA128W>; -class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, MSA128D>; +class MAXI_S_B_DESC : MSA_I5_X_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, + MSA128B>; +class MAXI_S_H_DESC : MSA_I5_X_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, + MSA128H>; +class MAXI_S_W_DESC : MSA_I5_X_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, + MSA128W>; +class MAXI_S_D_DESC : MSA_I5_X_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, + MSA128D>; -class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, MSA128B>; -class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, MSA128H>; -class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, MSA128W>; -class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, MSA128D>; +class MAXI_U_B_DESC : MSA_I5_X_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, + MSA128B>; +class MAXI_U_H_DESC : 
MSA_I5_X_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, + MSA128H>; +class MAXI_U_W_DESC : MSA_I5_X_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, + MSA128W>; +class MAXI_U_D_DESC : MSA_I5_X_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, + MSA128D>; class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B>; class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128H>; @@ -1692,15 +1713,15 @@ class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, MSA128H>; class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, MSA128W>; class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, MSA128D>; -class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", int_mips_mini_s_b, MSA128B>; -class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", int_mips_mini_s_h, MSA128H>; -class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", int_mips_mini_s_w, MSA128W>; -class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", int_mips_mini_s_d, MSA128D>; +class MINI_S_B_DESC : MSA_I5_X_DESC_BASE<"mini_s.b", int_mips_mini_s_b, MSA128B>; +class MINI_S_H_DESC : MSA_I5_X_DESC_BASE<"mini_s.h", int_mips_mini_s_h, MSA128H>; +class MINI_S_W_DESC : MSA_I5_X_DESC_BASE<"mini_s.w", int_mips_mini_s_w, MSA128W>; +class MINI_S_D_DESC : MSA_I5_X_DESC_BASE<"mini_s.d", int_mips_mini_s_d, MSA128D>; -class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", int_mips_mini_u_b, MSA128B>; -class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", int_mips_mini_u_h, MSA128H>; -class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", int_mips_mini_u_w, MSA128W>; -class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", int_mips_mini_u_d, MSA128D>; +class MINI_U_B_DESC : MSA_I5_X_DESC_BASE<"mini_u.b", int_mips_mini_u_b, MSA128B>; +class MINI_U_H_DESC : MSA_I5_X_DESC_BASE<"mini_u.h", int_mips_mini_u_h, MSA128H>; +class MINI_U_W_DESC : MSA_I5_X_DESC_BASE<"mini_u.w", int_mips_mini_u_w, MSA128W>; +class MINI_U_D_DESC : MSA_I5_X_DESC_BASE<"mini_u.d", int_mips_mini_u_d, MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H>; @@ -1944,10 +1965,10 @@ class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", sub, MSA128H>; class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", sub, MSA128W>; class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", sub, MSA128D>; -class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", int_mips_subvi_b, MSA128B>; -class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", int_mips_subvi_h, MSA128H>; -class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", int_mips_subvi_w, MSA128W>; -class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", int_mips_subvi_d, MSA128D>; +class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", sub, vsplati8, MSA128B>; +class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", sub, vsplati16, MSA128H>; +class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32, MSA128W>; +class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64, MSA128D>; class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, MSA128B>; class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 732b68b..60960d6 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -980,6 +980,14 @@ static SDValue lowerMSABinaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } +static SDValue lowerMSABinaryImmIntr(SDValue Op, SelectionDAG &DAG, + unsigned Opc, SDValue RHS) { + SDValue LHS = Op->getOperand(1); + EVT 
ResTy = Op->getValueType(0); + + return DAG.getNode(Opc, SDLoc(Op), ResTy, LHS, RHS); +} + static SDValue lowerMSABranchIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { SDLoc DL(Op); SDValue Value = Op->getOperand(1); @@ -1017,6 +1025,16 @@ static SDValue lowerMSAInsertIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } +static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { + EVT ResTy = Op->getValueType(0); + + unsigned SplatOp = MipsISD::VSPLAT; + if (ResTy == MVT::v2i64) + SplatOp = MipsISD::VSPLATD; + + return DAG.getNode(SplatOp, SDLoc(Op), ResTy, Op->getOperand(ImmOp)); +} + static SDValue lowerMSAUnaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { SDLoc DL(Op); SDValue Value = Op->getOperand(1); @@ -1069,6 +1087,12 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_addv_w: case Intrinsic::mips_addv_d: return lowerMSABinaryIntr(Op, DAG, ISD::ADD); + case Intrinsic::mips_addvi_b: + case Intrinsic::mips_addvi_h: + case Intrinsic::mips_addvi_w: + case Intrinsic::mips_addvi_d: + return lowerMSABinaryImmIntr(Op, DAG, ISD::ADD, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_and_v: return lowerMSABinaryIntr(Op, DAG, ISD::AND); case Intrinsic::mips_bnz_b: @@ -1178,6 +1202,12 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_subv_w: case Intrinsic::mips_subv_d: return lowerMSABinaryIntr(Op, DAG, ISD::SUB); + case Intrinsic::mips_subvi_b: + case Intrinsic::mips_subvi_h: + case Intrinsic::mips_subvi_w: + case Intrinsic::mips_subvi_d: + return lowerMSABinaryImmIntr(Op, DAG, ISD::SUB, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_xor_v: return lowerMSABinaryIntr(Op, DAG, ISD::XOR); } -- cgit v1.1 From f2058addc2aa221d0fd744180a2c04a38ebddcd0 Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Mon, 23 Sep 2013 22:36:11 +0000 Subject: Make nomips16 mask not repeat if it ends with a '.'. This mask is purely for debugging and testing. 
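In other words, the Mips32FunctionMask string is consumed one character per function: '1' pins the next function to mips32 by adding the "nomips16" attribute, any other character leaves it alone, and the index previously wrapped back to the start of the string indefinitely. After this change a '.' switches the mask off for all remaining functions. A standalone model of the new behavior (the function list and output are invented for illustration):

  #include <cstdio>
  #include <string>

  int main() {
    // With mask "10.", f0 is forced to mips32, f1 is left alone, and the
    // '.' then disables the mask for f2 and f3 instead of wrapping back to
    // '1' as the pre-patch loop would have done.
    std::string Mips32FunctionMask = "10.";
    const char *Funcs[] = {"f0", "f1", "f2", "f3"};
    unsigned functionIndex = 0;
    bool doneUsingMask = false;
    for (const char *F : Funcs) {
      if (doneUsingMask)
        continue;
      if (functionIndex == Mips32FunctionMask.length())
        functionIndex = 0;
      switch (Mips32FunctionMask[functionIndex]) {
      case '1':
        std::printf("%s -> nomips16\n", F);
        break;
      case '.':
        doneUsingMask = true;
        break;
      default:
        break;
      }
      functionIndex++;
    }
    return 0;
  }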
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191231 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsOs16.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index 49c73b5..fe60841 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -94,6 +94,7 @@ namespace llvm { bool MipsOs16::runOnModule(Module &M) { bool usingMask = Mips32FunctionMask.length() > 0; + bool doneUsingMask = false; // this will make it stop repeating DEBUG(dbgs() << "Run on Module MipsOs16 \n" << Mips32FunctionMask << "\n"); if (usingMask) DEBUG(dbgs() << "using mask \n" << Mips32FunctionMask << "\n"); @@ -103,13 +104,22 @@ bool MipsOs16::runOnModule(Module &M) { if (F->isDeclaration()) continue; DEBUG(dbgs() << "Working on " << F->getName() << "\n"); if (usingMask) { - if (functionIndex == Mips32FunctionMask.length()) - functionIndex = 0; - if (Mips32FunctionMask[functionIndex] == '1') { - DEBUG(dbgs() << "mask forced mips32: " << F->getName() << "\n"); - F->addFnAttr("nomips16"); + if (!doneUsingMask) { + if (functionIndex == Mips32FunctionMask.length()) + functionIndex = 0; + switch (Mips32FunctionMask[functionIndex]) { + case '1': + DEBUG(dbgs() << "mask forced mips32: " << F->getName() << "\n"); + F->addFnAttr("nomips16"); + break; + case '.': + doneUsingMask = true; + break; + default: + break; + } + functionIndex++; } - functionIndex++; } else { if (needsFP(*F)) { -- cgit v1.1 From cb3023ae51ebf9c11c279607f5c845570220cc4f Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 24 Sep 2013 00:13:08 +0000 Subject: Selecting the address from a very long chain of GEPs can blow the stack. The recursive nature of the address selection code can cause the stack to explode if there is a long chain of GEPs. Convert the recursive bit into an iterative method to avoid this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191252 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 8e942c8..5afc002 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -347,6 +347,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, /// X86SelectAddress - Attempt to fill in an address from the given value. /// bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { +redo_gep: const User *U = NULL; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast<Instruction>(V)) { @@ -469,16 +470,24 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { goto unsupported_gep; } } + // Check for displacement overflow. if (!isInt<32>(Disp)) break; - // Ok, the GEP indices were covered by constant-offset and scaled-index - // addressing. Update the address state and move on to examining the base. + AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; + + if (const GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(U->getOperand(0))) { + // Ok, the GEP indices were covered by constant-offset and scaled-index + // addressing. Update the address state and move on to examining the base.
+ V = GEP; + goto redo_gep; + } else if (X86SelectAddress(U->getOperand(0), AM)) { return true; + } // If we couldn't merge the gep value into this addr mode, revert back to // our address and just match the value instead of completely failing. -- cgit v1.1 From 17f013265f00b8df07438a648c02873888477e36 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 24 Sep 2013 00:36:09 +0000 Subject: Misc fixes for cpp backend. PR17317. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CppBackend/CPPBackend.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 6f27af4..0eb15d3 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1161,8 +1161,7 @@ void CppWriter::printInstruction(const Instruction *I, break; } case Instruction::Resume: { - Out << "ResumeInst::Create(mod->getContext(), " << opNames[0] - << ", " << bbname << ");"; + Out << "ResumeInst::Create(" << opNames[0] << ", " << bbname << ");"; break; } case Instruction::Invoke: { @@ -1176,7 +1175,7 @@ void CppWriter::printInstruction(const Instruction *I, } // FIXME: This shouldn't use magic numbers -3, -2, and -1. Out << "InvokeInst *" << iName << " = InvokeInst::Create(" - << getOpName(inv->getCalledFunction()) << ", " + << getOpName(inv->getCalledValue()) << ", " << getOpName(inv->getNormalDest()) << ", " << getOpName(inv->getUnwindDest()) << ", " << iName << "_params, \""; @@ -1590,6 +1589,20 @@ void CppWriter::printInstruction(const Instruction *I, Out << "\");"; break; } + case Instruction::LandingPad: { + const LandingPadInst *lpi = cast(I); + Out << "LandingPadInst* " << iName << " = LandingPadInst::Create("; + printCppName(lpi->getType()); + Out << ", " << opNames[0] << ", " << lpi->getNumClauses() << ", \""; + printEscapedString(lpi->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCleanup(" + << (lpi->isCleanup() ? "true" : "false") + << ");"; + for (unsigned i = 0, e = lpi->getNumClauses(); i != e; ++i) + nl(Out) << iName << "->addClause(" << opNames[i+1] << ");"; + break; + } } DefinedValues.insert(I); nl(Out); -- cgit v1.1 From 477fc628b3c9ce1c970d4a678dd5607b15242cc8 Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Tue, 24 Sep 2013 02:47:27 +0000 Subject: Initial support for Neon scalar instructions. Patch by Ana Pazos. 1.Added support for v1ix and v1fx types. 2.Added Scalar Pairwise Reduce instructions. 3.Added initial implementation of Scalar Arithmetic instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191263 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64CallingConv.td | 9 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 11 ++ lib/Target/AArch64/AArch64InstrFormats.td | 21 ++- lib/Target/AArch64/AArch64InstrInfo.td | 16 +- lib/Target/AArch64/AArch64InstrNEON.td | 253 +++++++++++++++++++++++++---- lib/Target/AArch64/AArch64RegisterInfo.td | 8 +- 6 files changed, 271 insertions(+), 47 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td index bff7eeb..a2a9f3f 100644 --- a/lib/Target/AArch64/AArch64CallingConv.td +++ b/lib/Target/AArch64/AArch64CallingConv.td @@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[ // Canonicalise the various types that live in different floating-point // registers. 
This makes sense because the PCS does not distinguish Short // Vectors and Floating-point types. - CCIfType<[v2i8], CCBitConvertToType>, - CCIfType<[v4i8, v2i16], CCBitConvertToType>, - CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType>, + CCIfType<[v1i16, v2i8], CCBitConvertToType>, + CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType>, + CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCBitConvertToType>, @@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[ // argument is allocated to the least significant bits of register // v[NSRN]. The NSRN is incremented by one. The argument has now been // allocated." - CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 8597f07..48f34c0 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -57,6 +57,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) if (Subtarget->hasNEON()) { // And the vectors + addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass); + addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass); addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); @@ -274,16 +280,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setExceptionSelectorRegister(AArch64::X1); if (Subtarget->hasNEON()) { + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 735670b..4f48712 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1074,8 +1074,7 @@ class NeonI_2VMisc size, bits<5> opcode, class NeonI_2VShiftImm opcode, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : 
A64InstRdn -{ + : A64InstRdn { bits<7> Imm; let Inst{31} = 0b0; let Inst{30} = q; @@ -1129,5 +1128,23 @@ class NeonI_insert size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index fef3019..2332799 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2189,22 +2189,22 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; // Extra patterns for when we're allowed to optimise separate multiplication and // addition. let Predicates = [UseFusedMAC] in { -def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra), +def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)), (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), +def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), +def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra), +def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)), (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 5506aff..4bd5a67 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -2504,11 +2504,12 @@ defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; multiclass NeonI_Op_High { def _16B : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>; + (op (v8i8 (Neon_top16B node:$Rn)), (v8i8 (Neon_top16B node:$Rm)))>; def _8H : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>; + (op (v4i16 (Neon_top8H node:$Rn)), (v4i16 (Neon_top8H node:$Rm)))>; def _4S : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>; + (op (v2i32 (Neon_top4S node:$Rn)), (v2i32 (Neon_top4S node:$Rm)))>; + } defm NI_sabdl_hi : NeonI_Op_High; @@ -2868,9 +2869,25 @@ multiclass NeonI_Scalar3Same_BHSD_sizes opcode, } } -class Neon_Scalar_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), - (INSTD VPR64:$Rn, VPR64:$Rm)>; +multiclass Neon_Scalar_D_size_patterns { + def : 
Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; +} + +multiclass Neon_Scalar_BHSD_size_patterns + : Neon_Scalar_D_size_patterns { + def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), + (INSTB FPR8:$Rn, FPR8:$Rm)>; + + def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), + (INSTH FPR16:$Rn, FPR16:$Rm)>; + + def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; +} // Scalar Integer Add let isCommutable = 1 in { @@ -2880,9 +2897,15 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; // Scalar Integer Sub def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">; -// Pattern for Scalar Integer Add and Sub with D register -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; +// Pattern for Scalar Integer Add and Sub with D register only +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; // Scalar Integer Saturating Add (Signed, Unsigned) defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>; @@ -2892,40 +2915,160 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>; defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>; defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>; -// Patterns for Scalar Integer Saturating Add, Sub with D register only -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Add, Sub (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Add, Sub (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; // Scalar Integer Shift Left (Signed, Unsigned) def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">; def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">; +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + // Scalar Integer Saturating Shift Left (Signed, Unsigned) defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>; defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>; -// Scalar Integer Rouding Shift Left (Signed, Unsigned) +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Shift Left (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Scalar Integer Rounding Shift Left (Signed, Unsigned) def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">; def URSHLddd: 
NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">; +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>; defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>; -// Patterns for Scalar Integer Shift Lef, Saturating Shift Left, -// Rounding Shift Left, Rounding Saturating Shift Left with D register only -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Scalar Reduce Pairwise + +multiclass NeonI_ScalarPair_D_sizes opcode, + string asmop, bit Commutable = 0> { + let isCommutable = Commutable in { + def _D_2D : NeonI_ScalarPair; + } +} + +multiclass NeonI_ScalarPair_SD_sizes opcode, + string asmop, bit Commutable = 0> + : NeonI_ScalarPair_D_sizes { + let isCommutable = Commutable in { + def _S_2S : NeonI_ScalarPair; + } +} + +// Scalar Reduce Addition Pairwise (Integer) with +// Pattern to match llvm.arm.* intrinsic +defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>; + +// Pattern to match llvm.aarch64.* intrinsic for +// Scalar Reduce Addition Pairwise (Integer) +def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))), + (ADDPvv_D_2D VPR128:$Rn)>; + +// Scalar Reduce Addition Pairwise (Floating Point) +defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>; + +// Scalar Reduce Maximum Pairwise (Floating Point) +defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>; + +// Scalar Reduce Minimum Pairwise (Floating Point) +defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>; + +// Scalar Reduce maxNum Pairwise (Floating Point) +defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; + +// Scalar Reduce minNum Pairwise (Floating Point) +defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; + +multiclass Neon_ScalarPair_SD_size_patterns { + def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))), + (INSTS VPR64:$Rn)>; + def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))), + (INSTD VPR128:$Rn)>; +} + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point) +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + //===----------------------------------------------------------------------===// @@ -2999,6 +3142,14 @@ def 
: Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; // ...and scalar bitcasts... +def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>; +def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>; +def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>; +def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; + +def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>; +def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>; def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; @@ -3017,6 +3168,15 @@ def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>; +def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>; +def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; +def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>; + def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; @@ -3349,8 +3509,6 @@ def UMOVwh_pattern : Neon_UMOV_pattern; def UMOVws_pattern : Neon_UMOV_pattern; -def UMOVxd_pattern : Neon_UMOV_pattern; def : Pat<(i32 (and (i32 (vector_extract @@ -3389,3 +3547,40 @@ def : Pat<(i64 (zext (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), neon_uimm0_bare:$Imm)>; +// Additional copy patterns for scalar types +def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))), + (UMOVwb (v16i8 + (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>; + +def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))), + (UMOVwh (v8i16 + (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>; + +def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))), + (FMOVws FPR32:$Rn)>; + +def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))), + (FMOVxd FPR64:$Rn)>; + +def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))), + (f64 FPR64:$Rn)>; + +def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))), + (f32 FPR32:$Rn)>; + +def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)), + (v1i8 (EXTRACT_SUBREG (v16i8 + (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_8))>; + +def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)), + (v1i16 (EXTRACT_SUBREG (v8i16 + (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_16))>; + +def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), + (FMOVsw $src)>; + +def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), + (FMOVdx $src)>; + diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index e0eca23..089cc08 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -133,19 +133,19 @@ foreach Index = 0-31 in { } -def FPR8 : RegisterClass<"AArch64", [i8], 8, +def FPR8 : RegisterClass<"AArch64", [i8, v1i8], 8, (sequence "B%u", 0, 31)> { } -def FPR16 : RegisterClass<"AArch64", [f16], 16, +def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 
16, (sequence "H%u", 0, 31)> { } -def FPR32 : RegisterClass<"AArch64", [f32], 32, +def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32, (sequence "S%u", 0, 31)> { } -def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64], +def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], 64, (sequence "D%u", 0, 31)>; def FPR128 : RegisterClass<"AArch64", -- cgit v1.1 From 42d42595243be2787ce8807321a8783e0d0877bb Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 24 Sep 2013 07:19:30 +0000 Subject: Followup to r191252. Make sure that the code that handles the constant addresses is run for the GEPs. This just refactors that code and then calls it for the GEPs that are collected during the iteration. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191281 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 240 ++++++++++++++++++++++------------------- 1 file changed, 127 insertions(+), 113 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 5afc002..f048038 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -125,6 +125,8 @@ private: return static_cast(&TM); } + bool handleConstantAddresses(const Value *V, X86AddressMode &AM); + unsigned TargetMaterializeConstant(const Constant *C); unsigned TargetMaterializeAlloca(const AllocaInst *C); @@ -344,9 +346,125 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, return true; } +bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { + // Handle constant address. + if (const GlobalValue *GV = dyn_cast(V)) { + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Small) + return false; + + // Can't handle TLS yet. + if (const GlobalVariable *GVar = dyn_cast(GV)) + if (GVar->isThreadLocal()) + return false; + + // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how + // it works...). + if (const GlobalAlias *GA = dyn_cast(GV)) + if (const GlobalVariable *GVar = + dyn_cast_or_null(GA->resolveAliasedGlobal(false))) + if (GVar->isThreadLocal()) + return false; + + // RIP-relative addresses can't have additional register operands, so if + // we've already folded stuff into the addressing mode, just force the + // global value into its own register, which we can use as the basereg. + if (!Subtarget->isPICStyleRIPRel() || + (AM.Base.Reg == 0 && AM.IndexReg == 0)) { + // Okay, we've committed to selecting this global. Set up the address. + AM.GV = GV; + + // Allow the subtarget to classify the global. + unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); + + // If this reference is relative to the pic base, set it now. + if (isGlobalRelativeToPICBase(GVFlags)) { + // FIXME: How do we know Base.Reg is free?? + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); + } + + // Unless the ABI requires an extra load, return a direct reference to + // the global. + if (!isGlobalStubReference(GVFlags)) { + if (Subtarget->isPICStyleRIPRel()) { + // Use rip-relative addressing if we can. Above we verified that the + // base and index registers are unused. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); + AM.Base.Reg = X86::RIP; + } + AM.GVOpFlags = GVFlags; + return true; + } + + // Ok, we need to do a load from a stub. If we've already loaded from + // this stub, reuse the loaded pointer, otherwise emit the load now. 
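+ // (A note on the caching below: the first reference to a given global
+ // in the current block emits the stub load in the local-value area and
+ // records it in LocalValueMap; later references reuse that register.)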
+ DenseMap::iterator I = LocalValueMap.find(V); + unsigned LoadReg; + if (I != LocalValueMap.end() && I->second != 0) { + LoadReg = I->second; + } else { + // Issue load from stub. + unsigned Opc = 0; + const TargetRegisterClass *RC = NULL; + X86AddressMode StubAM; + StubAM.Base.Reg = AM.Base.Reg; + StubAM.GV = GV; + StubAM.GVOpFlags = GVFlags; + + // Prepare for inserting code in the local-value area. + SavePoint SaveInsertPt = enterLocalValueArea(); + + if (TLI.getPointerTy() == MVT::i64) { + Opc = X86::MOV64rm; + RC = &X86::GR64RegClass; + + if (Subtarget->isPICStyleRIPRel()) + StubAM.Base.Reg = X86::RIP; + } else { + Opc = X86::MOV32rm; + RC = &X86::GR32RegClass; + } + + LoadReg = createResultReg(RC); + MachineInstrBuilder LoadMI = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); + addFullAddress(LoadMI, StubAM); + + // Ok, back to normal mode. + leaveLocalValueArea(SaveInsertPt); + + // Prevent loading GV stub multiple times in same MBB. + LocalValueMap[V] = LoadReg; + } + + // Now construct the final address. Note that the Disp, Scale, + // and Index values may already be set here. + AM.Base.Reg = LoadReg; + AM.GV = 0; + return true; + } + } + + // If all else fails, try to materialize the value in a register. + if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { + if (AM.Base.Reg == 0) { + AM.Base.Reg = getRegForValue(V); + return AM.Base.Reg != 0; + } + if (AM.IndexReg == 0) { + assert(AM.Scale == 1 && "Scale with no index!"); + AM.IndexReg = getRegForValue(V); + return AM.IndexReg != 0; + } + } + + return false; +} + /// X86SelectAddress - Attempt to fill in an address from the given value. /// bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { + SmallVector GEPs; redo_gep: const User *U = NULL; unsigned Opcode = Instruction::UserOp1; @@ -478,6 +596,7 @@ redo_gep: AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; + GEPs.push_back(V); if (const GetElementPtrInst *GEP = dyn_cast(U->getOperand(0))) { @@ -492,125 +611,20 @@ redo_gep: // If we couldn't merge the gep value into this addr mode, revert back to // our address and just match the value instead of completely failing. AM = SavedAM; - break; - unsupported_gep: - // Ok, the GEP indices weren't all covered. - break; - } - } - - // Handle constant address. - if (const GlobalValue *GV = dyn_cast(V)) { - // Can't handle alternate code models yet. - if (TM.getCodeModel() != CodeModel::Small) - return false; - - // Can't handle TLS yet. - if (const GlobalVariable *GVar = dyn_cast(GV)) - if (GVar->isThreadLocal()) - return false; - - // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how - // it works...). - if (const GlobalAlias *GA = dyn_cast(GV)) - if (const GlobalVariable *GVar = - dyn_cast_or_null(GA->resolveAliasedGlobal(false))) - if (GVar->isThreadLocal()) - return false; - - // RIP-relative addresses can't have additional register operands, so if - // we've already folded stuff into the addressing mode, just force the - // global value into its own register, which we can use as the basereg. - if (!Subtarget->isPICStyleRIPRel() || - (AM.Base.Reg == 0 && AM.IndexReg == 0)) { - // Okay, we've committed to selecting this global. Set up the address. - AM.GV = GV; - - // Allow the subtarget to classify the global. - unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); - // If this reference is relative to the pic base, set it now. - if (isGlobalRelativeToPICBase(GVFlags)) { - // FIXME: How do we know Base.Reg is free?? 
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); - } - - // Unless the ABI requires an extra load, return a direct reference to - // the global. - if (!isGlobalStubReference(GVFlags)) { - if (Subtarget->isPICStyleRIPRel()) { - // Use rip-relative addressing if we can. Above we verified that the - // base and index registers are unused. - assert(AM.Base.Reg == 0 && AM.IndexReg == 0); - AM.Base.Reg = X86::RIP; - } - AM.GVOpFlags = GVFlags; + for (SmallVectorImpl::reverse_iterator + I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I) + if (handleConstantAddresses(*I, AM)) return true; - } - - // Ok, we need to do a load from a stub. If we've already loaded from - // this stub, reuse the loaded pointer, otherwise emit the load now. - DenseMap::iterator I = LocalValueMap.find(V); - unsigned LoadReg; - if (I != LocalValueMap.end() && I->second != 0) { - LoadReg = I->second; - } else { - // Issue load from stub. - unsigned Opc = 0; - const TargetRegisterClass *RC = NULL; - X86AddressMode StubAM; - StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = GV; - StubAM.GVOpFlags = GVFlags; - - // Prepare for inserting code in the local-value area. - SavePoint SaveInsertPt = enterLocalValueArea(); - - if (TLI.getPointerTy() == MVT::i64) { - Opc = X86::MOV64rm; - RC = &X86::GR64RegClass; - - if (Subtarget->isPICStyleRIPRel()) - StubAM.Base.Reg = X86::RIP; - } else { - Opc = X86::MOV32rm; - RC = &X86::GR32RegClass; - } - - LoadReg = createResultReg(RC); - MachineInstrBuilder LoadMI = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); - addFullAddress(LoadMI, StubAM); - - // Ok, back to normal mode. - leaveLocalValueArea(SaveInsertPt); - - // Prevent loading GV stub multiple times in same MBB. - LocalValueMap[V] = LoadReg; - } - // Now construct the final address. Note that the Disp, Scale, - // and Index values may already be set here. - AM.Base.Reg = LoadReg; - AM.GV = 0; - return true; - } + return false; + unsupported_gep: + // Ok, the GEP indices weren't all covered. + break; } - - // If all else fails, try to materialize the value in a register. - if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { - if (AM.Base.Reg == 0) { - AM.Base.Reg = getRegForValue(V); - return AM.Base.Reg != 0; - } - if (AM.IndexReg == 0) { - assert(AM.Scale == 1 && "Scale with no index!"); - AM.IndexReg = getRegForValue(V); - return AM.IndexReg != 0; - } } - return false; + return handleConstantAddresses(V, AM); } /// X86SelectCallAddress - Attempt to fill in an address from the given value. -- cgit v1.1 From cfb1e1703130809043a7b020b4cdfa04b59fa8ec Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 10:28:18 +0000 Subject: [mips][msa] Added support for matching slli, srai, and srli from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191285 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 92 ++++++++++++++++++++++++++-------- lib/Target/Mips/MipsSEISelLowering.cpp | 18 +++++++ 2 files changed, 90 insertions(+), 20 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index b1e2e57..92dc046 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -831,6 +831,50 @@ class MSA_BIT_D_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm3:$u3); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u3"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (vsplati8 immZExt3:$u3)))]; + InstrItinClass Itinerary = itin; +} + +class MSA_BIT_SPLATH_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm4:$u4); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u4"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (vsplati16 immZExt4:$u4)))]; + InstrItinClass Itinerary = itin; +} + +class MSA_BIT_SPLATW_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm5:$u5); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (vsplati32 immZExt5:$u5)))]; + InstrItinClass Itinerary = itin; +} + +class MSA_BIT_SPLATD_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm6:$u6); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u6"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (vsplati64 immZExt6:$u6)))]; + InstrItinClass Itinerary = itin; +} + class MSA_COPY_DESC_BASE { @@ -1713,15 +1757,23 @@ class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, MSA128H>; class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, MSA128W>; class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, MSA128D>; -class MINI_S_B_DESC : MSA_I5_X_DESC_BASE<"mini_s.b", int_mips_mini_s_b, MSA128B>; -class MINI_S_H_DESC : MSA_I5_X_DESC_BASE<"mini_s.h", int_mips_mini_s_h, MSA128H>; -class MINI_S_W_DESC : MSA_I5_X_DESC_BASE<"mini_s.w", int_mips_mini_s_w, MSA128W>; -class MINI_S_D_DESC : MSA_I5_X_DESC_BASE<"mini_s.d", int_mips_mini_s_d, MSA128D>; +class MINI_S_B_DESC : MSA_I5_X_DESC_BASE<"mini_s.b", int_mips_mini_s_b, + MSA128B>; +class MINI_S_H_DESC : MSA_I5_X_DESC_BASE<"mini_s.h", int_mips_mini_s_h, + MSA128H>; +class MINI_S_W_DESC : MSA_I5_X_DESC_BASE<"mini_s.w", int_mips_mini_s_w, + MSA128W>; +class MINI_S_D_DESC : MSA_I5_X_DESC_BASE<"mini_s.d", int_mips_mini_s_d, + MSA128D>; -class MINI_U_B_DESC : MSA_I5_X_DESC_BASE<"mini_u.b", int_mips_mini_u_b, MSA128B>; -class MINI_U_H_DESC : MSA_I5_X_DESC_BASE<"mini_u.h", int_mips_mini_u_h, MSA128H>; -class MINI_U_W_DESC : MSA_I5_X_DESC_BASE<"mini_u.w", int_mips_mini_u_w, MSA128W>; -class MINI_U_D_DESC : MSA_I5_X_DESC_BASE<"mini_u.d", int_mips_mini_u_d, MSA128D>; +class MINI_U_B_DESC : MSA_I5_X_DESC_BASE<"mini_u.b", int_mips_mini_u_b, + MSA128B>; +class MINI_U_H_DESC : MSA_I5_X_DESC_BASE<"mini_u.h", int_mips_mini_u_h, + MSA128H>; +class MINI_U_W_DESC : MSA_I5_X_DESC_BASE<"mini_u.w", int_mips_mini_u_w, + MSA128W>; +class MINI_U_D_DESC : MSA_I5_X_DESC_BASE<"mini_u.d", int_mips_mini_u_d, + MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H>; 
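For context, the "normal IR" these new SPLAT-immediate description bases match is simply a vector shift whose amount operand is a splatted constant, rather than the int_mips_* intrinsic form. A minimal IRBuilder sketch of that IR shape (function and value names here are illustrative, not part of the patch; assumes an MSA-enabled mips triple):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Build the splat-immediate shifts the new patterns select; with MSA
    // enabled these become slli.w, srai.w and srli.w respectively.
    Value *emitSplatShifts(IRBuilder<> &B, Value *V4I32) {
      Constant *Two = ConstantVector::getSplat(4, B.getInt32(2));
      Value *Shl = B.CreateShl(V4I32, Two);   // shl  <4 x i32> %v, splat(2)
      Value *Sra = B.CreateAShr(Shl, Two);    // ashr <4 x i32> %shl, splat(2)
      return B.CreateLShr(Sra, Two);          // lshr <4 x i32> %sra, splat(2)
    }
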
@@ -1837,10 +1889,10 @@ class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128H>; class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128W>; class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128D>; -class SLLI_B_DESC : MSA_BIT_B_DESC_BASE<"slli.b", int_mips_slli_b, MSA128B>; -class SLLI_H_DESC : MSA_BIT_H_DESC_BASE<"slli.h", int_mips_slli_h, MSA128H>; -class SLLI_W_DESC : MSA_BIT_W_DESC_BASE<"slli.w", int_mips_slli_w, MSA128W>; -class SLLI_D_DESC : MSA_BIT_D_DESC_BASE<"slli.d", int_mips_slli_d, MSA128D>; +class SLLI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"slli.b", shl, MSA128B>; +class SLLI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"slli.h", shl, MSA128H>; +class SLLI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"slli.w", shl, MSA128W>; +class SLLI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"slli.d", shl, MSA128D>; class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128B, MSA128B, GPR32>; @@ -1865,10 +1917,10 @@ class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128H>; class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128W>; class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128D>; -class SRAI_B_DESC : MSA_BIT_B_DESC_BASE<"srai.b", int_mips_srai_b, MSA128B>; -class SRAI_H_DESC : MSA_BIT_H_DESC_BASE<"srai.h", int_mips_srai_h, MSA128H>; -class SRAI_W_DESC : MSA_BIT_W_DESC_BASE<"srai.w", int_mips_srai_w, MSA128W>; -class SRAI_D_DESC : MSA_BIT_D_DESC_BASE<"srai.d", int_mips_srai_d, MSA128D>; +class SRAI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"srai.b", sra, MSA128B>; +class SRAI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"srai.h", sra, MSA128H>; +class SRAI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"srai.w", sra, MSA128W>; +class SRAI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"srai.d", sra, MSA128D>; class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128B>; class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128H>; @@ -1885,10 +1937,10 @@ class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128H>; class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128W>; class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128D>; -class SRLI_B_DESC : MSA_BIT_B_DESC_BASE<"srli.b", int_mips_srli_b, MSA128B>; -class SRLI_H_DESC : MSA_BIT_H_DESC_BASE<"srli.h", int_mips_srli_h, MSA128H>; -class SRLI_W_DESC : MSA_BIT_W_DESC_BASE<"srli.w", int_mips_srli_w, MSA128W>; -class SRLI_D_DESC : MSA_BIT_D_DESC_BASE<"srli.d", int_mips_srli_d, MSA128D>; +class SRLI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"srli.b", srl, MSA128B>; +class SRLI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"srli.h", srl, MSA128H>; +class SRLI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"srli.w", srl, MSA128W>; +class SRLI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"srli.d", srl, MSA128D>; class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128B>; class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 60960d6..9bd3be2 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1187,16 +1187,34 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_sll_w: case Intrinsic::mips_sll_d: return lowerMSABinaryIntr(Op, DAG, ISD::SHL); + case Intrinsic::mips_slli_b: + case Intrinsic::mips_slli_h: + case Intrinsic::mips_slli_w: + case Intrinsic::mips_slli_d: + return lowerMSABinaryImmIntr(Op, DAG, ISD::SHL, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_sra_b: case Intrinsic::mips_sra_h: case Intrinsic::mips_sra_w: case Intrinsic::mips_sra_d: return lowerMSABinaryIntr(Op, DAG, 
ISD::SRA); + case Intrinsic::mips_srai_b: + case Intrinsic::mips_srai_h: + case Intrinsic::mips_srai_w: + case Intrinsic::mips_srai_d: + return lowerMSABinaryImmIntr(Op, DAG, ISD::SRA, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_srl_b: case Intrinsic::mips_srl_h: case Intrinsic::mips_srl_w: case Intrinsic::mips_srl_d: return lowerMSABinaryIntr(Op, DAG, ISD::SRL); + case Intrinsic::mips_srli_b: + case Intrinsic::mips_srli_h: + case Intrinsic::mips_srli_w: + case Intrinsic::mips_srli_d: + return lowerMSABinaryImmIntr(Op, DAG, ISD::SRL, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_subv_b: case Intrinsic::mips_subv_h: case Intrinsic::mips_subv_w: -- cgit v1.1 From ae1fb8fc19dcfd2f0e33a36f40d687b08dcc9a6b Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 10:46:19 +0000 Subject: [mips][msa] Added support for matching comparisons from normal IR (i.e. not intrinsics) MIPS SelectionDAG changes: * Added VCEQ, VCL[ET]_[SU] nodes to represent vector comparisons that produce a bitmask. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191286 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 5 + lib/Target/Mips/MipsISelLowering.h | 8 ++ lib/Target/Mips/MipsMSAInstrInfo.td | 225 ++++++++++++++++++++++----------- lib/Target/Mips/MipsSEISelLowering.cpp | 116 +++++++++++++++++ 4 files changed, 281 insertions(+), 73 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 443ce19..97700bc 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -212,6 +212,11 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO"; case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO"; case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; + case MipsISD::VCEQ: return "MipsISD::VCEQ"; + case MipsISD::VCLE_S: return "MipsISD::VCLE_S"; + case MipsISD::VCLE_U: return "MipsISD::VCLE_U"; + case MipsISD::VCLT_S: return "MipsISD::VCLT_S"; + case MipsISD::VCLT_U: return "MipsISD::VCLT_U"; case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 71a2ddc..730e97f 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -153,11 +153,19 @@ namespace llvm { SELECT_CC_DSP, // Vector comparisons. + // These take a vector and return a boolean. VALL_ZERO, VANY_ZERO, VALL_NONZERO, VANY_NONZERO, + // These take a vector and return a vector bitmask. + VCEQ, + VCLE_S, + VCLE_U, + VCLT_S, + VCLT_U, + // Special case of BUILD_VECTOR where all elements are the same. 
VSPLAT, // Special case of VSPLAT where the result is v2i64, the operand is diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 92dc046..fb88a3b 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -13,6 +13,14 @@ def SDT_MipsSplat : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>]>; def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; +def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>]>; +def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>]>; def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; @@ -23,6 +31,9 @@ def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; +def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; +def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; + def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", @@ -50,6 +61,68 @@ def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>; +class vfsetcc_type : + PatFrag<(ops node:$lhs, node:$rhs), + (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>; + +// ISD::SETFALSE cannot occur +def vfsetoeq_v4f32 : vfsetcc_type; +def vfsetoeq_v2f64 : vfsetcc_type; +def vfsetoge_v4f32 : vfsetcc_type; +def vfsetoge_v2f64 : vfsetcc_type; +def vfsetogt_v4f32 : vfsetcc_type; +def vfsetogt_v2f64 : vfsetcc_type; +def vfsetole_v4f32 : vfsetcc_type; +def vfsetole_v2f64 : vfsetcc_type; +def vfsetolt_v4f32 : vfsetcc_type; +def vfsetolt_v2f64 : vfsetcc_type; +def vfsetone_v4f32 : vfsetcc_type; +def vfsetone_v2f64 : vfsetcc_type; +def vfsetord_v4f32 : vfsetcc_type; +def vfsetord_v2f64 : vfsetcc_type; +def vfsetun_v4f32 : vfsetcc_type; +def vfsetun_v2f64 : vfsetcc_type; +def vfsetueq_v4f32 : vfsetcc_type; +def vfsetueq_v2f64 : vfsetcc_type; +def vfsetuge_v4f32 : vfsetcc_type; +def vfsetuge_v2f64 : vfsetcc_type; +def vfsetugt_v4f32 : vfsetcc_type; +def vfsetugt_v2f64 : vfsetcc_type; +def vfsetule_v4f32 : vfsetcc_type; +def vfsetule_v2f64 : vfsetcc_type; +def vfsetult_v4f32 : vfsetcc_type; +def vfsetult_v2f64 : vfsetcc_type; +def vfsetune_v4f32 : vfsetcc_type; +def vfsetune_v2f64 : vfsetcc_type; +// ISD::SETTRUE cannot occur +// ISD::SETFALSE2 cannot occur +// ISD::SETTRUE2 cannot occur + +class vsetcc_type : + PatFrag<(ops node:$lhs, node:$rhs), + (ResTy (vsetcc node:$lhs, node:$rhs, CC))>; + +def vseteq_v16i8 : vsetcc_type; +def vseteq_v8i16 : vsetcc_type; +def vseteq_v4i32 : vsetcc_type; +def vseteq_v2i64 : vsetcc_type; +def vsetle_v16i8 : vsetcc_type; +def vsetle_v8i16 : vsetcc_type; +def vsetle_v4i32 : vsetcc_type; +def vsetle_v2i64 : vsetcc_type; +def vsetlt_v16i8 : vsetcc_type; +def vsetlt_v8i16 : vsetcc_type; +def vsetlt_v4i32 : vsetcc_type; +def vsetlt_v2i64 : vsetcc_type; +def vsetule_v16i8 : vsetcc_type; +def vsetule_v8i16 : vsetcc_type; +def vsetule_v4i32 : vsetcc_type; +def vsetule_v2i64 : vsetcc_type; +def vsetult_v16i8 : vsetcc_type; +def vsetult_v8i16 : vsetcc_type; +def vsetult_v4i32 : vsetcc_type; +def vsetult_v2i64 : vsetcc_type; + def vsplati8 : PatFrag<(ops node:$in), (v16i8 (MipsVSplat 
(i32 node:$in)))>; def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>; def vsplati32 : PatFrag<(ops node:$in), (v4i32 (MipsVSplat (i32 node:$in)))>; @@ -909,12 +982,14 @@ class MSA_I5_X_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, simm5:$s5); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $s5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immSExt5:$s5))]; + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (SplatNode immSExt5:$s5)))]; InstrItinClass Itinerary = itin; } @@ -1228,19 +1303,23 @@ class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128D>; class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128B>; -class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", int_mips_ceq_b, MSA128B>, +class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", vseteq_v16i8, MSA128B>, IsCommutable; -class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", int_mips_ceq_h, MSA128H>, +class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", vseteq_v8i16, MSA128H>, IsCommutable; -class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", int_mips_ceq_w, MSA128W>, +class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", vseteq_v4i32, MSA128W>, IsCommutable; -class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", int_mips_ceq_d, MSA128D>, +class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", vseteq_v2i64, MSA128D>, IsCommutable; -class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", int_mips_ceqi_b, MSA128B>; -class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", int_mips_ceqi_h, MSA128H>; -class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", int_mips_ceqi_w, MSA128W>; -class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", int_mips_ceqi_d, MSA128D>; +class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", vseteq_v16i8, vsplati8, + MSA128B>; +class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", vseteq_v8i16, vsplati16, + MSA128H>; +class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", vseteq_v4i32, vsplati32, + MSA128W>; +class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", vseteq_v2i64, vsplati64, + MSA128D>; class CFCMSA_DESC { dag OutOperandList = (outs GPR32:$rd); @@ -1250,61 +1329,61 @@ class CFCMSA_DESC { bit hasSideEffects = 1; } -class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", int_mips_cle_s_b, MSA128B>; -class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", int_mips_cle_s_h, MSA128H>; -class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", int_mips_cle_s_w, MSA128W>; -class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", int_mips_cle_s_d, MSA128D>; +class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", vsetle_v16i8, MSA128B>; +class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", vsetle_v8i16, MSA128H>; +class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", vsetle_v4i32, MSA128W>; +class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", vsetle_v2i64, MSA128D>; -class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", int_mips_cle_u_b, MSA128B>; -class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", int_mips_cle_u_h, MSA128H>; -class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", int_mips_cle_u_w, MSA128W>; -class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", int_mips_cle_u_d, MSA128D>; +class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", vsetule_v16i8, MSA128B>; +class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", vsetule_v8i16, MSA128H>; +class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", vsetule_v4i32, MSA128W>; +class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", vsetule_v2i64, MSA128D>; -class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", int_mips_clei_s_b, +class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", vsetle_v16i8, vsplati8, MSA128B>; -class CLEI_S_H_DESC : MSA_SI5_DESC_BASE<"clei_s.h", int_mips_clei_s_h, +class 
CLEI_S_H_DESC : MSA_SI5_DESC_BASE<"clei_s.h", vsetle_v8i16, vsplati16, MSA128H>; -class CLEI_S_W_DESC : MSA_SI5_DESC_BASE<"clei_s.w", int_mips_clei_s_w, +class CLEI_S_W_DESC : MSA_SI5_DESC_BASE<"clei_s.w", vsetle_v4i32, vsplati32, MSA128W>; -class CLEI_S_D_DESC : MSA_SI5_DESC_BASE<"clei_s.d", int_mips_clei_s_d, +class CLEI_S_D_DESC : MSA_SI5_DESC_BASE<"clei_s.d", vsetle_v2i64, vsplati64, MSA128D>; -class CLEI_U_B_DESC : MSA_SI5_DESC_BASE<"clei_u.b", int_mips_clei_u_b, +class CLEI_U_B_DESC : MSA_I5_DESC_BASE<"clei_u.b", vsetule_v16i8, vsplati8, + MSA128B>; +class CLEI_U_H_DESC : MSA_I5_DESC_BASE<"clei_u.h", vsetule_v8i16, vsplati16, + MSA128H>; +class CLEI_U_W_DESC : MSA_I5_DESC_BASE<"clei_u.w", vsetule_v4i32, vsplati32, + MSA128W>; +class CLEI_U_D_DESC : MSA_I5_DESC_BASE<"clei_u.d", vsetule_v2i64, vsplati64, + MSA128D>; + +class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", vsetlt_v16i8, MSA128B>; +class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", vsetlt_v8i16, MSA128H>; +class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", vsetlt_v4i32, MSA128W>; +class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", vsetlt_v2i64, MSA128D>; + +class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", vsetult_v16i8, MSA128B>; +class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", vsetult_v8i16, MSA128H>; +class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", vsetult_v4i32, MSA128W>; +class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", vsetult_v2i64, MSA128D>; + +class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", vsetlt_v16i8, vsplati8, MSA128B>; -class CLEI_U_H_DESC : MSA_SI5_DESC_BASE<"clei_u.h", int_mips_clei_u_h, +class CLTI_S_H_DESC : MSA_SI5_DESC_BASE<"clti_s.h", vsetlt_v8i16, vsplati16, MSA128H>; -class CLEI_U_W_DESC : MSA_SI5_DESC_BASE<"clei_u.w", int_mips_clei_u_w, +class CLTI_S_W_DESC : MSA_SI5_DESC_BASE<"clti_s.w", vsetlt_v4i32, vsplati32, MSA128W>; -class CLEI_U_D_DESC : MSA_SI5_DESC_BASE<"clei_u.d", int_mips_clei_u_d, +class CLTI_S_D_DESC : MSA_SI5_DESC_BASE<"clti_s.d", vsetlt_v2i64, vsplati64, MSA128D>; -class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", int_mips_clt_s_b, MSA128B>; -class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", int_mips_clt_s_h, MSA128H>; -class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", int_mips_clt_s_w, MSA128W>; -class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", int_mips_clt_s_d, MSA128D>; - -class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", int_mips_clt_u_b, MSA128B>; -class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", int_mips_clt_u_h, MSA128H>; -class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", int_mips_clt_u_w, MSA128W>; -class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", int_mips_clt_u_d, MSA128D>; - -class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", int_mips_clti_s_b, - MSA128B>; -class CLTI_S_H_DESC : MSA_SI5_DESC_BASE<"clti_s.h", int_mips_clti_s_h, - MSA128H>; -class CLTI_S_W_DESC : MSA_SI5_DESC_BASE<"clti_s.w", int_mips_clti_s_w, - MSA128W>; -class CLTI_S_D_DESC : MSA_SI5_DESC_BASE<"clti_s.d", int_mips_clti_s_d, - MSA128D>; - -class CLTI_U_B_DESC : MSA_SI5_DESC_BASE<"clti_u.b", int_mips_clti_u_b, - MSA128B>; -class CLTI_U_H_DESC : MSA_SI5_DESC_BASE<"clti_u.h", int_mips_clti_u_h, - MSA128H>; -class CLTI_U_W_DESC : MSA_SI5_DESC_BASE<"clti_u.w", int_mips_clti_u_w, - MSA128W>; -class CLTI_U_D_DESC : MSA_SI5_DESC_BASE<"clti_u.d", int_mips_clti_u_d, - MSA128D>; +class CLTI_U_B_DESC : MSA_I5_DESC_BASE<"clti_u.b", vsetult_v16i8, vsplati8, + MSA128B>; +class CLTI_U_H_DESC : MSA_I5_DESC_BASE<"clti_u.h", vsetult_v8i16, vsplati16, + MSA128H>; +class CLTI_U_W_DESC : MSA_I5_DESC_BASE<"clti_u.w", vsetult_v4i32, 
vsplati32, + MSA128W>; +class CLTI_U_D_DESC : MSA_I5_DESC_BASE<"clti_u.d", vsetult_v2i64, vsplati64, + MSA128D>; class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", vextract_sext_i8, v16i8, GPR32, MSA128B>; @@ -1394,9 +1473,9 @@ class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, MSA128W>, class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, MSA128D>, IsCommutable; -class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", int_mips_fceq_w, MSA128W>, +class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", vfsetoeq_v4f32, MSA128W>, IsCommutable; -class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", int_mips_fceq_d, MSA128D>, +class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", vfsetoeq_v2f64, MSA128D>, IsCommutable; class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, @@ -1404,45 +1483,45 @@ class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, MSA128D>; -class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", int_mips_fcle_w, MSA128W>; -class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", int_mips_fcle_d, MSA128D>; +class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", vfsetole_v4f32, MSA128W>; +class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", vfsetole_v2f64, MSA128D>; -class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", int_mips_fclt_w, MSA128W>; -class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", int_mips_fclt_d, MSA128D>; +class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", vfsetolt_v4f32, MSA128W>; +class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", vfsetolt_v2f64, MSA128D>; -class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", int_mips_fcne_w, MSA128W>, +class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", vfsetone_v4f32, MSA128W>, IsCommutable; -class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", int_mips_fcne_d, MSA128D>, +class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", vfsetone_v2f64, MSA128D>, IsCommutable; -class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", int_mips_fcor_w, MSA128W>, +class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", vfsetord_v4f32, MSA128W>, IsCommutable; -class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", int_mips_fcor_d, MSA128D>, +class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", vfsetord_v2f64, MSA128D>, IsCommutable; -class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", int_mips_fcueq_w, MSA128W>, +class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", vfsetueq_v4f32, MSA128W>, IsCommutable; -class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", int_mips_fcueq_d, MSA128D>, +class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", vfsetueq_v2f64, MSA128D>, IsCommutable; -class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", int_mips_fcule_w, MSA128W>, +class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", vfsetule_v4f32, MSA128W>, IsCommutable; -class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", int_mips_fcule_d, MSA128D>, +class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", vfsetule_v2f64, MSA128D>, IsCommutable; -class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", int_mips_fcult_w, MSA128W>, +class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", vfsetult_v4f32, MSA128W>, IsCommutable; -class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", int_mips_fcult_d, MSA128D>, +class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", vfsetult_v2f64, MSA128D>, IsCommutable; -class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", int_mips_fcun_w, MSA128W>, +class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", vfsetun_v4f32, MSA128W>, IsCommutable; -class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", int_mips_fcun_d, MSA128D>, +class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", 
vfsetun_v2f64, MSA128D>, IsCommutable; -class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", int_mips_fcune_w, MSA128W>, +class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", vfsetune_v4f32, MSA128W>, IsCommutable; -class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", int_mips_fcune_d, MSA128D>, +class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", vfsetune_v2f64, MSA128D>, IsCommutable; class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", fdiv, MSA128W>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 9bd3be2..260847c 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -180,6 +180,13 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::SUB, Ty, Legal); setOperationAction(ISD::UDIV, Ty, Legal); setOperationAction(ISD::XOR, Ty, Legal); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETNE, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); } // Enable MSA support for the given floating-point type and Register class. @@ -204,6 +211,14 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::FRINT, Ty, Legal); setOperationAction(ISD::FSQRT, Ty, Legal); setOperationAction(ISD::FSUB, Ty, Legal); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETOGE, Ty, Expand); + setCondCodeAction(ISD::SETOGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); } } @@ -1109,6 +1124,66 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_ZERO); case Intrinsic::mips_bz_v: return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_ZERO); + case Intrinsic::mips_ceq_b: + case Intrinsic::mips_ceq_h: + case Intrinsic::mips_ceq_w: + case Intrinsic::mips_ceq_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETEQ); + case Intrinsic::mips_ceqi_b: + case Intrinsic::mips_ceqi_h: + case Intrinsic::mips_ceqi_w: + case Intrinsic::mips_ceqi_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ); + case Intrinsic::mips_cle_s_b: + case Intrinsic::mips_cle_s_h: + case Intrinsic::mips_cle_s_w: + case Intrinsic::mips_cle_s_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETLE); + case Intrinsic::mips_clei_s_b: + case Intrinsic::mips_clei_s_h: + case Intrinsic::mips_clei_s_w: + case Intrinsic::mips_clei_s_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETLE); + case Intrinsic::mips_cle_u_b: + case Intrinsic::mips_cle_u_h: + case Intrinsic::mips_cle_u_w: + case Intrinsic::mips_cle_u_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETULE); + case Intrinsic::mips_clei_u_b: + case Intrinsic::mips_clei_u_h: + case Intrinsic::mips_clei_u_w: + case Intrinsic::mips_clei_u_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); + case Intrinsic::mips_clt_s_b: + case Intrinsic::mips_clt_s_h: + case Intrinsic::mips_clt_s_w: + case 
Intrinsic::mips_clt_s_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETLT); + case Intrinsic::mips_clti_s_b: + case Intrinsic::mips_clti_s_h: + case Intrinsic::mips_clti_s_w: + case Intrinsic::mips_clti_s_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETLT); + case Intrinsic::mips_clt_u_b: + case Intrinsic::mips_clt_u_h: + case Intrinsic::mips_clt_u_w: + case Intrinsic::mips_clt_u_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETULT); + case Intrinsic::mips_clti_u_b: + case Intrinsic::mips_clti_u_h: + case Intrinsic::mips_clti_u_w: + case Intrinsic::mips_clti_u_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); case Intrinsic::mips_copy_s_b: case Intrinsic::mips_copy_s_h: case Intrinsic::mips_copy_s_w: @@ -1130,6 +1205,47 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fadd_w: case Intrinsic::mips_fadd_d: return lowerMSABinaryIntr(Op, DAG, ISD::FADD); + // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away + case Intrinsic::mips_fceq_w: + case Intrinsic::mips_fceq_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETOEQ); + case Intrinsic::mips_fcle_w: + case Intrinsic::mips_fcle_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETOLE); + case Intrinsic::mips_fclt_w: + case Intrinsic::mips_fclt_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETOLT); + case Intrinsic::mips_fcne_w: + case Intrinsic::mips_fcne_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETONE); + case Intrinsic::mips_fcor_w: + case Intrinsic::mips_fcor_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETO); + case Intrinsic::mips_fcueq_w: + case Intrinsic::mips_fcueq_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETUEQ); + case Intrinsic::mips_fcule_w: + case Intrinsic::mips_fcule_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETULE); + case Intrinsic::mips_fcult_w: + case Intrinsic::mips_fcult_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETULT); + case Intrinsic::mips_fcun_w: + case Intrinsic::mips_fcun_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETUO); + case Intrinsic::mips_fcune_w: + case Intrinsic::mips_fcune_d: + return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETUNE); case Intrinsic::mips_fdiv_w: case Intrinsic::mips_fdiv_d: return lowerMSABinaryIntr(Op, DAG, ISD::FDIV); -- cgit v1.1 From 38a10ff063971c2f7f7384cceba3253bca32e27a Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 12:04:44 +0000 Subject: [mips][msa] Added support for matching bsel and bseli from normal IR (i.e. not intrinsics) This required correcting the definition of the bsel and bseli intrinsics. 
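For illustration, the vselect form that the rewritten BSEL_V_DESC matches can be produced as below (a minimal IRBuilder sketch with illustrative names; roughly speaking, the i1 mask vector is what legalization hands to the DAG as the bitmask operand tied to $wd_in):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // select <16 x i1> %mask, <16 x i8> %a, <16 x i8> %b
    // With MSA this should select to bsel.v.
    Value *emitByteSelect(IRBuilder<> &B, Value *Mask, Value *A, Value *C) {
      return B.CreateSelect(Mask, A, C);
    }
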
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191290 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 35 ++++++++++++++++++++++++++++++++-- lib/Target/Mips/MipsSEISelLowering.cpp | 11 +++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index fb88a3b..48bf8a2 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1282,9 +1282,26 @@ class BNZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bnz.d", MSA128D>; class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128B>; -class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, MSA128B>; +class BSEL_V_DESC { + dag OutOperandList = (outs MSA128B:$wd); + dag InOperandList = (ins MSA128B:$wd_in, MSA128B:$ws, MSA128B:$wt); + string AsmString = "bsel.v\t$wd, $ws, $wt"; + list Pattern = [(set MSA128B:$wd, (vselect MSA128B:$wd_in, MSA128B:$ws, + MSA128B:$wt))]; + InstrItinClass Itinerary = NoItinerary; + string Constraints = "$wd = $wd_in"; +} -class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, MSA128B>; +class BSELI_B_DESC { + dag OutOperandList = (outs MSA128B:$wd); + dag InOperandList = (ins MSA128B:$wd_in, MSA128B:$ws, uimm8:$u8); + string AsmString = "bseli.b\t$wd, $ws, $u8"; + list Pattern = [(set MSA128B:$wd, (vselect MSA128B:$wd_in, + MSA128B:$ws, + (vsplati8 immZExt8:$u8)))]; + InstrItinClass Itinerary = NoItinerary; + string Constraints = "$wd = $wd_in"; +} class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, MSA128B>; class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128H>; @@ -2244,6 +2261,20 @@ def BNZ_V : BNZ_V_ENC, BNZ_V_DESC; def BSEL_V : BSEL_V_ENC, BSEL_V_DESC; +class MSA_BSEL_PSEUDO_BASE : + MipsPseudo<(outs RC:$wd), (ins RC:$wd_in, RC:$ws, RC:$wt), + [(set RC:$wd, (Ty (vselect RC:$wd_in, RC:$ws, RC:$wt)))]>, + PseudoInstExpansion<(BSEL_V MSA128B:$wd, MSA128B:$wd_in, MSA128B:$ws, + MSA128B:$wt)> { + let Constraints = "$wd_in = $wd"; +} + +def BSEL_H_PSEUDO : MSA_BSEL_PSEUDO_BASE; +def BSEL_W_PSEUDO : MSA_BSEL_PSEUDO_BASE; +def BSEL_D_PSEUDO : MSA_BSEL_PSEUDO_BASE; +def BSEL_FW_PSEUDO : MSA_BSEL_PSEUDO_BASE; +def BSEL_FD_PSEUDO : MSA_BSEL_PSEUDO_BASE; + def BSELI_B : BSELI_B_ENC, BSELI_B_DESC; def BSET_B : BSET_B_ENC, BSET_B_DESC; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 260847c..ffc5777 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -93,6 +93,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::XOR); } @@ -179,6 +180,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::SRL, Ty, Legal); setOperationAction(ISD::SUB, Ty, Legal); setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); setOperationAction(ISD::XOR, Ty, Legal); setOperationAction(ISD::SETCC, Ty, Legal); @@ -211,6 +213,7 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::FRINT, Ty, Legal); setOperationAction(ISD::FSQRT, Ty, Legal); setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); setOperationAction(ISD::SETCC, Ty, Legal); setCondCodeAction(ISD::SETOGE, Ty, Expand); @@ -1117,6 +1120,14 @@ SDValue 
MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_NONZERO); case Intrinsic::mips_bnz_v: return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_NONZERO); + case Intrinsic::mips_bsel_v: + return DAG.getNode(ISD::VSELECT, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), + Op->getOperand(3)); + case Intrinsic::mips_bseli_b: + return DAG.getNode(ISD::VSELECT, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), + lowerMSASplatImm(Op, 3, DAG)); case Intrinsic::mips_bz_b: case Intrinsic::mips_bz_h: case Intrinsic::mips_bz_w: -- cgit v1.1 From 89d13c1b380218d381be035eb5e4d83dcbc391cc Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 12:18:31 +0000 Subject: [mips][msa] Added support for matching max, maxi, min, mini from normal IR (i.e. not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191291 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 4 ++ lib/Target/Mips/MipsISelLowering.h | 6 ++ lib/Target/Mips/MipsMSAInstrInfo.td | 115 ++++++++++++++++----------------- lib/Target/Mips/MipsSEISelLowering.cpp | 100 +++++++++++++++++++++++++--- 4 files changed, 158 insertions(+), 67 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 97700bc..2a841b9 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -217,6 +217,10 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VCLE_U: return "MipsISD::VCLE_U"; case MipsISD::VCLT_S: return "MipsISD::VCLT_S"; case MipsISD::VCLT_U: return "MipsISD::VCLT_U"; + case MipsISD::VSMAX: return "MipsISD::VSMAX"; + case MipsISD::VSMIN: return "MipsISD::VSMIN"; + case MipsISD::VUMAX: return "MipsISD::VUMAX"; + case MipsISD::VUMIN: return "MipsISD::VUMIN"; case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 730e97f..0d588c1 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -166,6 +166,12 @@ namespace llvm { VCLT_S, VCLT_U, + // Element-wise vector max/min. + VSMAX, + VSMIN, + VUMAX, + VUMIN, + // Special case of BUILD_VECTOR where all elements are the same. 
VSPLAT, // Special case of VSPLAT where the result is v2i64, the operand is diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 48bf8a2..1909dc7 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -26,6 +26,14 @@ def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>; def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>; +def MipsVSMax : SDNode<"MipsISD::VSMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def MipsVSMin : SDNode<"MipsISD::VSMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def MipsVUMax : SDNode<"MipsISD::VUMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; def MipsVSplat : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>; def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, @@ -970,17 +978,6 @@ class MSA_I5_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm5:$u5); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt5:$u5))]; - InstrItinClass Itinerary = itin; -} - class MSA_SI5_DESC_BASE; class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, MSA128W>; class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, MSA128D>; -class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", int_mips_max_s_b, MSA128B>; -class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", int_mips_max_s_h, MSA128H>; -class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", int_mips_max_s_w, MSA128W>; -class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", int_mips_max_s_d, MSA128D>; +class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", MipsVSMax, MSA128B>; +class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", MipsVSMax, MSA128H>; +class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", MipsVSMax, MSA128W>; +class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", MipsVSMax, MSA128D>; -class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", int_mips_max_u_b, MSA128B>; -class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, MSA128H>; -class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, MSA128W>; -class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, MSA128D>; +class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", MipsVUMax, MSA128B>; +class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", MipsVUMax, MSA128H>; +class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", MipsVUMax, MSA128W>; +class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", MipsVUMax, MSA128D>; -class MAXI_S_B_DESC : MSA_I5_X_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, - MSA128B>; -class MAXI_S_H_DESC : MSA_I5_X_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, - MSA128H>; -class MAXI_S_W_DESC : MSA_I5_X_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, - MSA128W>; -class MAXI_S_D_DESC : MSA_I5_X_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, - MSA128D>; +class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8, + MSA128B>; +class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", MipsVSMax, vsplati16, + MSA128H>; +class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", MipsVSMax, vsplati32, + MSA128W>; +class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", MipsVSMax, vsplati64, + MSA128D>; -class MAXI_U_B_DESC : MSA_I5_X_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, - 
MSA128B>; -class MAXI_U_H_DESC : MSA_I5_X_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, - MSA128H>; -class MAXI_U_W_DESC : MSA_I5_X_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, - MSA128W>; -class MAXI_U_D_DESC : MSA_I5_X_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, - MSA128D>; +class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", MipsVUMax, vsplati8, + MSA128B>; +class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", MipsVUMax, vsplati16, + MSA128H>; +class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32, + MSA128W>; +class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64, + MSA128D>; class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B>; class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128H>; class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, MSA128W>; class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, MSA128D>; -class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", int_mips_min_s_b, MSA128B>; -class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", int_mips_min_s_h, MSA128H>; -class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", int_mips_min_s_w, MSA128W>; -class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", int_mips_min_s_d, MSA128D>; +class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", MipsVSMin, MSA128B>; +class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", MipsVSMin, MSA128H>; +class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", MipsVSMin, MSA128W>; +class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", MipsVSMin, MSA128D>; -class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", int_mips_min_u_b, MSA128B>; -class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, MSA128H>; -class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, MSA128W>; -class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, MSA128D>; +class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", MipsVUMin, MSA128B>; +class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", MipsVUMin, MSA128H>; +class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", MipsVUMin, MSA128W>; +class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", MipsVUMin, MSA128D>; -class MINI_S_B_DESC : MSA_I5_X_DESC_BASE<"mini_s.b", int_mips_mini_s_b, - MSA128B>; -class MINI_S_H_DESC : MSA_I5_X_DESC_BASE<"mini_s.h", int_mips_mini_s_h, - MSA128H>; -class MINI_S_W_DESC : MSA_I5_X_DESC_BASE<"mini_s.w", int_mips_mini_s_w, - MSA128W>; -class MINI_S_D_DESC : MSA_I5_X_DESC_BASE<"mini_s.d", int_mips_mini_s_d, - MSA128D>; +class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8, + MSA128B>; +class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", MipsVSMin, vsplati16, + MSA128H>; +class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", MipsVSMin, vsplati32, + MSA128W>; +class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", MipsVSMin, vsplati64, + MSA128D>; -class MINI_U_B_DESC : MSA_I5_X_DESC_BASE<"mini_u.b", int_mips_mini_u_b, - MSA128B>; -class MINI_U_H_DESC : MSA_I5_X_DESC_BASE<"mini_u.h", int_mips_mini_u_h, - MSA128H>; -class MINI_U_W_DESC : MSA_I5_X_DESC_BASE<"mini_u.w", int_mips_mini_u_w, - MSA128W>; -class MINI_U_D_DESC : MSA_I5_X_DESC_BASE<"mini_u.d", int_mips_mini_u_d, - MSA128D>; +class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", MipsVUMin, vsplati8, + MSA128B>; +class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", MipsVUMin, vsplati16, + MSA128H>; +class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32, + MSA128W>; +class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64, + MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; 
class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index ffc5777..9687bb9 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -673,17 +673,57 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { EVT Ty = N->getValueType(0); - if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) - return SDValue(); + if (Ty.is128BitVector() && Ty.isInteger()) { + // Try the following combines: + // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) + // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) + // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but + // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the + // legalizer. + SDValue Op0 = N->getOperand(0); + + if (Op0->getOpcode() != ISD::SETCC) + return SDValue(); - SDValue SetCC = N->getOperand(0); + ISD::CondCode CondCode = cast(Op0->getOperand(2))->get(); + bool Signed; - if (SetCC.getOpcode() != MipsISD::SETCC_DSP) - return SDValue(); + if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) + Signed = true; + else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) + Signed = false; + else + return SDValue(); - return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, - SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1), - N->getOperand(2), SetCC.getOperand(2)); + SDValue Op1 = N->getOperand(1); + SDValue Op2 = N->getOperand(2); + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + + if (Op1 == Op0Op0 && Op2 == Op0Op1) + return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), + Ty, Op1, Op2); + else if (Op1 == Op0Op1 && Op2 == Op0Op0) + return DAG.getNode(Signed ? 
MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), + Ty, Op1, Op2); + } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { + SDValue SetCC = N->getOperand(0); + + if (SetCC.getOpcode() != MipsISD::SETCC_DSP) + return SDValue(); + + return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, + SetCC.getOperand(0), SetCC.getOperand(1), + N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); + } + + return SDValue(); } static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, @@ -1288,6 +1328,50 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_ldi_w: case Intrinsic::mips_ldi_d: return lowerMSAUnaryIntr(Op, DAG, MipsISD::VSPLAT); + case Intrinsic::mips_max_s_b: + case Intrinsic::mips_max_s_h: + case Intrinsic::mips_max_s_w: + case Intrinsic::mips_max_s_d: + return lowerMSABinaryIntr(Op, DAG, MipsISD::VSMAX); + case Intrinsic::mips_max_u_b: + case Intrinsic::mips_max_u_h: + case Intrinsic::mips_max_u_w: + case Intrinsic::mips_max_u_d: + return lowerMSABinaryIntr(Op, DAG, MipsISD::VUMAX); + case Intrinsic::mips_maxi_s_b: + case Intrinsic::mips_maxi_s_h: + case Intrinsic::mips_maxi_s_w: + case Intrinsic::mips_maxi_s_d: + return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VSMAX, + lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_maxi_u_b: + case Intrinsic::mips_maxi_u_h: + case Intrinsic::mips_maxi_u_w: + case Intrinsic::mips_maxi_u_d: + return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VUMAX, + lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_min_s_b: + case Intrinsic::mips_min_s_h: + case Intrinsic::mips_min_s_w: + case Intrinsic::mips_min_s_d: + return lowerMSABinaryIntr(Op, DAG, MipsISD::VSMIN); + case Intrinsic::mips_min_u_b: + case Intrinsic::mips_min_u_h: + case Intrinsic::mips_min_u_w: + case Intrinsic::mips_min_u_d: + return lowerMSABinaryIntr(Op, DAG, MipsISD::VUMIN); + case Intrinsic::mips_mini_s_b: + case Intrinsic::mips_mini_s_h: + case Intrinsic::mips_mini_s_w: + case Intrinsic::mips_mini_s_d: + return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VSMIN, + lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_mini_u_b: + case Intrinsic::mips_mini_u_h: + case Intrinsic::mips_mini_u_w: + case Intrinsic::mips_mini_u_d: + return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VUMIN, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_mulv_b: case Intrinsic::mips_mulv_h: case Intrinsic::mips_mulv_w: -- cgit v1.1 From c998bc98439e21bc8c3838d6353475eacfb8494e Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 12:32:47 +0000 Subject: [mips][msa] Added support for matching andi, ori, nori, and xori from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191293 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 33 +++++++++++++++++++++++---------- lib/Target/Mips/MipsSEISelLowering.cpp | 14 ++++++++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 1909dc7..a4c9cb1 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -991,11 +991,24 @@ class MSA_SI5_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins RCWS:$ws, uimm8:$u8); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, + (SplatNode immZExt8:$u8)))]; + InstrItinClass Itinerary = itin; +} + +// This class is deprecated and will be removed in the next few patches +class MSA_I8_X_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm8:$u8); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8"); list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt8:$u8))]; InstrItinClass Itinerary = itin; } @@ -1168,7 +1181,7 @@ class AND_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class AND_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class AND_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", int_mips_andi_b, MSA128B>; +class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", and, vsplati8, MSA128B>; class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, MSA128B>; class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, MSA128H>; @@ -1256,11 +1269,11 @@ class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, MSA128B>; -class BMNZI_B_DESC : MSA_I8_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, MSA128B>; +class BMNZI_B_DESC : MSA_I8_X_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b, MSA128B>; class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, MSA128B>; -class BMZI_B_DESC : MSA_I8_DESC_BASE<"bmzi.b", int_mips_bmzi_b, MSA128B>; +class BMZI_B_DESC : MSA_I8_X_DESC_BASE<"bmzi.b", int_mips_bmzi_b, MSA128B>; class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, MSA128B>; class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, MSA128H>; @@ -1929,14 +1942,14 @@ class NOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class NOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class NOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", int_mips_nori_b, MSA128B>; +class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", MipsVNOR, vsplati8, MSA128B>; class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", or, MSA128B>; class OR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class OR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", int_mips_ori_b, MSA128B>; +class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8, MSA128B>; class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, MSA128B>; class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, MSA128H>; @@ -1963,9 +1976,9 @@ class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, MSA128H>; class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, MSA128W>; class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, MSA128D>; -class SHF_B_DESC : MSA_I8_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B>; -class 
SHF_H_DESC : MSA_I8_DESC_BASE<"shf.h", int_mips_shf_h, MSA128H>; -class SHF_W_DESC : MSA_I8_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W>; +class SHF_B_DESC : MSA_I8_X_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B>; +class SHF_H_DESC : MSA_I8_X_DESC_BASE<"shf.h", int_mips_shf_h, MSA128H>; +class SHF_W_DESC : MSA_I8_X_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W>; class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B>; class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>; @@ -2125,7 +2138,7 @@ class XOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class XOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class XOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", int_mips_xori_b, MSA128B>; +class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", xor, vsplati8, MSA128B>; // Instruction defs. def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 9687bb9..ef2217c 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1153,6 +1153,9 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_and_v: return lowerMSABinaryIntr(Op, DAG, ISD::AND); + case Intrinsic::mips_andi_b: + return lowerMSABinaryImmIntr(Op, DAG, ISD::AND, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: @@ -1386,8 +1389,16 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Res = lowerMSABinaryIntr(Op, DAG, ISD::OR); return DAG.getNOT(SDLoc(Op), Res, Res->getValueType(0)); } + case Intrinsic::mips_nori_b: { + SDValue Res = lowerMSABinaryImmIntr(Op, DAG, ISD::OR, + lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNOT(SDLoc(Op), Res, Res->getValueType(0)); + } case Intrinsic::mips_or_v: return lowerMSABinaryIntr(Op, DAG, ISD::OR); + case Intrinsic::mips_ori_b: + return lowerMSABinaryImmIntr(Op, DAG, ISD::OR, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_pcnt_b: case Intrinsic::mips_pcnt_h: case Intrinsic::mips_pcnt_w: @@ -1439,6 +1450,9 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_xor_v: return lowerMSABinaryIntr(Op, DAG, ISD::XOR); + case Intrinsic::mips_xori_b: + return lowerMSABinaryImmIntr(Op, DAG, ISD::XOR, + lowerMSASplatImm(Op, 2, DAG)); } } -- cgit v1.1 From 930f2b51084c6dac1238b8b0f8dd11f40f619694 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 12:45:36 +0000 Subject: [mips][msa] Line wrapping. No functional change. 
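Before moving on, a sketch of the IR shapes the two matching commits above (r191291 for max/min, r191293 for the bitwise immediates) are aimed at; an MSA-enabled target is assumed and the function names are illustrative:

    define <4 x i32> @smax_v4i32(<4 x i32> %a, <4 x i32> %b) {
      ; (vselect (setcc $a, $b, SETLT), $b, $a) -> (vsmax $a, $b)
      %cmp = icmp slt <4 x i32> %a, %b
      %res = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
      ret <4 x i32> %res
    }

    define <16 x i8> @ori_v16i8(<16 x i8> %a) {
      ; an OR with a splatted unsigned 8-bit immediate -> ori.b
      %res = or <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                               i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
      ret <16 x i8> %res
    }

The first should combine to MipsISD::VSMAX and emit max_s.w; the second should select ori.b via the vsplati8 fragment instead of going through int_mips_ori_b.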
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191295 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsMSAInstrInfo.td | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index a4c9cb1..d0dbb15 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -1764,8 +1764,8 @@ class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", int_mips_insve_w, MSA128W>;
 class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d, MSA128D>;
 class LD_DESC_BASE {
   dag OutOperandList = (outs RCWD:$wd);
   dag InOperandList = (ins MemOpnd:$addr);
@@ -2059,8 +2059,8 @@ class SRLRI_W_DESC : MSA_BIT_W_DESC_BASE<"srlri.w", int_mips_srlri_w, MSA128W>;
 class SRLRI_D_DESC : MSA_BIT_D_DESC_BASE<"srlri.d", int_mips_srlri_d, MSA128D>;
 class ST_DESC_BASE {
   dag OutOperandList = (outs);
   dag InOperandList = (ins RCWD:$wd, MemOpnd:$addr);
--
cgit v1.1


From 421dcc59212a73b82141caa2c94ea340a7b34deb Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Tue, 24 Sep 2013 13:02:08 +0000
Subject: [mips][msa] Added partial support for matching fmax_a from normal IR
 (i.e. not intrinsics)

This covers the case where fmax_a can be used to implement ISD::FABS.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191296 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsMSAInstrInfo.td    | 14 ++++++++++++++
 lib/Target/Mips/MipsSEISelLowering.cpp |  1 +
 2 files changed, 15 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index d0dbb15..959c05a 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -2946,6 +2946,20 @@ def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addrRegImm:$addr),
 (ST_W MSA128W:$ws, addrRegImm:$addr)>;
 def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrRegImm:$addr),
 (ST_D MSA128D:$ws, addrRegImm:$addr)>;
+class MSA_FABS_PSEUDO_DESC_BASE :
+  MipsPseudo<(outs RCWD:$wd),
+             (ins RCWS:$ws),
+             [(set RCWD:$wd, (fabs RCWS:$ws))]> {
+  InstrItinClass Itinerary = itin;
+}
+def FABS_W : MSA_FABS_PSEUDO_DESC_BASE,
+             PseudoInstExpansion<(FMAX_A_W MSA128W:$wd, MSA128W:$ws,
+                                 MSA128W:$ws)>;
+def FABS_D : MSA_FABS_PSEUDO_DESC_BASE,
+             PseudoInstExpansion<(FMAX_A_D MSA128D:$wd, MSA128D:$ws,
+                                 MSA128D:$ws)>;
+
 class MSABitconvertPat preds = [HasMSA]> :
   MSAPat<(DstVT (bitconvert SrcVT:$src)),

diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index ef2217c..3077454 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -206,6 +206,7 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);

   if (Ty != MVT::v8f16) {
+    setOperationAction(ISD::FABS, Ty, Legal);
     setOperationAction(ISD::FADD, Ty, Legal);
     setOperationAction(ISD::FDIV, Ty, Legal);
     setOperationAction(ISD::FLOG2, Ty, Legal);
--
cgit v1.1


From ad16ddeb8e07a259d17ef203c9f443f816f6ae7b Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Tue, 24 Sep 2013 13:16:15 +0000
Subject: [mips][msa] Non-constant BUILD_VECTORs should be expanded to
 INSERT_VECTOR_ELT instead of memory operations.

The resulting code is the same length, but doesn't cause memory traffic or
latency.
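A hedged sketch of the case being changed, assuming an MSA-enabled target (the function name is illustrative): a vector assembled from run-time scalars reaches the DAG as a single non-constant BUILD_VECTOR, which is now rebuilt with insert_vector_elt nodes (insert.w on MSA) instead of being spilled through the stack:

    define <4 x i32> @build_from_scalars(i32 %a, i32 %b, i32 %c, i32 %d) {
      %v0 = insertelement <4 x i32> undef, i32 %a, i32 0
      %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
      %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
      %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
      ret <4 x i32> %v3
    }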
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191297 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsSEISelLowering.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 3077454..1575cde 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1668,6 +1668,23 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
   } else if (isSplatVector(Node))
     return DAG.getNode(MipsISD::VSPLAT, DL, ResTy, Op->getOperand(0));
+  else {
+    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+    // The resulting code is the same length as the expansion, but it doesn't
+    // use memory operations.
+    EVT ResTy = Node->getValueType(0);
+
+    assert(ResTy.isVector());
+
+    unsigned NumElts = ResTy.getVectorNumElements();
+    SDValue Vector = DAG.getUNDEF(ResTy);
+    for (unsigned i = 0; i < NumElts; ++i) {
+      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+                           Node->getOperand(i),
+                           DAG.getConstant(i, MVT::i32));
+    }
+    return Vector;
+  }

   return SDValue();
 }
--
cgit v1.1


From acfa5a203c01d99aac1bdc1e045c08153bcdbbf6 Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Tue, 24 Sep 2013 13:33:07 +0000
Subject: [mips][msa] Remove the VSPLAT and VSPLATD nodes in favour of
 matching BUILD_VECTOR.

Most constant BUILD_VECTORs are matched using ComplexPatterns which cover
bitcasted as well as normal vectors. However, it doesn't seem to be possible
to match ldi.[bhwd] in a type-agnostic manner (e.g. to support the widest
range of immediates, it should be possible to use ldi.b to load v2i64) using
TableGen, so ldi.[bhwd] is matched using custom code in
MipsSEISelDAGToDAG.cpp.

This made the majority of the constant splat BUILD_VECTOR lowering redundant.
The only transformation remaining for constant splats is when an (up to)
32-bit constant splat is possible but the value does not fit into a 10-bit
signed integer. In this case, the BUILD_VECTOR is transformed into a
bitcasted BUILD_VECTOR so that fill.[bhw] can be used to splat the vector
from a GPR32 register (which is initialized using the usual lui/addiu
sequence).

There are no additional tests since this is a re-implementation of previous
functionality. The change is intended to make it easier to implement some of
the upcoming instruction selection patches since they can rely on existing
support for BUILD_VECTORs in the DAGCombiner.

compare_float.ll changed slightly because a BITCAST is no longer introduced
during legalization.
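A hedged sketch of the kind of constant splat this commit reroutes through ldi.[bhwd], assuming an MSA-enabled target (the function name and register number are illustrative):

    define <4 x i32> @splat_0x01010101() {
      ; 16843009 == 0x01010101; the splat analysis sees an 8-bit splat of 1,
      ; so this can be selected as 'ldi.b $w0, 1' plus a register-class copy
      ; instead of a constant-pool load.
      ret <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
    }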
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191299 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelDAGToDAG.cpp | 40 +++ lib/Target/Mips/MipsISelDAGToDAG.h | 17 ++ lib/Target/Mips/MipsISelLowering.cpp | 2 - lib/Target/Mips/MipsISelLowering.h | 6 - lib/Target/Mips/MipsMSAInstrInfo.td | 441 ++++++++++++++++++++------------- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 217 ++++++++++++++++ lib/Target/Mips/MipsSEISelDAGToDAG.h | 20 ++ lib/Target/Mips/MipsSEISelLowering.cpp | 160 +++++++----- 8 files changed, 656 insertions(+), 247 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index b428589..7766fea 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -99,6 +99,46 @@ bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, return false; } +bool MipsDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index e98d590..208701e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -76,6 +76,23 @@ private: virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Alias); + /// \brief Select constant vector splats. + virtual bool selectVSplat(SDNode *N, APInt &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm3. + virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm4. + virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm5. + virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm6. + virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm8. + virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a simm5. + virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value is a power of 2. 
+ virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + virtual SDNode *Select(SDNode *N); virtual std::pair selectNode(SDNode *Node) = 0; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 2a841b9..5f01936 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -221,8 +221,6 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VSMIN: return "MipsISD::VSMIN"; case MipsISD::VUMAX: return "MipsISD::VUMAX"; case MipsISD::VUMIN: return "MipsISD::VUMIN"; - case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; - case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 0d588c1..ae82e7e 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -172,12 +172,6 @@ namespace llvm { VUMAX, VUMIN, - // Special case of BUILD_VECTOR where all elements are the same. - VSPLAT, - // Special case of VSPLAT where the result is v2i64, the operand is - // constant, and the operand fits in a signed 10-bits value. - VSPLATD, - // Combined (XOR (OR $a, $b), -1) VNOR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 959c05a..60dcdce 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -def SDT_MipsSplat : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>]>; def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisInt<1>, @@ -34,8 +33,6 @@ def MipsVUMax : SDNode<"MipsISD::VUMAX", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; -def MipsVSplat : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>; -def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; @@ -47,6 +44,48 @@ def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +// Operands + +def uimm3 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def uimm4 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def uimm8 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def simm5 : Operand; + +def simm10 : Operand; + +def vsplat_uimm3 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_uimm4 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_uimm5 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_uimm6 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_uimm8 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_simm5 : Operand; + +def vsplat_simm10 : Operand; + // Pattern fragments def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), (MipsVExtractSExt node:$vec, node:$idx, i8)>; @@ -131,30 +170,95 @@ def vsetult_v8i16 : vsetcc_type; def vsetult_v4i32 : vsetcc_type; def vsetult_v2i64 : vsetcc_type; -def vsplati8 : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>; -def vsplati16 : 
PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>; -def vsplati32 : PatFrag<(ops node:$in), (v4i32 (MipsVSplat (i32 node:$in)))>; -def vsplati64 : PatFrag<(ops node:$in), (v2i64 (MipsVSplatD (i32 node:$in)))>; - -// Immediates -def immSExt5 : ImmLeaf(Imm);}]>; -def immSExt10: ImmLeaf(Imm);}]>; - -def uimm3 : Operand { - let PrintMethod = "printUnsignedImm"; +def vsplati8 : PatFrag<(ops node:$e0), + (v16i8 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplati16 : PatFrag<(ops node:$e0), + (v8i16 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplati32 : PatFrag<(ops node:$e0), + (v4i32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplati64 : PatFrag<(ops node:$e0), + (v2i64 (build_vector:$v0 node:$e0, node:$e0))>; + +class SplatPatLeaf + : PatLeaf { + Operand OpClass = opclass; } -def uimm4 : Operand { - let PrintMethod = "printUnsignedImm"; +class SplatComplexPattern roots = [], + list props = []> : + ComplexPattern { + Operand OpClass = opclass; } -def uimm8 : Operand { - let PrintMethod = "printUnsignedImm"; -} +def vsplati8_uimm3 : SplatComplexPattern; -def simm5 : Operand; +def vsplati8_uimm5 : SplatComplexPattern; -def simm10 : Operand; +def vsplati8_uimm8 : SplatComplexPattern; + +def vsplati8_simm5 : SplatComplexPattern; + +def vsplati16_uimm4 : SplatComplexPattern; + +def vsplati16_uimm5 : SplatComplexPattern; + +def vsplati16_simm5 : SplatComplexPattern; + +def vsplati32_uimm5 : SplatComplexPattern; + +def vsplati32_simm5 : SplatComplexPattern; + +def vsplati64_uimm5 : SplatComplexPattern; + +def vsplati64_uimm6 : SplatComplexPattern; + +def vsplati64_simm5 : SplatComplexPattern; + +// Any build_vector that is a constant splat with a value that is an exact +// power of 2 +def vsplat_uimm_pow2 : ComplexPattern; + +// Immediates +def immSExt5 : ImmLeaf(Imm);}]>; +def immSExt10: ImmLeaf(Imm);}]>; // Instruction encoding. 
class ADD_A_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010000>; @@ -912,47 +1016,14 @@ class MSA_BIT_D_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm3:$u3); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u3"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (vsplati8 immZExt3:$u3)))]; - InstrItinClass Itinerary = itin; -} - -class MSA_BIT_SPLATH_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm4:$u4); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u4"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (vsplati16 immZExt4:$u4)))]; - InstrItinClass Itinerary = itin; -} - -class MSA_BIT_SPLATW_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm5:$u5); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (vsplati32 immZExt5:$u5)))]; - InstrItinClass Itinerary = itin; -} - -class MSA_BIT_SPLATD_DESC_BASE { +class MSA_BIT_SPLAT_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm6:$u6); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u6"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (vsplati64 immZExt6:$u6)))]; + dag InOperandList = (ins RCWS:$ws, SplatImm.OpClass:$u); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, SplatImm:$u))]; InstrItinClass Itinerary = itin; } @@ -967,38 +1038,24 @@ class MSA_COPY_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm5:$u5); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (SplatNode immZExt5:$u5)))]; - InstrItinClass Itinerary = itin; -} - -class MSA_SI5_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, simm5:$s5); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $s5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (SplatNode immSExt5:$s5)))]; + dag InOperandList = (ins RCWS:$ws, SplatImm.OpClass:$imm); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $imm"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, SplatImm:$imm))]; InstrItinClass Itinerary = itin; } class MSA_I8_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm8:$u8); + dag InOperandList = (ins RCWS:$ws, SplatImm.OpClass:$u8); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (SplatNode immZExt8:$u8)))]; + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, SplatImm:$u8))]; InstrItinClass Itinerary = itin; } @@ -1013,13 +1070,14 @@ class MSA_I8_X_DESC_BASE { +class MSA_I10_LDI_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins simm10:$i10); + dag InOperandList = (ins vsplat_simm10:$i10); string AsmString = !strconcat(instr_asm, "\t$wd, $i10"); - list Pattern = [(set RCWD:$wd, (OpNode immSExt10:$i10))]; + // LDI is matched using custom matching code in MipsSEISelDAGToDAG.cpp + list Pattern = []; + bit hasSideEffects = 0; InstrItinClass Itinerary = itin; } @@ -1033,6 +1091,17 @@ class MSA_2R_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws"); + list Pattern = [(set RCWD:$wd, (VT (OpNode RCWS:$ws)))]; + InstrItinClass Itinerary = itin; +} + class MSA_2RF_DESC_BASE : @@ -1171,17 
+1240,17 @@ class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", add, MSA128H>, IsCommutable; class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", add, MSA128W>, IsCommutable; class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", add, MSA128D>, IsCommutable; -class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", add, vsplati8, MSA128B>; -class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", add, vsplati16, MSA128H>; -class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", add, vsplati32, MSA128W>; -class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", add, vsplati64, MSA128D>; +class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", add, vsplati8_uimm5, MSA128B>; +class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", add, vsplati16_uimm5, MSA128H>; +class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", add, vsplati32_uimm5, MSA128W>; +class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", add, vsplati64_uimm5, MSA128D>; class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", and, MSA128B>; class AND_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class AND_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class AND_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", and, vsplati8, MSA128B>; +class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", and, vsplati8_uimm8, MSA128B>; class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, MSA128B>; class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, MSA128H>; @@ -1304,11 +1373,11 @@ class BSEL_V_DESC { class BSELI_B_DESC { dag OutOperandList = (outs MSA128B:$wd); - dag InOperandList = (ins MSA128B:$wd_in, MSA128B:$ws, uimm8:$u8); + dag InOperandList = (ins MSA128B:$wd_in, MSA128B:$ws, vsplat_uimm8:$u8); string AsmString = "bseli.b\t$wd, $ws, $u8"; list Pattern = [(set MSA128B:$wd, (vselect MSA128B:$wd_in, MSA128B:$ws, - (vsplati8 immZExt8:$u8)))]; + vsplati8_uimm8:$u8))]; InstrItinClass Itinerary = NoItinerary; string Constraints = "$wd = $wd_in"; } @@ -1339,14 +1408,14 @@ class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", vseteq_v4i32, MSA128W>, class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", vseteq_v2i64, MSA128D>, IsCommutable; -class CEQI_B_DESC : MSA_SI5_DESC_BASE<"ceqi.b", vseteq_v16i8, vsplati8, - MSA128B>; -class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", vseteq_v8i16, vsplati16, - MSA128H>; -class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", vseteq_v4i32, vsplati32, - MSA128W>; -class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", vseteq_v2i64, vsplati64, - MSA128D>; +class CEQI_B_DESC : MSA_I5_DESC_BASE<"ceqi.b", vseteq_v16i8, vsplati8_simm5, + MSA128B>; +class CEQI_H_DESC : MSA_I5_DESC_BASE<"ceqi.h", vseteq_v8i16, vsplati16_simm5, + MSA128H>; +class CEQI_W_DESC : MSA_I5_DESC_BASE<"ceqi.w", vseteq_v4i32, vsplati32_simm5, + MSA128W>; +class CEQI_D_DESC : MSA_I5_DESC_BASE<"ceqi.d", vseteq_v2i64, vsplati64_simm5, + MSA128D>; class CFCMSA_DESC { dag OutOperandList = (outs GPR32:$rd); @@ -1366,23 +1435,23 @@ class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", vsetule_v8i16, MSA128H>; class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", vsetule_v4i32, MSA128W>; class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", vsetule_v2i64, MSA128D>; -class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", vsetle_v16i8, vsplati8, - MSA128B>; -class CLEI_S_H_DESC : MSA_SI5_DESC_BASE<"clei_s.h", vsetle_v8i16, vsplati16, - MSA128H>; -class CLEI_S_W_DESC : MSA_SI5_DESC_BASE<"clei_s.w", vsetle_v4i32, vsplati32, - MSA128W>; -class CLEI_S_D_DESC : MSA_SI5_DESC_BASE<"clei_s.d", vsetle_v2i64, vsplati64, - MSA128D>; - -class CLEI_U_B_DESC : MSA_I5_DESC_BASE<"clei_u.b", vsetule_v16i8, vsplati8, - MSA128B>; -class CLEI_U_H_DESC : 
MSA_I5_DESC_BASE<"clei_u.h", vsetule_v8i16, vsplati16, - MSA128H>; -class CLEI_U_W_DESC : MSA_I5_DESC_BASE<"clei_u.w", vsetule_v4i32, vsplati32, - MSA128W>; -class CLEI_U_D_DESC : MSA_I5_DESC_BASE<"clei_u.d", vsetule_v2i64, vsplati64, - MSA128D>; +class CLEI_S_B_DESC : MSA_I5_DESC_BASE<"clei_s.b", vsetle_v16i8, + vsplati8_simm5, MSA128B>; +class CLEI_S_H_DESC : MSA_I5_DESC_BASE<"clei_s.h", vsetle_v8i16, + vsplati16_simm5, MSA128H>; +class CLEI_S_W_DESC : MSA_I5_DESC_BASE<"clei_s.w", vsetle_v4i32, + vsplati32_simm5, MSA128W>; +class CLEI_S_D_DESC : MSA_I5_DESC_BASE<"clei_s.d", vsetle_v2i64, + vsplati64_simm5, MSA128D>; + +class CLEI_U_B_DESC : MSA_I5_DESC_BASE<"clei_u.b", vsetule_v16i8, + vsplati8_uimm5, MSA128B>; +class CLEI_U_H_DESC : MSA_I5_DESC_BASE<"clei_u.h", vsetule_v8i16, + vsplati16_uimm5, MSA128H>; +class CLEI_U_W_DESC : MSA_I5_DESC_BASE<"clei_u.w", vsetule_v4i32, + vsplati32_uimm5, MSA128W>; +class CLEI_U_D_DESC : MSA_I5_DESC_BASE<"clei_u.d", vsetule_v2i64, + vsplati64_uimm5, MSA128D>; class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", vsetlt_v16i8, MSA128B>; class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", vsetlt_v8i16, MSA128H>; @@ -1394,23 +1463,23 @@ class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", vsetult_v8i16, MSA128H>; class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", vsetult_v4i32, MSA128W>; class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", vsetult_v2i64, MSA128D>; -class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", vsetlt_v16i8, vsplati8, - MSA128B>; -class CLTI_S_H_DESC : MSA_SI5_DESC_BASE<"clti_s.h", vsetlt_v8i16, vsplati16, - MSA128H>; -class CLTI_S_W_DESC : MSA_SI5_DESC_BASE<"clti_s.w", vsetlt_v4i32, vsplati32, - MSA128W>; -class CLTI_S_D_DESC : MSA_SI5_DESC_BASE<"clti_s.d", vsetlt_v2i64, vsplati64, - MSA128D>; - -class CLTI_U_B_DESC : MSA_I5_DESC_BASE<"clti_u.b", vsetult_v16i8, vsplati8, - MSA128B>; -class CLTI_U_H_DESC : MSA_I5_DESC_BASE<"clti_u.h", vsetult_v8i16, vsplati16, - MSA128H>; -class CLTI_U_W_DESC : MSA_I5_DESC_BASE<"clti_u.w", vsetult_v4i32, vsplati32, - MSA128W>; -class CLTI_U_D_DESC : MSA_I5_DESC_BASE<"clti_u.d", vsetult_v2i64, vsplati64, - MSA128D>; +class CLTI_S_B_DESC : MSA_I5_DESC_BASE<"clti_s.b", vsetlt_v16i8, + vsplati8_simm5, MSA128B>; +class CLTI_S_H_DESC : MSA_I5_DESC_BASE<"clti_s.h", vsetlt_v8i16, + vsplati16_simm5, MSA128H>; +class CLTI_S_W_DESC : MSA_I5_DESC_BASE<"clti_s.w", vsetlt_v4i32, + vsplati32_simm5, MSA128W>; +class CLTI_S_D_DESC : MSA_I5_DESC_BASE<"clti_s.d", vsetlt_v2i64, + vsplati64_simm5, MSA128D>; + +class CLTI_U_B_DESC : MSA_I5_DESC_BASE<"clti_u.b", vsetult_v16i8, + vsplati8_uimm5, MSA128B>; +class CLTI_U_H_DESC : MSA_I5_DESC_BASE<"clti_u.h", vsetult_v8i16, + vsplati16_uimm5, MSA128H>; +class CLTI_U_W_DESC : MSA_I5_DESC_BASE<"clti_u.w", vsetult_v4i32, + vsplati32_uimm5, MSA128W>; +class CLTI_U_D_DESC : MSA_I5_DESC_BASE<"clti_u.d", vsetult_v2i64, + vsplati64_uimm5, MSA128D>; class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", vextract_sext_i8, v16i8, GPR32, MSA128B>; @@ -1592,9 +1661,12 @@ class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w, class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d, MSA128D, MSA128W>; -class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", vsplati8, MSA128B, GPR32>; -class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", vsplati16, MSA128H, GPR32>; -class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", vsplati32, MSA128W, GPR32>; +class FILL_B_DESC : MSA_2R_FILL_DESC_BASE<"fill.b", v16i8, vsplati8, MSA128B, + GPR32>; +class FILL_H_DESC : MSA_2R_FILL_DESC_BASE<"fill.h", v8i16, vsplati16, MSA128H, + 
GPR32>; +class FILL_W_DESC : MSA_2R_FILL_DESC_BASE<"fill.w", v4i32, vsplati32, MSA128W, + GPR32>; class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128W>; class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128D>; @@ -1779,10 +1851,10 @@ class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128H>; class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128W>; class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128D>; -class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", vsplati8, MSA128B>; -class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", vsplati16, MSA128H>; -class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", vsplati32, MSA128W>; -class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", vsplati64, MSA128D>; +class LDI_B_DESC : MSA_I10_LDI_DESC_BASE<"ldi.b", MSA128B>; +class LDI_H_DESC : MSA_I10_LDI_DESC_BASE<"ldi.h", MSA128H>; +class LDI_W_DESC : MSA_I10_LDI_DESC_BASE<"ldi.w", MSA128W>; +class LDI_D_DESC : MSA_I10_LDI_DESC_BASE<"ldi.d", MSA128D>; class LDX_DESC_BASE; class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", MipsVUMax, MSA128W>; class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", MipsVUMax, MSA128D>; -class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8, +class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8_simm5, MSA128B>; -class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", MipsVSMax, vsplati16, +class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", MipsVSMax, vsplati16_simm5, MSA128H>; -class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", MipsVSMax, vsplati32, +class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", MipsVSMax, vsplati32_simm5, MSA128W>; -class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", MipsVSMax, vsplati64, +class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", MipsVSMax, vsplati64_simm5, MSA128D>; -class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", MipsVUMax, vsplati8, +class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", MipsVUMax, vsplati8_uimm5, MSA128B>; -class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", MipsVUMax, vsplati16, +class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", MipsVUMax, vsplati16_uimm5, MSA128H>; -class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32, +class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32_uimm5, MSA128W>; -class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64, +class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64_uimm5, MSA128D>; class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B>; @@ -1863,22 +1935,22 @@ class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", MipsVUMin, MSA128H>; class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", MipsVUMin, MSA128W>; class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", MipsVUMin, MSA128D>; -class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8, +class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8_simm5, MSA128B>; -class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", MipsVSMin, vsplati16, +class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", MipsVSMin, vsplati16_simm5, MSA128H>; -class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", MipsVSMin, vsplati32, +class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", MipsVSMin, vsplati32_simm5, MSA128W>; -class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", MipsVSMin, vsplati64, +class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", MipsVSMin, vsplati64_simm5, MSA128D>; -class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", MipsVUMin, vsplati8, +class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", MipsVUMin, vsplati8_uimm5, MSA128B>; 
-class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", MipsVUMin, vsplati16, +class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", MipsVUMin, vsplati16_uimm5, MSA128H>; -class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32, +class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32_uimm5, MSA128W>; -class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64, +class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64_uimm5, MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; @@ -1942,14 +2014,15 @@ class NOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class NOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class NOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", MipsVNOR, vsplati8, MSA128B>; +class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", MipsVNOR, vsplati8_uimm8, + MSA128B>; class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", or, MSA128B>; class OR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class OR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8, MSA128B>; +class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8_uimm8, MSA128B>; class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, MSA128B>; class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, MSA128H>; @@ -1995,10 +2068,14 @@ class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128H>; class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128W>; class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128D>; -class SLLI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"slli.b", shl, MSA128B>; -class SLLI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"slli.h", shl, MSA128H>; -class SLLI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"slli.w", shl, MSA128W>; -class SLLI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"slli.d", shl, MSA128D>; +class SLLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.b", shl, vsplati8_uimm3, + MSA128B>; +class SLLI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.h", shl, vsplati16_uimm4, + MSA128H>; +class SLLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.w", shl, vsplati32_uimm5, + MSA128W>; +class SLLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.d", shl, vsplati64_uimm6, + MSA128D>; class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128B, MSA128B, GPR32>; @@ -2023,10 +2100,14 @@ class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128H>; class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128W>; class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128D>; -class SRAI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"srai.b", sra, MSA128B>; -class SRAI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"srai.h", sra, MSA128H>; -class SRAI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"srai.w", sra, MSA128W>; -class SRAI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"srai.d", sra, MSA128D>; +class SRAI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.b", sra, vsplati8_uimm3, + MSA128B>; +class SRAI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.h", sra, vsplati16_uimm4, + MSA128H>; +class SRAI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.w", sra, vsplati32_uimm5, + MSA128W>; +class SRAI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.d", sra, vsplati64_uimm6, + MSA128D>; class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128B>; class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128H>; @@ -2043,10 +2124,14 @@ class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128H>; class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128W>; class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128D>; -class 
SRLI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"srli.b", srl, MSA128B>;
-class SRLI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"srli.h", srl, MSA128H>;
-class SRLI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"srli.w", srl, MSA128W>;
-class SRLI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"srli.d", srl, MSA128D>;
+class SRLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.b", srl, vsplati8_uimm3,
+                                            MSA128B>;
+class SRLI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.h", srl, vsplati16_uimm4,
+                                            MSA128H>;
+class SRLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.w", srl, vsplati32_uimm5,
+                                            MSA128W>;
+class SRLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.d", srl, vsplati64_uimm6,
+                                            MSA128D>;
 
 class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128B>;
 class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128H>;
@@ -2123,10 +2208,10 @@ class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", sub, MSA128H>;
 class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", sub, MSA128W>;
 class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", sub, MSA128D>;
 
-class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", sub, vsplati8, MSA128B>;
-class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", sub, vsplati16, MSA128H>;
-class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32, MSA128W>;
-class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64, MSA128D>;
+class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", sub, vsplati8_uimm5, MSA128B>;
+class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", sub, vsplati16_uimm5, MSA128H>;
+class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32_uimm5, MSA128W>;
+class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64_uimm5, MSA128D>;
 
 class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, MSA128B>;
 class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, MSA128H>;
@@ -2138,7 +2223,7 @@ class XOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE;
 class XOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE;
 class XOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE;
 
-class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", xor, vsplati8, MSA128B>;
+class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", xor, vsplati8_uimm8, MSA128B>;
 
 // Instruction defs.
 def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC;
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 10221d5..8a9481c 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -374,6 +374,147 @@ bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
     selectAddrDefault(Addr, Base, Offset);
 }
 
+// Select constant vector splats.
+//
+// Returns true and sets Imm if:
+// * MSA is enabled
+// * N is an ISD::BUILD_VECTOR representing a constant splat
+// * The splat value fits in a signed 32-bit value.
+//
+// That last requirement isn't strictly a requirement of the instruction set
+// but it simplifies the callers by allowing them to assume they don't have to
+// handle 64-bit values. The callers will also be placing stricter requirements
+// on the immediates so this doesn't prohibit selection of legal immediates.
+bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
+  if (!Subtarget.hasMSA())
+    return false;
+
+  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
+
+  if (Node == NULL)
+    return false;
+
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                             HasAnyUndefs, 8,
+                             !Subtarget.isLittle()))
+    return false;
+
+  // None of the immediate forms can handle more than 32 bits
+  if (!SplatValue.isIntN(32))
+    return false;
+
+  Imm = SplatValue;
+
+  return true;
+}
+
+// Select constant vector splats.
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value fits in an integer with the specified signedness and
+//   width.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+//       sometimes a shuffle in big-endian mode.
+//
+// It's worth noting that this function is not used as part of the selection
+// of ldi.[bhwd] since it does not permit using the wrong-typed ldi.[bhwd]
+// instruction to achieve the desired bit pattern. ldi.[bhwd] is selected in
+// MipsSEDAGToDAGISel::selectNode.
+bool MipsSEDAGToDAGISel::
+selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
+                   unsigned ImmBitSize) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) ||
+        (!Signed && ImmValue.isIntN(ImmBitSize))) {
+      Imm = CurDAG->getTargetConstant(ImmValue, EltTy);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm3(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 3);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm4(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 4);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm5(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 5);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm6(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 6);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm8(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 8);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatSimm5(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, true, 5);
+}
+
+// Select constant vector splats whose value is a power of 2.
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a power of two.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+//       sometimes a shuffle in big-endian mode.
+bool MipsSEDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    int32_t Log2 = ImmValue.exactLogBase2();
+
+    if (Log2 != -1) {
+      Imm = CurDAG->getTargetConstant(Log2, EltTy);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
   unsigned Opcode = Node->getOpcode();
   SDLoc DL(Node);
@@ -545,6 +686,82 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
                                          MVT::Untyped, Ops);
     return std::make_pair(true, Res);
   }
+
+  case ISD::BUILD_VECTOR: {
+    // Select appropriate ldi.[bhwd] instructions for constant splats of
+    // 128-bit when MSA is enabled. Fixup any register class mismatches that
+    // occur as a result.
+    //
+    // This allows the compiler to use a wider range of immediates than would
+    // otherwise be allowed. If, for example, v4i32 could only use ldi.h then
+    // it would not be possible to load { 0x01010101, 0x01010101, 0x01010101,
+    // 0x01010101 } without using a constant pool. This would be sub-optimal
+    // when 'ldi.b wd, 1' is capable of producing that bit-pattern in the
+    // same set of registers. Similarly, ldi.h isn't capable of producing {
+    // 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can.
+
+    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+    APInt SplatValue, SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    unsigned LdiOp;
+    EVT ResVecTy = BVN->getValueType(0);
+    EVT ViaVecTy;
+
+    if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector())
+      return std::make_pair(false, (SDNode*)NULL);
+
+    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                              HasAnyUndefs, 8,
+                              !Subtarget.isLittle()))
+      return std::make_pair(false, (SDNode*)NULL);
+
+    switch (SplatBitSize) {
+    default:
+      return std::make_pair(false, (SDNode*)NULL);
+    case 8:
+      LdiOp = Mips::LDI_B;
+      ViaVecTy = MVT::v16i8;
+      break;
+    case 16:
+      LdiOp = Mips::LDI_H;
+      ViaVecTy = MVT::v8i16;
+      break;
+    case 32:
+      LdiOp = Mips::LDI_W;
+      ViaVecTy = MVT::v4i32;
+      break;
+    case 64:
+      LdiOp = Mips::LDI_D;
+      ViaVecTy = MVT::v2i64;
+      break;
+    }
+
+    if (!SplatValue.isSignedIntN(10))
+      return std::make_pair(false, (SDNode*)NULL);
+
+    SDValue Imm = CurDAG->getTargetConstant(SplatValue,
+                                            ViaVecTy.getVectorElementType());
+
+    SDNode *Res = CurDAG->getMachineNode(LdiOp, SDLoc(Node), ViaVecTy, Imm);
+
+    if (ResVecTy != ViaVecTy) {
+      // If LdiOp is writing to a different register class to ResVecTy, then
+      // fix it up here. This COPY_TO_REGCLASS should never cause a move.v
+      // since the source and destination register sets contain the same
+      // registers.
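// A standalone illustration (editor's sketch, not lines from the patch): the
// bit-pattern equivalences the comment above relies on can be checked with
// plain integer arithmetic, independent of any MSA machinery.
#include <cstdint>

// Replicate one byte into every byte of an i32, which is how a 128-bit
// 'ldi.b wd, B' result reads when reinterpreted as v4i32 lanes.
constexpr uint32_t splatByteAsI32Lane(uint8_t B) {
  return static_cast<uint32_t>(B) * 0x01010101u;
}
static_assert(splatByteAsI32Lane(1) == 0x01010101u,
              "'ldi.b wd, 1' yields the v4i32 splat of 0x01010101");

// 'ldi.d wd, 1' splats the i64 value 1, whose 32-bit halves are 1 and 0,
// giving the v4i32 pattern { 1, 0, 1, 0 } (little-endian lane order) that no
// single ldi.h or ldi.w immediate can express.
static_assert((UINT64_C(1) & 0xffffffffu) == 1u && (UINT64_C(1) >> 32) == 0u,
              "'ldi.d wd, 1' alternates 1 and 0 across i32 lanes");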
+ const TargetLowering *TLI = getTargetLowering(); + MVT ResVecTySimple = ResVecTy.getSimpleVT(); + const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); + Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, SDLoc(Node), + ResVecTy, SDValue(Res, 0), + CurDAG->getTargetConstant(RC->getID(), + MVT::i32)); + } + + return std::make_pair(true, Res); + } + } return std::make_pair(false, (SDNode*)NULL); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 22e597e..fe0da12 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -58,6 +58,26 @@ private: virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base, SDValue &Offset) const; + /// \brief Select constant vector splats. + virtual bool selectVSplat(SDNode *N, APInt &Imm) const; + /// \brief Select constant vector splats whose value fits in a given integer. + virtual bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const; + /// \brief Select constant vector splats whose value fits in a uimm3. + virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm4. + virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm5. + virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm6. + virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm8. + virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a simm5. + virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value is a power of 2. 
+ virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + virtual std::pair selectNode(SDNode *Node); virtual void processFunctionAfterISel(MachineFunction &MF); diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 1575cde..f135b5f 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -738,22 +738,11 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue NotOp; - ConstantSDNode *Const; if (ISD::isBuildVectorAllOnes(Op0.getNode())) NotOp = Op1; else if (ISD::isBuildVectorAllOnes(Op1.getNode())) NotOp = Op0; - else if ((Op0->getOpcode() == MipsISD::VSPLAT || - Op0->getOpcode() == MipsISD::VSPLATD) && - (Const = dyn_cast(Op0->getOperand(0))) && - Const->isAllOnesValue()) - NotOp = Op1; - else if ((Op1->getOpcode() == MipsISD::VSPLAT || - Op1->getOpcode() == MipsISD::VSPLATD) && - (Const = dyn_cast(Op1->getOperand(0))) && - Const->isAllOnesValue()) - NotOp = Op0; else return SDValue(); @@ -1084,14 +1073,38 @@ static SDValue lowerMSAInsertIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } -static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { +static SDValue lowerMSASplatImm(SDValue Op, SDValue ImmOp, SelectionDAG &DAG) { EVT ResTy = Op->getValueType(0); + EVT ViaVecTy = ResTy; + SmallVector Ops; + SDValue ImmHiOp; + SDLoc DL(Op); - unsigned SplatOp = MipsISD::VSPLAT; - if (ResTy == MVT::v2i64) - SplatOp = MipsISD::VSPLATD; + if (ViaVecTy == MVT::v2i64) { + ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp, + DAG.getConstant(31, MVT::i32)); + for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) { + Ops.push_back(ImmHiOp); + Ops.push_back(ImmOp); + } + ViaVecTy = MVT::v4i32; + } else { + for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) + Ops.push_back(ImmOp); + } + + SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, &Ops[0], + Ops.size()); + + if (ResTy != ViaVecTy) + Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); + + return Result; +} - return DAG.getNode(SplatOp, SDLoc(Op), ResTy, Op->getOperand(ImmOp)); +static SDValue +lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { + return lowerMSASplatImm(Op, Op->getOperand(ImmOp), DAG); } static SDValue lowerMSAUnaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { @@ -1306,8 +1319,16 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerMSABinaryIntr(Op, DAG, ISD::FDIV); case Intrinsic::mips_fill_b: case Intrinsic::mips_fill_h: - case Intrinsic::mips_fill_w: - return lowerMSAUnaryIntr(Op, DAG, MipsISD::VSPLAT); + case Intrinsic::mips_fill_w: { + SmallVector Ops; + EVT ResTy = Op->getValueType(0); + + for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) + Ops.push_back(Op->getOperand(1)); + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), ResTy, &Ops[0], + Ops.size()); + } case Intrinsic::mips_flog2_w: case Intrinsic::mips_flog2_d: return lowerMSAUnaryIntr(Op, DAG, ISD::FLOG2); @@ -1331,7 +1352,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_ldi_h: case Intrinsic::mips_ldi_w: case Intrinsic::mips_ldi_d: - return lowerMSAUnaryIntr(Op, DAG, MipsISD::VSPLAT); + return lowerMSASplatImm(Op, 1, DAG); case Intrinsic::mips_max_s_b: case Intrinsic::mips_max_s_h: case Intrinsic::mips_max_s_w: @@ -1597,18 +1618,36 @@ lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 
DAG.getValueType(EltTy)); } +static bool isConstantOrUndef(const SDValue Op) { + if (Op->getOpcode() == ISD::UNDEF) + return true; + if (dyn_cast(Op)) + return true; + if (dyn_cast(Op)) + return true; + return false; +} + +static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { + for (unsigned i = 0; i < Op->getNumOperands(); ++i) + if (isConstantOrUndef(Op->getOperand(i))) + return true; + return false; +} + // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the // backend. // // Lowers according to the following rules: -// - Vectors of 128-bits may be legal subject to the other rules. Other sizes -// are not legal. -// - Non-constant splats are legal and are lowered to MipsISD::VSPLAT. -// - Constant splats with an element size of 32-bits or less are legal and are -// lowered to MipsISD::VSPLAT. -// - Constant splats with an element size of 64-bits but whose value would fit -// within a 10 bit immediate are legal and are lowered to MipsISD::VSPLATD. -// - All other ISD::BUILD_VECTORS are not legal +// - Constant splats are legal as-is as long as the SplatBitSize is a power of +// 2 less than or equal to 64 and the value fits into a signed 10-bit +// immediate +// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize +// is a power of 2 less than or equal to 64 and the value does not fit into a +// signed 10-bit immediate +// - Non-constant splats are legal as-is. +// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. +// - All others are illegal and must be expanded. SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { BuildVectorSDNode *Node = cast(Op); @@ -1623,52 +1662,51 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, - !Subtarget->isLittle())) { - SDValue Result; - EVT TmpVecTy; - EVT ConstTy = MVT::i32; - unsigned SplatOp = MipsISD::VSPLAT; + !Subtarget->isLittle()) && SplatBitSize <= 64) { + // We can only cope with 8, 16, 32, or 64-bit elements + if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && + SplatBitSize != 64) + return SDValue(); + + // If the value fits into a simm10 then we can use ldi.[bhwd] + if (SplatValue.isSignedIntN(10)) + return Op; + + EVT ViaVecTy; switch (SplatBitSize) { default: return SDValue(); - case 64: - TmpVecTy = MVT::v2i64; - - // i64 is an illegal type on Mips32, but if it the constant fits into a - // signed 10-bit value then we can still handle it using VSPLATD and an - // i32 constant - if (HasMips64) - ConstTy = MVT::i64; - else if (isInt<10>(SplatValue.getSExtValue())) { - SplatValue = SplatValue.trunc(32); - SplatOp = MipsISD::VSPLATD; - } else - return SDValue(); - break; - case 32: - TmpVecTy = MVT::v4i32; + case 8: + ViaVecTy = MVT::v16i8; break; case 16: - TmpVecTy = MVT::v8i16; - SplatValue = SplatValue.sext(32); + ViaVecTy = MVT::v8i16; break; - case 8: - TmpVecTy = MVT::v16i8; - SplatValue = SplatValue.sext(32); + case 32: + ViaVecTy = MVT::v4i32; break; + case 64: + // There's no fill.d to fall back on for 64-bit values + return SDValue(); } - Result = DAG.getNode(SplatOp, DL, TmpVecTy, - DAG.getConstant(SplatValue, ConstTy)); - if (ResTy != Result.getValueType()) - Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); + SmallVector Ops; + SDValue Constant = DAG.getConstant(SplatValue.sextOrSelf(32), MVT::i32); + + for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) + Ops.push_back(Constant); + + 
SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Node), ViaVecTy, + &Ops[0], Ops.size()); + + if (ViaVecTy != ResTy) + Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); return Result; - } - else if (isSplatVector(Node)) - return DAG.getNode(MipsISD::VSPLAT, DL, ResTy, Op->getOperand(0)); - else { + } else if (isSplatVector(Node)) + return Op; + else if (!isConstantOrUndefBUILD_VECTOR(Node)) { // Use INSERT_VECTOR_ELT operations rather than expand to stores. // The resulting code is the same length as the expansion, but it doesn't // use memory operations -- cgit v1.1 From 7e0df9aa2966d0462e34511524a4958e226b74ee Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 14:02:15 +0000 Subject: [mips][msa] Added support for matching vshf from normal IR (i.e. not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191301 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 1 + lib/Target/Mips/MipsISelLowering.h | 3 ++ lib/Target/Mips/MipsMSAInstrInfo.td | 25 +++++++++-- lib/Target/Mips/MipsSEISelLowering.cpp | 78 ++++++++++++++++++++++++++++++++++ lib/Target/Mips/MipsSEISelLowering.h | 3 ++ 5 files changed, 106 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 5f01936..04f9083 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -224,6 +224,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; + case MipsISD::VSHF: return "MipsISD::VSHF"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index ae82e7e..0cb67b8 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -172,6 +172,9 @@ namespace llvm { VUMAX, VUMIN, + // Vector Shuffle with mask as an operand + VSHF, // Generic shuffle + // Combined (XOR (OR $a, $b), -1) VNOR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 60dcdce..4909743 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -20,6 +20,9 @@ def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>]>; +def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisInt<1>, SDTCisVec<1>, + SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; @@ -35,6 +38,7 @@ def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; +def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>; def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; @@ -1119,6 +1123,19 @@ class MSA_3R_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWD:$wd_in, RCWS:$ws, RCWT:$wt); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); + list Pattern = [(set RCWD:$wd, (MipsVSHF RCWD:$wd_in, RCWS:$ws, + RCWT:$wt))]; + string Constraints = "$wd = $wd_in"; + InstrItinClass Itinerary = itin; +} + class MSA_3R_4R_DESC_BASE; class SUBVI_W_DESC : 
MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32_uimm5, MSA128W>; class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64_uimm5, MSA128D>; -class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, MSA128B>; -class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, MSA128H>; -class VSHF_W_DESC : MSA_3R_DESC_BASE<"vshf.w", int_mips_vshf_w, MSA128W>; -class VSHF_D_DESC : MSA_3R_DESC_BASE<"vshf.d", int_mips_vshf_d, MSA128D>; +class VSHF_B_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.b", MSA128B>; +class VSHF_H_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.h", MSA128H>; +class VSHF_W_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.w", MSA128W>; +class VSHF_D_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.d", MSA128D>; class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", xor, MSA128B>; class XOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index f135b5f..929e91e 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -180,6 +180,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::SRL, Ty, Legal); setOperationAction(ISD::SUB, Ty, Legal); setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); setOperationAction(ISD::VSELECT, Ty, Legal); setOperationAction(ISD::XOR, Ty, Legal); @@ -259,6 +260,7 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); } return MipsTargetLowering::LowerOperation(Op, DAG); @@ -1470,6 +1472,12 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_subvi_d: return lowerMSABinaryImmIntr(Op, DAG, ISD::SUB, lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_vshf_b: + case Intrinsic::mips_vshf_h: + case Intrinsic::mips_vshf_w: + case Intrinsic::mips_vshf_d: + return DAG.getNode(MipsISD::VSHF, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); case Intrinsic::mips_xor_v: return lowerMSABinaryIntr(Op, DAG, ISD::XOR); case Intrinsic::mips_xori_b: @@ -1727,6 +1735,76 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, return SDValue(); } +// Lower VECTOR_SHUFFLE into VSHF. +// +// This mostly consists of converting the shuffle indices in Indices into a +// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is +// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, +// if the type is v8i16 and all the indices are less than 8 then the second +// operand is unused and can be replaced with anything. We choose to replace it +// with the used operand since this reduces the number of instructions overall. 
+static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + SmallVector Ops; + SDValue Op0; + SDValue Op1; + EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); + EVT MaskEltTy = MaskVecTy.getVectorElementType(); + bool Using1stVec = false; + bool Using2ndVec = false; + SDLoc DL(Op); + int ResTyNumElts = ResTy.getVectorNumElements(); + + for (int i = 0; i < ResTyNumElts; ++i) { + // Idx == -1 means UNDEF + int Idx = Indices[i]; + + if (0 <= Idx && Idx < ResTyNumElts) + Using1stVec = true; + if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) + Using2ndVec = true; + } + + for (SmallVector::iterator I = Indices.begin(); I != Indices.end(); + ++I) + Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy)); + + SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0], + Ops.size()); + + if (Using1stVec && Using2ndVec) { + Op0 = Op->getOperand(0); + Op1 = Op->getOperand(1); + } else if (Using1stVec) + Op0 = Op1 = Op->getOperand(0); + else if (Using2ndVec) + Op0 = Op1 = Op->getOperand(1); + else + llvm_unreachable("shuffle vector mask references neither vector operand?"); + + return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op0, Op1); +} + +// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the +// indices in the shuffle. +SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + ShuffleVectorSDNode *Node = cast(Op); + EVT ResTy = Op->getValueType(0); + + if (!ResTy.is128BitVector()) + return SDValue(); + + int ResTyNumElts = ResTy.getVectorNumElements(); + SmallVector Indices; + + for (int i = 0; i < ResTyNumElts; ++i) + Indices.push_back(Node->getMaskElt(i)); + + return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); +} + MachineBasicBlock * MipsSETargetLowering:: emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ // $bb: diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 644fe02..9b69fb5 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -75,6 +75,9 @@ namespace llvm { SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + /// \brief Lower VECTOR_SHUFFLE into one of a number of instructions + /// depending on the indices in the shuffle. + SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const; -- cgit v1.1 From 93d995719e2459a6e9ccdb2c93a8ede8fa88c899 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 14:20:00 +0000 Subject: [mips][msa] Added support for matching shf from normal IR (i.e. 
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191302 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 5 ++ lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 1 + lib/Target/Mips/MipsISelLowering.cpp | 1 + lib/Target/Mips/MipsISelLowering.h | 1 + lib/Target/Mips/MipsMSAInstrInfo.td | 19 ++++++- lib/Target/Mips/MipsSEISelLowering.cpp | 74 +++++++++++++++++++++++++ 6 files changed, 98 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 26321c9..2612e3b 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -211,6 +211,11 @@ printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) { O << MipsFCCToString((Mips::CondCode)MO.getImm()); } +void MipsInstPrinter:: +printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) { + llvm_unreachable("TODO"); +} + bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI, unsigned OpNo, raw_ostream &OS) { OS << "\t" << Str << "\t"; diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 1253ab0..11590ad 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -96,6 +96,7 @@ private: void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O); void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O); + void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O); bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo, raw_ostream &OS); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 04f9083..90d2cc6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -225,6 +225,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; case MipsISD::VSHF: return "MipsISD::VSHF"; + case MipsISD::SHF: return "MipsISD::SHF"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 0cb67b8..9e9e995 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -174,6 +174,7 @@ namespace llvm { // Vector Shuffle with mask as an operand VSHF, // Generic shuffle + SHF, // 4-element set shuffle. 
   // Combined (XOR (OR $a, $b), -1)
   VNOR,
 
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 4909743..5592ac5 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -23,6 +23,8 @@ def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
 def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>,
                                     SDTCisInt<1>, SDTCisVec<1>,
                                     SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
+def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+                                   SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>;
 
 def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>;
 def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>;
@@ -39,6 +41,7 @@ def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp,
 def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
 def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>;
+def MipsSHF : SDNode<"MipsISD::SHF", SDT_SHF>;
 
 def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>;
 def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>;
@@ -1074,6 +1077,16 @@ class MSA_I8_X_DESC_BASE {
+class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterClass RCWD,
+                           RegisterClass RCWS = RCWD,
+                           InstrItinClass itin = NoItinerary> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, uimm8:$u8);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+  list<dag> Pattern = [(set RCWD:$wd, (MipsSHF immZExt8:$u8, RCWS:$ws))];
+  InstrItinClass Itinerary = itin;
+}
+
 class MSA_I10_LDI_DESC_BASE {
   dag OutOperandList = (outs RCWD:$wd);
@@ -2066,9 +2079,9 @@ class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, MSA128H>;
 class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, MSA128W>;
 class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, MSA128D>;
 
-class SHF_B_DESC : MSA_I8_X_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B>;
-class SHF_H_DESC : MSA_I8_X_DESC_BASE<"shf.h", int_mips_shf_h, MSA128H>;
-class SHF_W_DESC : MSA_I8_X_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W>;
+class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128B>;
+class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128H>;
+class SHF_W_DESC : MSA_I8_SHF_DESC_BASE<"shf.w", MSA128W>;
 
 class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B>;
 class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 929e91e..b79e532 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1428,6 +1428,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_pcnt_w:
   case Intrinsic::mips_pcnt_d:
     return lowerMSAUnaryIntr(Op, DAG, ISD::CTPOP);
+  case Intrinsic::mips_shf_b:
+  case Intrinsic::mips_shf_h:
+  case Intrinsic::mips_shf_w:
+    return DAG.getNode(MipsISD::SHF, SDLoc(Op), Op->getValueType(0),
+                       Op->getOperand(2), Op->getOperand(1));
   case Intrinsic::mips_sll_b:
   case Intrinsic::mips_sll_h:
   case Intrinsic::mips_sll_w:
@@ -1735,6 +1740,72 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
   return SDValue();
 }
 
+// Lower VECTOR_SHUFFLE into SHF (if possible).
+//
+// SHF splits the vector into blocks of four elements, then shuffles these
+// elements according to a <4 x i2> constant (encoded as an integer immediate).
+//
+// It is therefore possible to lower into SHF when the mask takes the form:
+//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
+//
+// When undef's appear they are treated as if they were whatever value is
+// necessary in order to fit the above form.
+//
+// For example:
+//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
+//                                 i32 7, i32 6, i32 5, i32 4>
+// is lowered to:
+//   (SHF_H $w0, $w1, 27)
+// where the 27 comes from:
+//   3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
+                                       SmallVector<int, 16> Indices,
+                                       SelectionDAG &DAG) {
+  int SHFIndices[4] = { -1, -1, -1, -1 };
+
+  if (Indices.size() < 4)
+    return SDValue();
+
+  for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned j = i; j < Indices.size(); j += 4) {
+      int Idx = Indices[j];
+
+      // Convert from vector index to 4-element subvector index
+      // If an index refers to an element outside of the subvector then give up
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef
+      if (SHFIndices[i] == -1)
+        SHFIndices[i] = Idx;
+
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up
+      if (!(Idx == -1 || Idx == SHFIndices[i]))
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero
+  APInt Imm(32, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SHFIndices[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+
+  return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy,
+                     DAG.getConstant(Imm, MVT::i32), Op->getOperand(0));
+}
+
 // Lower VECTOR_SHUFFLE into VSHF.
 //
 // This mostly consists of converting the shuffle indices in Indices into a
@@ -1802,6 +1873,9 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
   for (int i = 0; i < ResTyNumElts; ++i)
     Indices.push_back(Node->getMaskElt(i));
 
+  SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
   return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
 }
 
-- cgit v1.1

From f515964d36834ec918fe831029bc72ccdcec34d3 Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Tue, 24 Sep 2013 14:36:12 +0000
Subject: [mips][msa] Added support for matching ilv[lr], ilvod, and ilvev
 from normal IR (i.e. not intrinsics)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191304 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsISelLowering.cpp   |   4 +
 lib/Target/Mips/MipsISelLowering.h     |   4 +
 lib/Target/Mips/MipsMSAInstrInfo.td    |  44 +++++----
 lib/Target/Mips/MipsSEISelLowering.cpp | 157 +++++++++++++++++++++++++++++++++
 4 files changed, 190 insertions(+), 19 deletions(-)

(limited to 'lib/Target')

diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 90d2cc6..6f5918d 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -226,6 +226,10 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::VNOR:              return "MipsISD::VNOR";
   case MipsISD::VSHF:              return "MipsISD::VSHF";
   case MipsISD::SHF:               return "MipsISD::SHF";
+  case MipsISD::ILVEV:             return "MipsISD::ILVEV";
+  case MipsISD::ILVOD:             return "MipsISD::ILVOD";
+  case MipsISD::ILVL:              return "MipsISD::ILVL";
+  case MipsISD::ILVR:              return "MipsISD::ILVR";
   default:                         return NULL;
   }
 }
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 9e9e995..2fece82 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -175,6 +175,10 @@ namespace llvm {
       // Vector Shuffle with mask as an operand
       VSHF,  // Generic shuffle
       SHF,   // 4-element set shuffle.
+ ILVEV, // Interleave even elements + ILVOD, // Interleave odd elements + ILVL, // Interleave left elements + ILVR, // Interleave right elements // Combined (XOR (OR $a, $b), -1) VNOR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 5592ac5..a9c421e 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -25,6 +25,8 @@ def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; +def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; @@ -42,6 +44,10 @@ def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>; def MipsSHF : SDNode<"MipsISD::SHF", SDT_SHF>; +def MipsILVEV : SDNode<"MipsISD::ILVEV", SDT_ILV>; +def MipsILVOD : SDNode<"MipsISD::ILVOD", SDT_ILV>; +def MipsILVL : SDNode<"MipsISD::ILVL", SDT_ILV>; +def MipsILVR : SDNode<"MipsISD::ILVR", SDT_ILV>; def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; @@ -1833,25 +1839,25 @@ class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w, MSA128W, class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d, MSA128D, MSA128W, MSA128W>; -class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, MSA128B>; -class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, MSA128H>; -class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", int_mips_ilvev_w, MSA128W>; -class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", int_mips_ilvev_d, MSA128D>; - -class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", int_mips_ilvl_b, MSA128B>; -class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", int_mips_ilvl_h, MSA128H>; -class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", int_mips_ilvl_w, MSA128W>; -class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", int_mips_ilvl_d, MSA128D>; - -class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", int_mips_ilvod_b, MSA128B>; -class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", int_mips_ilvod_h, MSA128H>; -class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", int_mips_ilvod_w, MSA128W>; -class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", int_mips_ilvod_d, MSA128D>; - -class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", int_mips_ilvr_b, MSA128B>; -class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, MSA128H>; -class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, MSA128W>; -class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, MSA128D>; +class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", MipsILVEV, MSA128B>; +class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", MipsILVEV, MSA128H>; +class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", MipsILVEV, MSA128W>; +class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", MipsILVEV, MSA128D>; + +class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", MipsILVL, MSA128B>; +class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", MipsILVL, MSA128H>; +class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", MipsILVL, MSA128W>; +class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", MipsILVL, MSA128D>; + +class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", MipsILVOD, MSA128B>; +class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", MipsILVOD, MSA128H>; +class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", 
MipsILVOD, MSA128W>; +class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", MipsILVOD, MSA128D>; + +class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", MipsILVR, MSA128B>; +class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", MipsILVR, MSA128H>; +class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", MipsILVR, MSA128W>; +class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", MipsILVR, MSA128D>; class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", vinsert_v16i8, MSA128B, GPR32>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index b79e532..4710e6a 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1346,6 +1346,30 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fsub_w: case Intrinsic::mips_fsub_d: return lowerMSABinaryIntr(Op, DAG, ISD::FSUB); + case Intrinsic::mips_ilvev_b: + case Intrinsic::mips_ilvev_h: + case Intrinsic::mips_ilvev_w: + case Intrinsic::mips_ilvev_d: + return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvl_b: + case Intrinsic::mips_ilvl_h: + case Intrinsic::mips_ilvl_w: + case Intrinsic::mips_ilvl_d: + return DAG.getNode(MipsISD::ILVL, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvod_b: + case Intrinsic::mips_ilvod_h: + case Intrinsic::mips_ilvod_w: + case Intrinsic::mips_ilvod_d: + return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvr_b: + case Intrinsic::mips_ilvr_h: + case Intrinsic::mips_ilvr_w: + case Intrinsic::mips_ilvr_d: + return DAG.getNode(MipsISD::ILVR, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_insert_b: case Intrinsic::mips_insert_h: case Intrinsic::mips_insert_w: @@ -1806,6 +1830,127 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, DAG.getConstant(Imm, MVT::i32), Op->getOperand(0)); } +// Lower VECTOR_SHUFFLE into ILVEV (if possible). +// +// ILVEV interleaves the even elements from each vector. +// +// It is possible to lower into ILVEV when the mask takes the form: +// <0, n, 2, n+2, 4, n+4, ...> +// where n is the number of elements in the vector. +// +// When undef's appear in the mask they are treated as if they were whatever +// value is necessary in order to fit the above form. +static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + assert ((Indices.size() % 2) == 0); + int WsIdx = 0; + int WtIdx = ResTy.getVectorNumElements(); + + for (unsigned i = 0; i < Indices.size(); i += 2) { + if (Indices[i] != -1 && Indices[i] != WsIdx) + return SDValue(); + if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) + return SDValue(); + WsIdx += 2; + WtIdx += 2; + } + + return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0), + Op->getOperand(1)); +} + +// Lower VECTOR_SHUFFLE into ILVOD (if possible). +// +// ILVOD interleaves the odd elements from each vector. +// +// It is possible to lower into ILVOD when the mask takes the form: +// <1, n+1, 3, n+3, 5, n+5, ...> +// where n is the number of elements in the vector. +// +// When undef's appear in the mask they are treated as if they were whatever +// value is necessary in order to fit the above form. 
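// Editor's aside: a hypothetical, self-contained helper (not part of the
// patch) that condenses the four interleave checks implemented around here.
// The mask convention matches the comments above; -1 (undef) matches anything.
#include <vector>

enum IlvKind { ILV_EV, ILV_OD, ILV_L, ILV_R };

bool matchesIlv(const std::vector<int> &Mask, int N, IlvKind Kind) {
  // First index taken from the left operand, and the per-pair step:
  //   ILVEV <0, n, 2, n+2, ...>   ILVOD <1, n+1, 3, n+3, ...>
  //   ILVL  <0, n, 1, n+1, ...>   ILVR  <n/2, n+n/2, n/2+1, n+n/2+1, ...>
  int Ws = (Kind == ILV_OD) ? 1 : (Kind == ILV_R) ? N / 2 : 0;
  int Step = (Kind == ILV_EV || Kind == ILV_OD) ? 2 : 1;
  int Wt = N + Ws;

  for (unsigned I = 0; I + 1 < Mask.size(); I += 2) {
    if (Mask[I] != -1 && Mask[I] != Ws)
      return false;
    if (Mask[I + 1] != -1 && Mask[I + 1] != Wt)
      return false;
    Ws += Step;
    Wt += Step;
  }
  return true;
}

// Concretely, for v4i32 (N == 4): <0,4,2,6> is ILV_EV, <1,5,3,7> is ILV_OD,
// <0,4,1,5> is ILV_L, and <2,6,3,7> is ILV_R. The PCKEV/PCKOD checks added
// two commits later follow the same template with a single running index
// (<0, 2, 4, ...> and <1, 3, 5, ...>).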
+static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+  int WsIdx = 1;
+  int WtIdx = ResTy.getVectorNumElements() + 1;
+
+  for (unsigned i = 0; i < Indices.size(); i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+      return SDValue();
+    WsIdx += 2;
+    WtIdx += 2;
+  }
+
+  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVL (if possible).
+//
+// ILVL interleaves consecutive elements from the left half of each vector.
+//
+// It is possible to lower into ILVL when the mask takes the form:
+//   <0, n, 1, n+1, 2, n+2, ...>
+// where n is the number of elements in the vector.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+  int WsIdx = 0;
+  int WtIdx = ResTy.getVectorNumElements();
+
+  for (unsigned i = 0; i < Indices.size(); i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+      return SDValue();
+    WsIdx++;
+    WtIdx++;
+  }
+
+  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVR (if possible).
+//
+// ILVR interleaves consecutive elements from the right half of each vector.
+//
+// It is possible to lower into ILVR when the mask takes the form:
+//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+// where n is the number of elements in the vector and x is half n.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+  unsigned NumElts = ResTy.getVectorNumElements();
+  int WsIdx = NumElts / 2;
+  int WtIdx = NumElts + NumElts / 2;
+
+  for (unsigned i = 0; i < Indices.size(); i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+      return SDValue();
+    WsIdx++;
+    WtIdx++;
+  }
+
+  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
 // Lower VECTOR_SHUFFLE into VSHF.
 //
 // This mostly consists of converting the shuffle indices in Indices into a
@@ -1876,6 +2021,18 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
   SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
   if (Result.getNode())
     return Result;
+  Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
   return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
 }
 
-- cgit v1.1

From 3706eda52c4565016959902a3f5aaf7271516286 Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Tue, 24 Sep 2013 14:53:25 +0000
Subject: [mips][msa] Added support for matching pckev and pckod from normal IR (i.e.
not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191306 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 2 + lib/Target/Mips/MipsISelLowering.h | 2 + lib/Target/Mips/MipsMSAInstrInfo.td | 18 +++++---- lib/Target/Mips/MipsSEISelLowering.cpp | 70 ++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 6f5918d..8fb1efe 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -230,6 +230,8 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::ILVOD: return "MipsISD::ILVOD"; case MipsISD::ILVL: return "MipsISD::ILVL"; case MipsISD::ILVR: return "MipsISD::ILVR"; + case MipsISD::PCKEV: return "MipsISD::PCKEV"; + case MipsISD::PCKOD: return "MipsISD::PCKOD"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2fece82..2021114 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -179,6 +179,8 @@ namespace llvm { ILVOD, // Interleave odd elements ILVL, // Interleave left elements ILVR, // Interleave right elements + PCKEV, // Pack even elements + PCKOD, // Pack odd elements // Combined (XOR (OR $a, $b), -1) VNOR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index a9c421e..3fd52c6 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -48,6 +48,8 @@ def MipsILVEV : SDNode<"MipsISD::ILVEV", SDT_ILV>; def MipsILVOD : SDNode<"MipsISD::ILVOD", SDT_ILV>; def MipsILVL : SDNode<"MipsISD::ILVL", SDT_ILV>; def MipsILVR : SDNode<"MipsISD::ILVR", SDT_ILV>; +def MipsPCKEV : SDNode<"MipsISD::PCKEV", SDT_ILV>; +def MipsPCKOD : SDNode<"MipsISD::PCKOD", SDT_ILV>; def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; @@ -2060,15 +2062,15 @@ class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8_uimm8, MSA128B>; -class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, MSA128B>; -class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, MSA128H>; -class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", int_mips_pckev_w, MSA128W>; -class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", int_mips_pckev_d, MSA128D>; +class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", MipsPCKEV, MSA128B>; +class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", MipsPCKEV, MSA128H>; +class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", MipsPCKEV, MSA128W>; +class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", MipsPCKEV, MSA128D>; -class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", int_mips_pckod_b, MSA128B>; -class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", int_mips_pckod_h, MSA128H>; -class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", int_mips_pckod_w, MSA128W>; -class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", int_mips_pckod_d, MSA128D>; +class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", MipsPCKOD, MSA128B>; +class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", MipsPCKOD, MSA128H>; +class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", MipsPCKOD, MSA128W>; +class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", MipsPCKOD, MSA128D>; class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", ctpop, MSA128B>; class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp 
b/lib/Target/Mips/MipsSEISelLowering.cpp index 4710e6a..e86f4cb 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1447,6 +1447,18 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_ori_b: return lowerMSABinaryImmIntr(Op, DAG, ISD::OR, lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_pckev_b: + case Intrinsic::mips_pckev_h: + case Intrinsic::mips_pckev_w: + case Intrinsic::mips_pckev_d: + return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_pckod_b: + case Intrinsic::mips_pckod_h: + case Intrinsic::mips_pckod_w: + case Intrinsic::mips_pckod_d: + return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_pcnt_b: case Intrinsic::mips_pcnt_h: case Intrinsic::mips_pcnt_w: @@ -1951,6 +1963,58 @@ static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, Op->getOperand(1)); } +// Lower VECTOR_SHUFFLE into PCKEV (if possible). +// +// PCKEV copies the even elements of each vector into the result vector. +// +// It is possible to lower into PCKEV when the mask takes the form: +// <0, 2, 4, ..., n, n+2, n+4, ...> +// where n is the number of elements in the vector. +// +// When undef's appear in the mask they are treated as if they were whatever +// value is necessary in order to fit the above form. +static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + assert ((Indices.size() % 2) == 0); + int Idx = 0; + + for (unsigned i = 0; i < Indices.size(); ++i) { + if (Indices[i] != -1 && Indices[i] != Idx) + return SDValue(); + Idx += 2; + } + + return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0), + Op->getOperand(1)); +} + +// Lower VECTOR_SHUFFLE into PCKOD (if possible). +// +// PCKOD copies the odd elements of each vector into the result vector. +// +// It is possible to lower into PCKOD when the mask takes the form: +// <1, 3, 5, ..., n+1, n+3, n+5, ...> +// where n is the number of elements in the vector. +// +// When undef's appear in the mask they are treated as if they were whatever +// value is necessary in order to fit the above form. +static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + assert ((Indices.size() % 2) == 0); + int Idx = 1; + + for (unsigned i = 0; i < Indices.size(); ++i) { + if (Indices[i] != -1 && Indices[i] != Idx) + return SDValue(); + Idx += 2; + } + + return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0), + Op->getOperand(1)); +} + // Lower VECTOR_SHUFFLE into VSHF. // // This mostly consists of converting the shuffle indices in Indices into a @@ -2033,6 +2097,12 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG); if (Result.getNode()) return Result; + Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); } -- cgit v1.1 From 4da61345ec553a4f3ab3cedf6f21452374f1fda1 Mon Sep 17 00:00:00 2001 From: Yunzhong Gao Date: Tue, 24 Sep 2013 18:21:52 +0000 Subject: Adding a feature flag to the llvm backend for x86 TBM instruction set. 
Adding TBM feature to bdver2 processor; piledriver supports this instruction
set according to the following document:
http://developer.amd.com/wordpress/media/2012/10/New-Bulldozer-and-Piledriver-Instructions.pdf

Phabricator code review is located here:
http://llvm-reviews.chandlerc.com/D1692

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191324 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86.td           | 5 ++++-
 lib/Target/X86/X86Subtarget.cpp | 5 +++++
 lib/Target/X86/X86Subtarget.h   | 4 ++++
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'lib/Target')

diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 82716f8..0b49d61 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -117,6 +117,8 @@ def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
                                   "Enable AES instructions",
                                   [FeatureSSE2]>;
+def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
+                                  "Enable TBM instructions">;
 def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
                                     "Support MOVBE instruction">;
 def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
@@ -304,7 +306,8 @@ def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
 def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                       FeatureAES, FeaturePCLMUL,
                       FeatureF16C, FeatureLZCNT,
-                      FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
+                      FeaturePOPCNT, FeatureBMI, FeatureTBM,
+                      FeatureFMA]>;
 def : Proc<"geode",          [Feature3DNowA]>;
 def : Proc<"winchip-c6",     [FeatureMMX]>;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index f5b2614..05db662 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -384,6 +384,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
         ToggleFeature(X86::FeatureSHA);
       }
     }
+    if (IsAMD && ((ECX >> 21) & 0x1)) {
+      HasTBM = true;
+      ToggleFeature(X86::FeatureTBM);
+    }
   }
 }
 
@@ -492,6 +496,7 @@ void X86Subtarget::initializeEnvironment() {
   HasFMA = false;
   HasFMA4 = false;
   HasXOP = false;
+  HasTBM = false;
   HasMOVBE = false;
   HasRDRAND = false;
   HasF16C = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 90378fc..21c2d57 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -97,6 +97,9 @@ protected:
   /// HasXOP - Target has XOP instructions
   bool HasXOP;
 
+  /// HasTBM - Target has TBM instructions.
+  bool HasTBM;
+
   /// HasMOVBE - True if the processor has the MOVBE instruction.
   bool HasMOVBE;
 
@@ -274,6 +277,7 @@ public:
   // FIXME: Favor FMA when both are enabled. Is this the right thing to do?
   bool hasFMA4() const { return HasFMA4 && !HasFMA; }
   bool hasXOP() const { return HasXOP; }
+  bool hasTBM() const { return HasTBM; }
   bool hasMOVBE() const { return HasMOVBE; }
   bool hasRDRAND() const { return HasRDRAND; }
   bool hasF16C() const { return HasF16C; }
-- cgit v1.1

From dfca6eec3171802d6fcb091da01604ef4420fb3b Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Wed, 25 Sep 2013 00:26:16 +0000
Subject: CriticalAntiDepBreaker is no longer needed for armv7 scheduling.

This is being disabled because it is no longer needed for performance. It is
only used by the postRA scheduler, which is also planned for removal, and it
is implemented with an outdated view of register liveness. It considers
aliases instead of register units, assumes valid kill flags, and assumes
implicit uses on partial register defs. Kill flags and implicit operands are
error-prone and impossible to verify.
We should gradually eliminate dependence on them in the postRA phases. Targets that still benefit from this should move to the MI scheduler. If that doesn't solve the problem, then we should add a hook to regalloc to optimize reload placement. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191348 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 4589a43..3b34a2b 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -282,8 +282,6 @@ bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; - CriticalPathRCs.clear(); - CriticalPathRCs.push_back(&ARM::GPRRegClass); + Mode = TargetSubtargetInfo::ANTIDEP_NONE; return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } -- cgit v1.1 From 793803449870a661c1a09e400df9b04492772196 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 25 Sep 2013 00:30:25 +0000 Subject: [mips] Define getTargetNode as a template function. No intended functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191350 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16ISelLowering.cpp | 4 +- lib/Target/Mips/MipsISelLowering.cpp | 134 ++++++++++++++++++++------------- lib/Target/Mips/MipsISelLowering.h | 11 ++- 3 files changed, 94 insertions(+), 55 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index c8d0a52..89f3d3b 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -474,7 +474,9 @@ getOpndList(SmallVectorImpl &Ops, if (NeedMips16Helper) { RegsToPass.push_front(std::make_pair(V0Reg, Callee)); JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); - JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); + JumpTarget = getAddrGlobal(cast(JumpTarget), + JumpTarget.getValueType(), DAG, + MipsII::MO_GOT); } else RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee)); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 8fb1efe..8f93ed3 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -80,70 +80,91 @@ SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { return DAG.getRegister(FI->getGlobalBaseReg(), Ty); } -static SDValue getTargetNode(SDValue Op, SelectionDAG &DAG, unsigned Flag) { - EVT Ty = Op.getValueType(); - - if (GlobalAddressSDNode *N = dyn_cast(Op)) - return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(Op), Ty, 0, - Flag); - if (ExternalSymbolSDNode *N = dyn_cast(Op)) - return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); - if (BlockAddressSDNode *N = dyn_cast(Op)) - return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); - if (JumpTableSDNode *N = dyn_cast(Op)) - return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); - if (ConstantPoolSDNode *N = dyn_cast(Op)) - return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), - N->getOffset(), Flag); - +template +static SDValue getTargetNode(NodeTy *Node, EVT Ty, SelectionDAG &DAG, + unsigned Flag) { llvm_unreachable("Unexpected node type."); return SDValue(); } -static SDValue 
getAddrNonPIC(SDValue Op, SelectionDAG &DAG) { - SDLoc DL(Op); - EVT Ty = Op.getValueType(); - SDValue Hi = getTargetNode(Op, DAG, MipsII::MO_ABS_HI); - SDValue Lo = getTargetNode(Op, DAG, MipsII::MO_ABS_LO); +template<> +SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, unsigned Flag) { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +template<> +SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, + SelectionDAG &DAG, unsigned Flag) { + return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); +} + +template<> +SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, unsigned Flag) { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); +} + +template<> +SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, unsigned Flag) { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +template<> +SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, unsigned Flag) { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); +} + +template +static SDValue getAddrNonPIC(NodeTy *N, EVT Ty, SelectionDAG &DAG) { + SDLoc DL(N); + SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI); + SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO); return DAG.getNode(ISD::ADD, DL, Ty, DAG.getNode(MipsISD::Hi, DL, Ty, Hi), DAG.getNode(MipsISD::Lo, DL, Ty, Lo)); } -SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG, +template +SDValue MipsTargetLowering::getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG, bool HasMips64) const { - SDLoc DL(Op); - EVT Ty = Op.getValueType(); + SDLoc DL(N); unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), - getTargetNode(Op, DAG, GOTFlag)); + getTargetNode(N, Ty, DAG, GOTFlag)); SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT, MachinePointerInfo::getGOT(), false, false, false, 0); unsigned LoFlag = HasMips64 ? 
MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; - SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(Op, DAG, LoFlag)); + SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, + getTargetNode(N, Ty, DAG, LoFlag)); return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo); } -SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG, +template +SDValue MipsTargetLowering::getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { - SDLoc DL(Op); - EVT Ty = Op.getValueType(); + SDLoc DL(N); SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), - getTargetNode(Op, DAG, Flag)); + getTargetNode(N, Ty, DAG, Flag)); return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt, MachinePointerInfo::getGOT(), false, false, false, 0); } -SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, +template +SDValue MipsTargetLowering::getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, + SelectionDAG &DAG, unsigned HiFlag, unsigned LoFlag) const { - SDLoc DL(Op); - EVT Ty = Op.getValueType(); - SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag)); + SDLoc DL(N); + SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, + getTargetNode(N, Ty, DAG, HiFlag)); Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty)); SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi, - getTargetNode(Op, DAG, LoFlag)); + getTargetNode(N, Ty, DAG, LoFlag)); return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper, MachinePointerInfo::getGOT(), false, false, false, 0); } @@ -1478,7 +1499,9 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here SDLoc DL(Op); - const GlobalValue *GV = cast(Op)->getGlobal(); + EVT Ty = Op.getValueType(); + GlobalAddressSDNode *N = cast(Op); + const GlobalValue *GV = N->getGlobal(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { const MipsTargetObjectFile &TLOF = @@ -1495,26 +1518,29 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, } // %hi/%lo relocation - return getAddrNonPIC(Op, DAG); + return getAddrNonPIC(N, Ty, DAG); } if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa(GV))) - return getAddrLocal(Op, DAG, HasMips64); + return getAddrLocal(N, Ty, DAG, HasMips64); if (LargeGOT) - return getAddrGlobalLargeGOT(Op, DAG, MipsII::MO_GOT_HI16, + return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16); - return getAddrGlobal(Op, DAG, + return getAddrGlobal(N, Ty, DAG, HasMips64 ? 
MipsII::MO_GOT_DISP : MipsII::MO_GOT16); } SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + BlockAddressSDNode *N = cast(Op); + EVT Ty = Op.getValueType(); + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) - return getAddrNonPIC(Op, DAG); + return getAddrNonPIC(N, Ty, DAG); - return getAddrLocal(Op, DAG, HasMips64); + return getAddrLocal(N, Ty, DAG, HasMips64); } SDValue MipsTargetLowering:: @@ -1601,10 +1627,13 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const SDValue MipsTargetLowering:: lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + JumpTableSDNode *N = cast(Op); + EVT Ty = Op.getValueType(); + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) - return getAddrNonPIC(Op, DAG); + return getAddrNonPIC(N, Ty, DAG); - return getAddrLocal(Op, DAG, HasMips64); + return getAddrLocal(N, Ty, DAG, HasMips64); } SDValue MipsTargetLowering:: @@ -1619,11 +1648,13 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP); // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); + ConstantPoolSDNode *N = cast(Op); + EVT Ty = Op.getValueType(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) - return getAddrNonPIC(Op, DAG); + return getAddrNonPIC(N, Ty, DAG); - return getAddrLocal(Op, DAG, HasMips64); + return getAddrLocal(N, Ty, DAG, HasMips64); } SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { @@ -2469,18 +2500,19 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25 bool GlobalOrExternal = false, InternalLinkage = false; SDValue CalleeLo; + EVT Ty = Callee.getValueType(); if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (IsPICCall) { InternalLinkage = G->getGlobal()->hasInternalLinkage(); if (InternalLinkage) - Callee = getAddrLocal(Callee, DAG, HasMips64); + Callee = getAddrLocal(G, Ty, DAG, HasMips64); else if (LargeGOT) - Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16, + Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16); else - Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL); + Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL); } else Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0, MipsII::MO_NO_FLAG); @@ -2491,10 +2523,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), MipsII::MO_NO_FLAG); else if (LargeGOT) - Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16, + Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16); else // N64 || PIC - Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL); + Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL); GlobalOrExternal = true; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2021114..f778d36 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -248,11 +248,16 @@ namespace llvm { protected: SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; - SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const; + template + SDValue getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG, + bool HasMips64) const; - SDValue getAddrGlobal(SDValue Op, 
SelectionDAG &DAG, unsigned Flag) const; + template + SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; - SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + template + SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG, unsigned HiFlag, unsigned LoFlag) const; /// This function fills Ops, which is the list of operands that will later -- cgit v1.1 From d47aa3adbef5ee2343b61c96292454f3a5b77dbc Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 25 Sep 2013 00:34:42 +0000 Subject: [mips] Move public functions to the beginning of the class definition. No intended functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMachineFunction.h | 66 +++++++++++++++++------------------ 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index b05b348..36c2e59 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -26,39 +26,6 @@ namespace llvm { /// MipsFunctionInfo - This class is derived from MachineFunction private /// Mips target-specific information for each MachineFunction. class MipsFunctionInfo : public MachineFunctionInfo { - virtual void anchor(); - - MachineFunction& MF; - /// SRetReturnReg - Some subtargets require that sret lowering includes - /// returning the value of the returned struct in a register. This field - /// holds the virtual register into which the sret argument is passed. - unsigned SRetReturnReg; - - /// GlobalBaseReg - keeps track of the virtual register initialized for - /// use as the global base register. This is used for PIC in some PIC - /// relocation models. - unsigned GlobalBaseReg; - - /// Mips16SPAliasReg - keeps track of the virtual register initialized for - /// use as an alias for SP for use in load/store of halfword/byte from/to - /// the stack - unsigned Mips16SPAliasReg; - - /// VarArgsFrameIndex - FrameIndex for start of varargs area. - int VarArgsFrameIndex; - - /// True if function has a byval argument. - bool HasByvalArg; - - /// Size of incoming argument area. - unsigned IncomingArgSize; - - /// CallsEhReturn - Whether the function calls llvm.eh.return. - bool CallsEhReturn; - - /// Frame objects for spilling eh data registers. - int EhDataRegFI[4]; - public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0), @@ -92,6 +59,39 @@ public: int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } bool isEhDataRegFI(int FI) const; +private: + virtual void anchor(); + + MachineFunction& MF; + /// SRetReturnReg - Some subtargets require that sret lowering includes + /// returning the value of the returned struct in a register. This field + /// holds the virtual register into which the sret argument is passed. + unsigned SRetReturnReg; + + /// GlobalBaseReg - keeps track of the virtual register initialized for + /// use as the global base register. This is used for PIC in some PIC + /// relocation models. + unsigned GlobalBaseReg; + + /// Mips16SPAliasReg - keeps track of the virtual register initialized for + /// use as an alias for SP for use in load/store of halfword/byte from/to + /// the stack + unsigned Mips16SPAliasReg; + + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + + /// True if function has a byval argument. 
+ bool HasByvalArg; + + /// Size of incoming argument area. + unsigned IncomingArgSize; + + /// CallsEhReturn - Whether the function calls llvm.eh.return. + bool CallsEhReturn; + + /// Frame objects for spilling eh data registers. + int EhDataRegFI[4]; }; } // end of namespace llvm -- cgit v1.1 From e36a62c23d332658e4513d67eedb392b9c27f470 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 25 Sep 2013 00:52:34 +0000 Subject: Revert r191350. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191353 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16ISelLowering.cpp | 4 +- lib/Target/Mips/MipsISelLowering.cpp | 134 +++++++++++++-------------------- lib/Target/Mips/MipsISelLowering.h | 11 +-- 3 files changed, 55 insertions(+), 94 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 89f3d3b..c8d0a52 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -474,9 +474,7 @@ getOpndList(SmallVectorImpl &Ops, if (NeedMips16Helper) { RegsToPass.push_front(std::make_pair(V0Reg, Callee)); JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); - JumpTarget = getAddrGlobal(cast(JumpTarget), - JumpTarget.getValueType(), DAG, - MipsII::MO_GOT); + JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); } else RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee)); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 8f93ed3..8fb1efe 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -80,91 +80,70 @@ SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { return DAG.getRegister(FI->getGlobalBaseReg(), Ty); } -template -static SDValue getTargetNode(NodeTy *Node, EVT Ty, SelectionDAG &DAG, - unsigned Flag) { - llvm_unreachable("Unexpected node type."); - return SDValue(); -} - -template<> -SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, - SelectionDAG &DAG, unsigned Flag) { - return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); -} - -template<> -SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, - SelectionDAG &DAG, unsigned Flag) { - return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); -} - -template<> -SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, - SelectionDAG &DAG, unsigned Flag) { - return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); -} +static SDValue getTargetNode(SDValue Op, SelectionDAG &DAG, unsigned Flag) { + EVT Ty = Op.getValueType(); -template<> -SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, - SelectionDAG &DAG, unsigned Flag) { - return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); -} + if (GlobalAddressSDNode *N = dyn_cast(Op)) + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(Op), Ty, 0, + Flag); + if (ExternalSymbolSDNode *N = dyn_cast(Op)) + return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); + if (BlockAddressSDNode *N = dyn_cast(Op)) + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); + if (JumpTableSDNode *N = dyn_cast(Op)) + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); + if (ConstantPoolSDNode *N = dyn_cast(Op)) + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); -template<> -SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, - SelectionDAG &DAG, unsigned Flag) { - return DAG.getTargetConstantPool(N->getConstVal(), Ty, 
N->getAlignment(), - N->getOffset(), Flag); + llvm_unreachable("Unexpected node type."); + return SDValue(); } -template -static SDValue getAddrNonPIC(NodeTy *N, EVT Ty, SelectionDAG &DAG) { - SDLoc DL(N); - SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI); - SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO); +static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + EVT Ty = Op.getValueType(); + SDValue Hi = getTargetNode(Op, DAG, MipsII::MO_ABS_HI); + SDValue Lo = getTargetNode(Op, DAG, MipsII::MO_ABS_LO); return DAG.getNode(ISD::ADD, DL, Ty, DAG.getNode(MipsISD::Hi, DL, Ty, Hi), DAG.getNode(MipsISD::Lo, DL, Ty, Lo)); } -template -SDValue MipsTargetLowering::getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG, +SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const { - SDLoc DL(N); + SDLoc DL(Op); + EVT Ty = Op.getValueType(); unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), - getTargetNode(N, Ty, DAG, GOTFlag)); + getTargetNode(Op, DAG, GOTFlag)); SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT, MachinePointerInfo::getGOT(), false, false, false, 0); unsigned LoFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; - SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, - getTargetNode(N, Ty, DAG, LoFlag)); + SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(Op, DAG, LoFlag)); return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo); } -template -SDValue MipsTargetLowering::getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG, +SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const { - SDLoc DL(N); + SDLoc DL(Op); + EVT Ty = Op.getValueType(); SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), - getTargetNode(N, Ty, DAG, Flag)); + getTargetNode(Op, DAG, Flag)); return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt, MachinePointerInfo::getGOT(), false, false, false, 0); } -template -SDValue MipsTargetLowering::getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, - SelectionDAG &DAG, +SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, unsigned HiFlag, unsigned LoFlag) const { - SDLoc DL(N); - SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, - getTargetNode(N, Ty, DAG, HiFlag)); + SDLoc DL(Op); + EVT Ty = Op.getValueType(); + SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag)); Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty)); SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi, - getTargetNode(N, Ty, DAG, LoFlag)); + getTargetNode(Op, DAG, LoFlag)); return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper, MachinePointerInfo::getGOT(), false, false, false, 0); } @@ -1499,9 +1478,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here SDLoc DL(Op); - EVT Ty = Op.getValueType(); - GlobalAddressSDNode *N = cast(Op); - const GlobalValue *GV = N->getGlobal(); + const GlobalValue *GV = cast(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { const MipsTargetObjectFile &TLOF = @@ -1518,29 +1495,26 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, } // %hi/%lo relocation - return getAddrNonPIC(N, Ty, DAG); + return getAddrNonPIC(Op, DAG); } if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa(GV))) - return getAddrLocal(N, Ty, DAG, HasMips64); + return getAddrLocal(Op, 
DAG, HasMips64); if (LargeGOT) - return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16, + return getAddrGlobalLargeGOT(Op, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16); - return getAddrGlobal(N, Ty, DAG, + return getAddrGlobal(Op, DAG, HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16); } SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - BlockAddressSDNode *N = cast(Op); - EVT Ty = Op.getValueType(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) - return getAddrNonPIC(N, Ty, DAG); + return getAddrNonPIC(Op, DAG); - return getAddrLocal(N, Ty, DAG, HasMips64); + return getAddrLocal(Op, DAG, HasMips64); } SDValue MipsTargetLowering:: @@ -1627,13 +1601,10 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const SDValue MipsTargetLowering:: lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - JumpTableSDNode *N = cast(Op); - EVT Ty = Op.getValueType(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) - return getAddrNonPIC(N, Ty, DAG); + return getAddrNonPIC(Op, DAG); - return getAddrLocal(N, Ty, DAG, HasMips64); + return getAddrLocal(Op, DAG, HasMips64); } SDValue MipsTargetLowering:: @@ -1648,13 +1619,11 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP); // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); - ConstantPoolSDNode *N = cast(Op); - EVT Ty = Op.getValueType(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) - return getAddrNonPIC(N, Ty, DAG); + return getAddrNonPIC(Op, DAG); - return getAddrLocal(N, Ty, DAG, HasMips64); + return getAddrLocal(Op, DAG, HasMips64); } SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { @@ -2500,19 +2469,18 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25 bool GlobalOrExternal = false, InternalLinkage = false; SDValue CalleeLo; - EVT Ty = Callee.getValueType(); if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (IsPICCall) { InternalLinkage = G->getGlobal()->hasInternalLinkage(); if (InternalLinkage) - Callee = getAddrLocal(G, Ty, DAG, HasMips64); + Callee = getAddrLocal(Callee, DAG, HasMips64); else if (LargeGOT) - Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16, + Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16); else - Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL); + Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL); } else Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0, MipsII::MO_NO_FLAG); @@ -2523,10 +2491,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), MipsII::MO_NO_FLAG); else if (LargeGOT) - Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16, + Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16); else // N64 || PIC - Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL); + Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL); GlobalOrExternal = true; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index f778d36..2021114 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -248,16 +248,11 @@ namespace llvm { protected: SDValue 
getGlobalReg(SelectionDAG &DAG, EVT Ty) const; - template - SDValue getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG, - bool HasMips64) const; + SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const; - template - SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG, - unsigned Flag) const; + SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const; - template - SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG, + SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, unsigned HiFlag, unsigned LoFlag) const; /// This function fills Ops, which is the list of operands that will later -- cgit v1.1 From 8ba3f9c9008223136207295b48b53c8aefffa178 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Wed, 25 Sep 2013 09:36:11 +0000 Subject: MC: Remove vestigial PCSymbol field from AsmInfo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191362 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 1 - lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 3 --- lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 1 - lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 1 - lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 2 -- 5 files changed, 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index d213a45..ab8c81e 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -22,7 +22,6 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(StringRef TT) { PrivateGlobalPrefix = ".L"; WeakRefDirective ="\t.weak\t"; - PCSymbol="."; CommentString = ";"; AlignmentIsInBytes = false; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 6822507..1f3e5b4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -22,7 +22,6 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { } IsLittleEndian = false; - PCSymbol = "."; CommentString = ";"; ExceptionsType = ExceptionHandling::DwarfCFI; @@ -55,8 +54,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { // Debug Information SupportsDebugInformation = true; - PCSymbol = "."; - // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) MinInstAlignment = 4; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 59136f3..e26d7a3 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -21,7 +21,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { HasStaticCtorDtorReferenceInStaticMode = false; LinkerRequiresNonEmptyDwarfLines = true; MaxInstLength = 16; - PCSymbol = "$"; SeparatorString = "\n"; CommentColumn = 40; CommentString = ";"; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index 9e27aa0..1d39140 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -19,7 +19,6 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) { IsLittleEndian = false; CommentString = "#"; - PCSymbol = "."; GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp 
b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 36b6eb4..826caf4 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -59,7 +59,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { // for .S files on other systems. Perhaps this is because the file system // wasn't always case preserving or something. CommentString = "##"; - PCSymbol = "."; SupportsDebugInformation = true; UseDataRegionDirectives = MarkedJTDataRegions; @@ -92,7 +91,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; - PCSymbol = "."; // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) -- cgit v1.1 From c2b840cb7c58e59c68aaa589841c41fb272df66d Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 25 Sep 2013 10:11:07 +0000 Subject: [SystemZ] Add instruction-shortening pass When loading immediates into a GR32, the port preferred LHI, followed by LLILH or LLILL, followed by IILF. LHI and IILF are natural 32-bit operations, but LLILH and LLILL also clear the upper 32 bits of the register. This was represented as taking a 32-bit subreg of a 64-bit assignment. Using subregs for something as simple as a move immediate was probably a bad idea. Also, I have patches to add support for the high-word facility, and we don't want something like LLILH and LLILL to stop the high word of the same GPR from being used. This patch therefore uses LHI and IILF to begin with and adds a late machine-specific pass to use LLILH and LLILL if the other half of the register is not live. The high-word patches extend this behavior to IIHF, LLIHL and LLIHH. No behavioral change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191363 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/CMakeLists.txt | 1 + .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 17 +++ .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 9 ++ lib/Target/SystemZ/SystemZ.h | 1 + lib/Target/SystemZ/SystemZInstrInfo.td | 8 -- lib/Target/SystemZ/SystemZRegisterInfo.h | 4 + lib/Target/SystemZ/SystemZShortenInst.cpp | 159 +++++++++++++++++++++ lib/Target/SystemZ/SystemZTargetMachine.cpp | 2 + 8 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 lib/Target/SystemZ/SystemZShortenInst.cpp (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index ab657f6..8a4eaa3 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_target(SystemZCodeGen SystemZMCInstLower.cpp SystemZRegisterInfo.cpp SystemZSelectionDAGInfo.cpp + SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp ) diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 3653192..cb97bca 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -69,6 +69,23 @@ const unsigned SystemZMC::FP128Regs[16] = { SystemZ::F12Q, SystemZ::F13Q, 0, 0 }; +unsigned SystemZMC::getFirstReg(unsigned Reg) { + static unsigned Map[SystemZ::NUM_TARGET_REGS]; + static bool Initialized = false; + if (!Initialized) { + for (unsigned I = 0; I < 16; ++I) { + Map[GR32Regs[I]] = I; + Map[GR64Regs[I]] = I; + Map[GR128Regs[I]] = I; + Map[FP32Regs[I]] = I; + Map[FP64Regs[I]] = I; + Map[FP128Regs[I]] = I; + } + } + assert(Reg < SystemZ::NUM_TARGET_REGS); + return
Map[Reg]; +} + static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 01ef093..84184af 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -47,6 +47,15 @@ namespace SystemZMC { extern const unsigned FP32Regs[16]; extern const unsigned FP64Regs[16]; extern const unsigned FP128Regs[16]; + + // Return the 0-based number of the first architectural register that + // contains the given LLVM register. E.g. R1D -> 1. + unsigned getFirstReg(unsigned Reg); + + // Return the given register as a GR64. + inline unsigned getRegAsGR64(unsigned Reg) { + return GR64Regs[getFirstReg(Reg)]; + } } MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index 1647d93..4a6b4db 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -106,6 +106,7 @@ namespace llvm { FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); + FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); } // end namespace llvm; #endif diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index d028461..ca678ab 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1212,14 +1212,6 @@ def : Pat<(ctlz GR64:$src), def : Pat<(i64 (anyext GR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>; -// There are no 32-bit equivalents of LLILL and LLILH, so use a full -// 64-bit move followed by a subreg. This preserves the invariant that -// all GR32 operations only modify the low 32 bits. -def : Pat<(i32 imm32ll16:$src), - (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>; -def : Pat<(i32 imm32lh16:$src), - (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>; - // Extend GR32s and GR64s to GR128s. let usesCustomInserter = 1 in { def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index c447e4d..edd107d 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -48,6 +48,10 @@ public: LLVM_OVERRIDE { return true; } + virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const + LLVM_OVERRIDE { + return true; + } virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const LLVM_OVERRIDE; virtual BitVector getReservedRegs(const MachineFunction &MF) diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp new file mode 100644 index 0000000..526ae5c --- /dev/null +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -0,0 +1,159 @@ +//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to replace instructions with shorter forms. 
For example, +// IILF can be replaced with LLILL or LLILH if the constant fits and if the +// other 32 bits of the GR64 destination are not live. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-shorten-inst" + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +using namespace llvm; + +namespace { + class SystemZShortenInst : public MachineFunctionPass { + public: + static char ID; + SystemZShortenInst(const SystemZTargetMachine &tm); + + virtual const char *getPassName() const { + return "SystemZ Instruction Shortening"; + } + + bool processBlock(MachineBasicBlock *MBB); + bool runOnMachineFunction(MachineFunction &F); + + private: + bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther, + unsigned LLIxL, unsigned LLIxH); + + const SystemZInstrInfo *TII; + + // LowGPRs[I] has bit N set if LLVM register I includes the low + // word of GPR N. HighGPRs is the same for the high word. + unsigned LowGPRs[SystemZ::NUM_TARGET_REGS]; + unsigned HighGPRs[SystemZ::NUM_TARGET_REGS]; + }; + + char SystemZShortenInst::ID = 0; +} // end of anonymous namespace + +FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) { + return new SystemZShortenInst(TM); +} + +SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(0), LowGPRs(), HighGPRs() { + // Set up LowGPRs and HighGPRs. + for (unsigned I = 0; I < 16; ++I) { + LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I; + LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I; + HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I; + if (unsigned GR128 = SystemZMC::GR128Regs[I]) { + LowGPRs[GR128] |= 3 << I; + HighGPRs[GR128] |= 3 << I; + } + } +} + +// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH +// are the halfword immediate loads for the same word. Try to use one of them +// instead of IIxF. If MI loads the high word, GPRMap[X] is the set of high +// words referenced by LLVM register X while LiveOther is the mask of low +// words that are currently live, and vice versa. +bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap, + unsigned LiveOther, unsigned LLIxL, + unsigned LLIxH) { + unsigned Reg = MI.getOperand(0).getReg(); + assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); + unsigned GPRs = GPRMap[Reg]; + assert(GPRs != 0 && "Register must be a GPR"); + if (GPRs & LiveOther) + return false; + + uint64_t Imm = MI.getOperand(1).getImm(); + if (SystemZ::isImmLL(Imm)) { + MI.setDesc(TII->get(LLIxL)); + MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg)); + return true; + } + if (SystemZ::isImmLH(Imm)) { + MI.setDesc(TII->get(LLIxH)); + MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg)); + MI.getOperand(1).setImm(Imm >> 16); + return true; + } + return false; +} + +// Process all instructions in MBB. Return true if something changed. +bool SystemZShortenInst::processBlock(MachineBasicBlock *MBB) { + bool Changed = false; + + // Work out which words are live on exit from the block. 
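+  // (Editor's note, not part of the original commit: "words" here means the
+  // low and high 32-bit halves of the 64-bit GPRs. LiveLow and LiveHigh are
+  // 16-bit masks in which bit N describes the corresponding half of GPR N,
+  // so the pass can cheaply ask "is the other half of this register live?".)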
+ unsigned LiveLow = 0; + unsigned LiveHigh = 0; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(), + LE = (*SI)->livein_end(); LI != LE; ++LI) { + unsigned Reg = *LI; + assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); + LiveLow |= LowGPRs[Reg]; + LiveHigh |= HighGPRs[Reg]; + } + } + + // Iterate backwards through the block looking for instructions to change. + for (MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(), + MBBE = MBB->rend(); MBBI != MBBE; ++MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + if (Opcode == SystemZ::IILF32) + Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL, + SystemZ::LLILH); + unsigned UsedLow = 0; + unsigned UsedHigh = 0; + for (MachineInstr::mop_iterator MOI = MI.operands_begin(), + MOE = MI.operands_end(); MOI != MOE; ++MOI) { + MachineOperand &MO = *MOI; + if (MO.isReg()) { + if (unsigned Reg = MO.getReg()) { + assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); + if (MO.isDef()) { + LiveLow &= ~LowGPRs[Reg]; + LiveHigh &= ~HighGPRs[Reg]; + } else if (!MO.isUndef()) { + UsedLow |= LowGPRs[Reg]; + UsedHigh |= HighGPRs[Reg]; + } + } + } + } + LiveLow |= UsedLow; + LiveHigh |= UsedHigh; + } + + return Changed; +} + +bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { + TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo()); + + bool Changed = false; + for (MachineFunction::iterator MFI = F.begin(), MFE = F.end(); + MFI != MFE; ++MFI) + Changed |= processBlock(MFI); + + return Changed; +} diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index f276152..dee92e9 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -97,6 +97,8 @@ bool SystemZPassConfig::addPreEmitPass() { // preventing that would be a win or not. if (getOptLevel() != CodeGenOpt::None) addPass(createSystemZElimComparePass(getSystemZTargetMachine())); + if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZShortenInstPass(getSystemZTargetMachine())); addPass(createSystemZLongBranchPass(getSystemZTargetMachine())); return true; } -- cgit v1.1 From 9f3f4bf377ac93fd32c8b93ae23378a82ad0f353 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 25 Sep 2013 10:20:08 +0000 Subject: [SystemZ] Define the return instruction as a pseudo alias of BR This is the first of a few patches to reduce the duplication of encoding information. The return instruction is a normal BR in which one of the registers is fixed.
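[Editor's note: to make the alias mechanism concrete, the expansion happens when the MachineInstr is lowered to an MCInst, not in the encoder. The following is a minimal sketch, not code from the patch; it assumes only the SystemZ::BR and SystemZ::R14D names and the MCInstBuilder API that the diff below itself uses:

    // Hypothetical helper showing the whole trick in isolation: the Return
    // pseudo becomes a plain BR whose register operand is fixed to %r14.
    static MCInst lowerReturnPseudo() {
      return MCInstBuilder(SystemZ::BR)   // br = branch on condition, mask 15
                 .addReg(SystemZ::R14D);  // the return address lives in %r14
    }

The emitted text is therefore exactly "br %r14", matching the comment on the new Return definition below.]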
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191364 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZAsmPrinter.cpp | 12 ++++++++++-- lib/Target/SystemZ/SystemZInstrFormats.td | 10 ++++++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 9 +++------ 3 files changed, 23 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 3a57ea0..8422761 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" @@ -26,9 +27,16 @@ using namespace llvm; void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SystemZMCInstLower Lower(Mang, MF->getContext(), *this); MCInst LoweredMI; - Lower.lower(MI, LoweredMI); + switch (MI->getOpcode()) { + case SystemZ::Return: + LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D); + break; + + default: + SystemZMCInstLower(Mang, MF->getContext(), *this).lower(MI, LoweredMI); + break; + } OutStreamer.EmitInstruction(LoweredMI); } diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 7f2f9f8..b594f0e 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1467,3 +1467,13 @@ multiclass StringRRE<string mnemonic, bits<16> opcode, [(set GR64:$end, (operator GR64:$start1, GR64:$start2, GR32:$char))]>; } + +// A pseudo instruction that is a direct alias of a real instruction. +// These aliases are used in cases where a particular register operand is +// fixed or where the same instruction is used with different register sizes. +// The size parameter is the size in bytes of the associated real instruction. +class Alias<int size, dag outs, dag ins, list<dag> pattern> + : InstSystemZ<size, outs, ins, "", pattern> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index ca678ab..9470172 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -32,12 +32,9 @@ let neverHasSideEffects = 1 in { // Control flow instructions //===----------------------------------------------------------------------===// -// A return instruction. R1 is the condition-code mask (all 1s) -// and R2 is the target address, which is always stored in %r14. -let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, - R1 = 15, R2 = 14, isCodeGenOnly = 1 in { - def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>; -} +// A return instruction (br %r14). +let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in + def Return : Alias<2, (outs), (ins), [(z_retflag)]>; // Unconditional branches. R1 is the condition-code mask (all 1s). let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { -- cgit v1.1 From e39a156b921f47a374f091b43205555ee90cd555 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 25 Sep 2013 10:29:47 +0000 Subject: [SystemZ] Use subregs for 64-bit truncating stores Another patch to reduce the duplication of encoding information. Rather than define separate patterns for truncating 64-bit stores, use the 32-bit stores with a subreg. No behavioral change intended.
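[Editor's note: the rewrite relies on an identity worth spelling out: truncating a 64-bit value to 32 bits and then storing it writes exactly the bytes that a 32-bit store of the value's low word writes, so the GR32 store patterns can serve both cases. A self-contained host-side sketch of that identity (plain C++; nothing below comes from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t GR64 = 0x0123456789abcdefULL;
      // truncstorei32 of the GR64: narrow the value, then store 32 bits.
      uint32_t ViaTrunc = static_cast<uint32_t>(GR64);
      // ST of (EXTRACT_SUBREG ..., subreg_32bit): store the low word directly.
      uint32_t ViaSubreg = static_cast<uint32_t>(GR64 & 0xffffffffULL);
      assert(ViaTrunc == ViaSubreg && ViaSubreg == 0x89abcdefu);
      return 0;
    }
]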
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191365 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 12 ------- lib/Target/SystemZ/SystemZInstrInfo.cpp | 2 +- lib/Target/SystemZ/SystemZInstrInfo.td | 51 ++++++++++++++---------------- lib/Target/SystemZ/SystemZPatterns.td | 46 +++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 41 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 81d824c..eb3dc49 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2872,18 +2872,6 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::SelectF128: return emitSelect(MI, MBB); - case SystemZ::CondStore8_32: - return emitCondStore(MI, MBB, SystemZ::STC32, 0, false); - case SystemZ::CondStore8_32Inv: - return emitCondStore(MI, MBB, SystemZ::STC32, 0, true); - case SystemZ::CondStore16_32: - return emitCondStore(MI, MBB, SystemZ::STH32, 0, false); - case SystemZ::CondStore16_32Inv: - return emitCondStore(MI, MBB, SystemZ::STH32, 0, true); - case SystemZ::CondStore32_32: - return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, false); - case SystemZ::CondStore32_32Inv: - return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, true); case SystemZ::CondStore8: return emitCondStore(MI, MBB, SystemZ::STC, 0, false); case SystemZ::CondStore8Inv: diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 6f6b2bf..d20487a 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -819,7 +819,7 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, unsigned &StoreOpcode) const { if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) { LoadOpcode = SystemZ::L; - StoreOpcode = SystemZ::ST32; + StoreOpcode = SystemZ::ST; } else if (RC == &SystemZ::GR64BitRegClass || RC == &SystemZ::ADDR64BitRegClass) { LoadOpcode = SystemZ::LG; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 9470172..cb48ca9 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -201,19 +201,19 @@ let Defs = [CC] in { def Select32 : SelectWrapper; def Select64 : SelectWrapper; -defm CondStore8_32 : CondStores; -defm CondStore16_32 : CondStores; -defm CondStore32_32 : CondStores; - -defm CondStore8 : CondStores; -defm CondStore16 : CondStores; -defm CondStore32 : CondStores; +defm CondStore32 : CondStores; + +defm : CondStores64; +defm : CondStores64; +defm : CondStores64; defm CondStore64 : CondStores; @@ -329,8 +329,7 @@ let Uses = [CC] in { // Register stores. let SimpleBDXStore = 1 in { - let isCodeGenOnly = 1 in - defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>; + defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>; def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>; // These instructions are split after register allocation, so we don't @@ -340,15 +339,13 @@ let SimpleBDXStore = 1 in { [(store GR128:$src, bdxaddr20only128:$dst)]>; } } -let isCodeGenOnly = 1 in - def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; +def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; // Store on condition. 
let isCodeGenOnly = 1, Uses = [CC] in { - def STOC32 : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>; - def STOC : CondStoreRSY<"stoc", 0xEBF3, GR64, 4>; - def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>; + def STOC : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>; } let Uses = [CC] in { def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>; @@ -459,18 +456,16 @@ def : Pat<(i32 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, subreg_32bit)>; // Truncations of 32-bit registers to memory. -let isCodeGenOnly = 1 in { - defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>; - defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>; - def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; -} +defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>; +defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>; +def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; // Truncations of 64-bit registers to memory. -defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64, 1>; -defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64, 2>; -def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>; -defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64, 4>; -def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>; +defm : StoreGR64Pair; +defm : StoreGR64Pair; +def : StoreGR64PC; +defm : StoreGR64Pair; +def : StoreGR64PC; //===----------------------------------------------------------------------===// // Multi-register moves diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index 481534b..a1344a3 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -66,6 +66,52 @@ multiclass InsertMem; } +// INSN stores the low 32 bits of a GPR to a memory with addressing mode MODE. +// Record that it is equivalent to using OPERATOR to store a GR64. +class StoreGR64 + : Pat<(operator GR64:$R1, mode:$XBD2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_32bit), mode:$XBD2)>; + +// INSN and INSNY are an RX/RXY pair of instructions that store the low +// 32 bits of a GPR to memory. Record that they are equivalent to using +// OPERATOR to store a GR64. +multiclass StoreGR64Pair { + def : StoreGR64; + def : StoreGR64; +} + +// INSN stores the low 32 bits of a GPR using PC-relative addressing. +// Record that it is equivalent to using OPERATOR to store a GR64. +class StoreGR64PC + : Pat<(operator GR64:$R1, pcrel32:$XBD2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_32bit), pcrel32:$XBD2)> { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +// INSN and INSNINV conditionally store the low 32 bits of a GPR to memory, +// with INSN storing when the condition is true and INSNINV storing when the +// condition is false. Record that they are equivalent to a LOAD/select/STORE +// sequence for GR64s. 
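+// (Editor's note, not part of the original commit: the two patterns below
+// differ only in which side of the z_select_ccmask the loaded old value
+// appears on, and that is what selects between the INSN and INSNINV forms.)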
+multiclass CondStores64 { + def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr), + uimm8zx4:$valid, uimm8zx4:$cc), + mode:$addr), + (insn (EXTRACT_SUBREG GR64:$new, subreg_32bit), mode:$addr, + uimm8zx4:$valid, uimm8zx4:$cc)>; + def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new, + uimm8zx4:$valid, uimm8zx4:$cc), + mode:$addr), + (insninv (EXTRACT_SUBREG GR64:$new, subreg_32bit), mode:$addr, + uimm8zx4:$valid, uimm8zx4:$cc)>; +} + // Try to use MVC instruction INSN for a load of type LOAD followed by a store // of the same size. VT is the type of the intermediate (legalized) value and // LENGTH is the number of bytes loaded by LOAD. -- cgit v1.1 From 76f8ae87b4705f5c08c3995948223531715a2d58 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 25 Sep 2013 10:37:17 +0000 Subject: [SystemZ] Define the call instructions as pseudo aliases. Similar to r191364, but for calls. This patch also removes the shortening of BRASL to BRAS within a TU. Doing that was a bit controversial internally, since there's a strong expectation with the z assembler that WYWIWYG. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191366 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SystemZ/InstPrinter/SystemZInstPrinter.cpp | 12 --- .../SystemZ/InstPrinter/SystemZInstPrinter.h | 1 - .../SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 43 ++-------- .../SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 8 -- lib/Target/SystemZ/SystemZAsmPrinter.cpp | 24 +++++- lib/Target/SystemZ/SystemZInstrInfo.td | 36 ++++---- lib/Target/SystemZ/SystemZMCInstLower.cpp | 98 ++++++++++------------ lib/Target/SystemZ/SystemZMCInstLower.h | 9 +- lib/Target/SystemZ/SystemZOperands.td | 13 --- 9 files changed, 97 insertions(+), 147 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index 37ebff3..e1e64d3 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -124,18 +124,6 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, O << *MO.getExpr(); } -void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.isImm()) { - O << "0x"; - O.write_hex(MO.getImm()); - } else { - O << *MO.getExpr(); - O << "@PLT"; - } -} - void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, raw_ostream &O) { printOperand(MI->getOperand(OpNum), O); diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index 30cdee5..734ecf0 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -58,7 +58,6 @@ private: void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) 
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 11b520d..26a8fae 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -35,15 +35,6 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) { llvm_unreachable("Unknown fixup kind!"); } -// If Opcode is a relaxable interprocedural reference, return the relaxed form, -// otherwise return 0. -static unsigned getRelaxedOpcode(unsigned Opcode) { - switch (Opcode) { - case SystemZ::BRAS: return SystemZ::BRASL; - } - return 0; -} - namespace { class SystemZMCAsmBackend : public MCAsmBackend { uint8_t OSABI; @@ -59,14 +50,20 @@ public: LLVM_OVERRIDE; virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const LLVM_OVERRIDE; - virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE; + virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE { + return false; + } virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *Fragment, const MCAsmLayout &Layout) const - LLVM_OVERRIDE; + LLVM_OVERRIDE { + return false; + } virtual void relaxInstruction(const MCInst &Inst, - MCInst &Res) const LLVM_OVERRIDE; + MCInst &Res) const LLVM_OVERRIDE { + llvm_unreachable("SystemZ does not have assembler relaxation"); + } virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const LLVM_OVERRIDE; virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const @@ -114,28 +111,6 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } -bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { - return getRelaxedOpcode(Inst.getOpcode()) != 0; -} - -bool -SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, - const MCRelaxableFragment *Fragment, - const MCAsmLayout &Layout) const { - // At the moment we just need to relax 16-bit fields to wider fields.
- Value = extractBitsForFixup(Fixup.getKind(), Value); - return (int16_t)Value != (int64_t)Value; -} - -void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst, - MCInst &Res) const { - unsigned Opcode = getRelaxedOpcode(Inst.getOpcode()); - assert(Opcode && "Unexpected insn to relax"); - Res = Inst; - Res.setOpcode(Opcode); -} - bool SystemZMCAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { for (uint64_t I = 0; I != Count; ++I) diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index bda7714..f07ea7b 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -79,14 +79,6 @@ private: SmallVectorImpl &Fixups) const { return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); } - uint64_t getPLT16DBLEncoding(const MCInst &MI, unsigned OpNum, - SmallVectorImpl &Fixups) const { - return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2); - } - uint64_t getPLT32DBLEncoding(const MCInst &MI, unsigned OpNum, - SmallVectorImpl &Fixups) const { - return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2); - } }; } diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 8422761..3f3ce6b 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -27,14 +27,36 @@ using namespace llvm; void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SystemZMCInstLower Lower(Mang, MF->getContext(), *this); MCInst LoweredMI; switch (MI->getOpcode()) { case SystemZ::Return: LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D); break; + case SystemZ::CallBRASL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBASR: + LoweredMI = MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R14D) + .addReg(MI->getOperand(0).getReg()); + break; + + case SystemZ::CallJG: + LoweredMI = MCInstBuilder(SystemZ::JG) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT)); + break; + + case SystemZ::CallBR: + LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); + break; + default: - SystemZMCInstLower(Mang, MF->getContext(), *this).lower(MI, LoweredMI); + Lower.lower(MI, LoweredMI); break; } OutStreamer.EmitInstruction(LoweredMI); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index cb48ca9..ec59e2d 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -223,34 +223,30 @@ defm CondStore64 : CondStores; - def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$I2, variable_ops), - "brasl\t%r14, $I2", [(z_call pcrel32call:$I2)]>; - def BASR : InstRR<0x0D, (outs), (ins ADDR64:$R2, variable_ops), - "basr\t%r14, $R2", [(z_call ADDR64:$R2)]>; + F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC] in { + def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops), + [(z_call pcrel32:$I2)]>; + def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops), + [(z_call ADDR64:$R2)]>; } // Sibling calls. Indirect sibling calls must be via R1, since R2 upwards // are argument registers and since branching to R0 is a no-op. 
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1, R1 = 15 in { - def CallJG : InstRIL<0xC04, (outs), (ins pcrel32call:$I2), - "jg\t$I2", [(z_sibcall pcrel32call:$I2)]>; - let R2 = 1, Uses = [R1D] in - def CallBR : InstRR<0x07, (outs), (ins), "br\t%r1", [(z_sibcall R1D)]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + def CallJG : Alias<6, (outs), (ins pcrel32:$I2), + [(z_sibcall pcrel32:$I2)]>; + let Uses = [R1D] in + def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>; } // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. -def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), - "bras\t$R1, $I2", []>; -def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), - "brasl\t$R1, $I2", []>; -def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), - "basr\t$R1, $R2", []>; +def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), + "bras\t$R1, $I2", []>; +def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), + "brasl\t$R1, $I2", []>; +def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), + "basr\t$R1, $R2", []>; //===----------------------------------------------------------------------===// // Move instructions diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp index 432a0d3..ebf043e 100644 --- a/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -15,15 +15,6 @@ using namespace llvm; -// If Opcode is an interprocedural reference that can be shortened, -// return the short form, otherwise return 0. -static unsigned getShortenedInstr(unsigned Opcode) { - switch (Opcode) { - case SystemZ::BRASL: return SystemZ::BRAS; - } - return Opcode; -} - // Return the VK_* enumeration for MachineOperand target flags Flags. 
static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) { @@ -39,66 +30,67 @@ SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx, SystemZAsmPrinter &asmprinter) : Mang(mang), Ctx(ctx), AsmPrinter(asmprinter) {} -MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO, - const MCSymbol *Symbol, - int64_t Offset) const { - MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); - const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); - if (Offset) { - const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); +const MCExpr * +SystemZMCInstLower::getExpr(const MachineOperand &MO, + MCSymbolRefExpr::VariantKind Kind) const { + const MCSymbol *Symbol; + bool HasOffset = true; + switch (MO.getType()) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + HasOffset = false; + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = Mang->getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); + HasOffset = false; + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + default: + llvm_unreachable("unknown operand type"); } - return MCOperand::CreateExpr(Expr); + const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + if (HasOffset) + if (int64_t Offset = MO.getOffset()) { + const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } + return Expr; } MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { switch (MO.getType()) { - default: - llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: return MCOperand::CreateReg(MO.getReg()); case MachineOperand::MO_Immediate: return MCOperand::CreateImm(MO.getImm()); - case MachineOperand::MO_MachineBasicBlock: - return lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), - /* MO has no offset field */0); - - case MachineOperand::MO_GlobalAddress: - return lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()), - MO.getOffset()); - - case MachineOperand::MO_ExternalSymbol: { - StringRef Name = MO.getSymbolName(); - return lowerSymbolOperand(MO, AsmPrinter.GetExternalSymbolSymbol(Name), - MO.getOffset()); - } - - case MachineOperand::MO_JumpTableIndex: - return lowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()), - /* MO has no offset field */0); - - case MachineOperand::MO_ConstantPoolIndex: - return lowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()), - MO.getOffset()); - - case MachineOperand::MO_BlockAddress: { - const BlockAddress *BA = MO.getBlockAddress(); - return lowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(BA), - MO.getOffset()); + default: { + MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); + return MCOperand::CreateExpr(getExpr(MO, Kind)); } } } void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { - unsigned Opcode = MI->getOpcode(); - // When emitting binary code, start with the shortest form of an instruction - // and then relax it where necessary. 
- if (!AsmPrinter.OutStreamer.hasRawTextSupport()) - Opcode = getShortenedInstr(Opcode); - OutMI.setOpcode(Opcode); + OutMI.setOpcode(MI->getOpcode()); for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); // Ignore all implicit register operands. diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h index db5bdb0..33a849d 100644 --- a/lib/Target/SystemZ/SystemZMCInstLower.h +++ b/lib/Target/SystemZ/SystemZMCInstLower.h @@ -10,14 +10,13 @@ #ifndef LLVM_SYSTEMZMCINSTLOWER_H #define LLVM_SYSTEMZMCINSTLOWER_H +#include "llvm/MC/MCExpr.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Compiler.h" namespace llvm { -class MCContext; class MCInst; class MCOperand; -class MCSymbol; class MachineInstr; class MachineOperand; class Mangler; @@ -38,9 +37,9 @@ public: // Return an MCOperand for MO. MCOperand lowerOperand(const MachineOperand& MO) const; - // Return an MCOperand for MO, given that it equals Symbol + Offset. - MCOperand lowerSymbolOperand(const MachineOperand &MO, - const MCSymbol *Symbol, int64_t Offset) const; + // Return an MCExpr for symbolic operand MO with variant kind Kind. + const MCExpr *getExpr(const MachineOperand &MO, + MCSymbolRefExpr::VariantKind Kind) const; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index e1e4541..f1db1fd 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -395,19 +395,6 @@ def pcrel32 : PCRelAddress { let DecoderMethod = "decodePC32DBLOperand"; } -// A PC-relative offset of a global value when the value is used as a -// call target. The offset is sign-extended and multiplied by 2. -def pcrel16call : PCRelAddress { - let PrintMethod = "printCallOperand"; - let EncoderMethod = "getPLT16DBLEncoding"; - let DecoderMethod = "decodePC16DBLOperand"; -} -def pcrel32call : PCRelAddress { - let PrintMethod = "printCallOperand"; - let EncoderMethod = "getPLT32DBLEncoding"; - let DecoderMethod = "decodePC32DBLOperand"; -} - //===----------------------------------------------------------------------===// // Addressing modes //===----------------------------------------------------------------------===// -- cgit v1.1 From 3f22cc1df64a6dd6a3ecc5e7e261f15af083f806 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Wed, 25 Sep 2013 10:47:21 +0000 Subject: MC: Add support for treating $ as a reference to the PC The binutils assembler supports a mode called DOLLAR_DOT which treats the dollar sign token as a reference to the current program counter if the dollar sign doesn't precede a constant or identifier. This commit adds a new MCAsmInfo flag stating whether or not a given target supports this interpretation of the dollar sign token; by default, this flag is not enabled. Further, enable this flag for PPC. The system assembler for AIX and binutils both support using the dollar sign in this manner. This fixes PR17353. 
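Because this dump is limited to lib/Target, only the PowerPC half of the change (the DollarIsPC flag set below) is visible; the generic MC parser half lives outside lib/Target. A minimal sketch of that half, assuming the MC API names of this era and a hypothetical helper called parseDollarPC, not the verbatim patch:

#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"

// When the target's MCAsmInfo enables DollarIsPC, a bare '$' in an
// expression is modeled by pinning a temporary label at the current
// location and folding '$' into a reference to that label.
static bool parseDollarPC(llvm::MCAsmParser &Parser,
                          const llvm::MCAsmInfo &MAI, llvm::MCContext &Ctx,
                          llvm::MCStreamer &Out, const llvm::MCExpr *&Res) {
  using namespace llvm;
  if (!MAI.getDollarIsPC())
    return Parser.TokError("invalid token in expression");
  Parser.Lex(); // Eat the '$'.
  MCSymbol *Sym = Ctx.CreateTempSymbol(); // Marks the current PC.
  Out.EmitLabel(Sym);
  Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
  return false; // Success: '$' now behaves like '.'.
}

With this in place, expressions such as "b $+8" assemble the way binutils and the AIX system assembler treat them.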
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191368 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 1f3e5b4..91578a9 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -54,6 +54,8 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { // Debug Information SupportsDebugInformation = true; + DollarIsPC = true; + // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) MinInstAlignment = 4; -- cgit v1.1 From 259a6006e89576704e52e7392ef2bfd83f277ce3 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 25 Sep 2013 11:11:53 +0000 Subject: [SystemZ] Define the GR64 low-word logic instructions as pseudo aliases. Another patch to avoid duplication of encoding information. Things like NILF, NILL and NILH are used as both 32-bit and 64-bit instructions. Here the 64-bit versions are defined as aliases of the 32-bit ones. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191369 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 5 + lib/Target/SystemZ/SystemZAsmPrinter.cpp | 30 ++++++ lib/Target/SystemZ/SystemZISelLowering.cpp | 92 +++++++++--------- lib/Target/SystemZ/SystemZInstrFormats.td | 16 ++++ lib/Target/SystemZ/SystemZInstrInfo.cpp | 12 +-- lib/Target/SystemZ/SystemZInstrInfo.td | 106 ++++++++++----------- lib/Target/SystemZ/SystemZPatterns.td | 8 ++ lib/Target/SystemZ/SystemZShortenInst.cpp | 2 +- 8 files changed, 160 insertions(+), 111 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 84184af..f2e5a5a 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -56,6 +56,11 @@ namespace SystemZMC { inline unsigned getRegAsGR64(unsigned Reg) { return GR64Regs[getFirstReg(Reg)]; } + + // Return the given register as a low GR32. + inline unsigned getRegAsGR32(unsigned Reg) { + return GR32Regs[getFirstReg(Reg)]; + } } MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 3f3ce6b..380fa87 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -26,6 +26,15 @@ using namespace llvm; +// Return an RI instruction like MI with opcode Opcode, but with the +// GR64 register operands turned into GR32s. 
+static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(1).getReg())) + .addImm(MI->getOperand(2).getImm()); +} + void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { SystemZMCInstLower Lower(Mang, MF->getContext(), *this); MCInst LoweredMI; @@ -55,6 +64,27 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); break; + case SystemZ::IILF64: + LoweredMI = MCInstBuilder(SystemZ::IILF) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) + .addImm(MI->getOperand(2).getImm()); + break; + +#define LOWER_LOW(NAME) \ + case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break + + LOWER_LOW(IILL); + LOWER_LOW(IILH); + LOWER_LOW(NILL); + LOWER_LOW(NILH); + LOWER_LOW(NILF); + LOWER_LOW(OILL); + LOWER_LOW(OILH); + LOWER_LOW(OILF); + LOWER_LOW(XILF); + +#undef LOWER_LOW + default: Lower.lower(MI, LoweredMI); break; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index eb3dc49..704c4e5 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2330,11 +2330,11 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, .addReg(RotatedOldVal).addOperand(Src2); if (BitSize < 32) // XILF with the upper BitSize bits set. - BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal) + BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize))); else if (BitSize == 32) // XILF with every bit set. - BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal) + BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) .addReg(Tmp).addImm(~uint32_t(0)); else { // Use LCGR and add -1 to the result, which is more compact than @@ -2938,96 +2938,96 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::ATOMIC_LOADW_NR: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); case SystemZ::ATOMIC_LOADW_NILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0); case SystemZ::ATOMIC_LOAD_NR: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); - case SystemZ::ATOMIC_LOAD_NILL32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32); - case SystemZ::ATOMIC_LOAD_NILH32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32); - case SystemZ::ATOMIC_LOAD_NILF32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32); - case SystemZ::ATOMIC_LOAD_NGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); case SystemZ::ATOMIC_LOAD_NILL: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32); case SystemZ::ATOMIC_LOAD_NILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32); + case SystemZ::ATOMIC_LOAD_NILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32); + case SystemZ::ATOMIC_LOAD_NGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); + case SystemZ::ATOMIC_LOAD_NILL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64); + case SystemZ::ATOMIC_LOAD_NILH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64); case SystemZ::ATOMIC_LOAD_NIHL: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64); case 
SystemZ::ATOMIC_LOAD_NIHH: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64); - case SystemZ::ATOMIC_LOAD_NILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64); + case SystemZ::ATOMIC_LOAD_NILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64); case SystemZ::ATOMIC_LOAD_NIHF: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64); case SystemZ::ATOMIC_LOADW_OR: return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); case SystemZ::ATOMIC_LOADW_OILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0); case SystemZ::ATOMIC_LOAD_OR: return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); - case SystemZ::ATOMIC_LOAD_OILL32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32); - case SystemZ::ATOMIC_LOAD_OILH32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32); - case SystemZ::ATOMIC_LOAD_OILF32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32); - case SystemZ::ATOMIC_LOAD_OGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); case SystemZ::ATOMIC_LOAD_OILL: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32); case SystemZ::ATOMIC_LOAD_OILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32); + case SystemZ::ATOMIC_LOAD_OILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32); + case SystemZ::ATOMIC_LOAD_OGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); + case SystemZ::ATOMIC_LOAD_OILL64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64); + case SystemZ::ATOMIC_LOAD_OILH64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64); case SystemZ::ATOMIC_LOAD_OIHL: return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64); case SystemZ::ATOMIC_LOAD_OIHH: return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64); - case SystemZ::ATOMIC_LOAD_OILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64); + case SystemZ::ATOMIC_LOAD_OILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64); case SystemZ::ATOMIC_LOAD_OIHF: return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64); case SystemZ::ATOMIC_LOADW_XR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); case SystemZ::ATOMIC_LOADW_XILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0); case SystemZ::ATOMIC_LOAD_XR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); - case SystemZ::ATOMIC_LOAD_XILF32: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32); + case SystemZ::ATOMIC_LOAD_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32); case SystemZ::ATOMIC_LOAD_XGR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); - case SystemZ::ATOMIC_LOAD_XILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64); + case SystemZ::ATOMIC_LOAD_XILF64: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64); case SystemZ::ATOMIC_LOAD_XIHF: return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64); case SystemZ::ATOMIC_LOADW_NRi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); case SystemZ::ATOMIC_LOADW_NILHi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true); case SystemZ::ATOMIC_LOAD_NRi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); - case SystemZ::ATOMIC_LOAD_NILL32i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true); - case 
SystemZ::ATOMIC_LOAD_NILH32i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true); - case SystemZ::ATOMIC_LOAD_NILF32i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true); - case SystemZ::ATOMIC_LOAD_NGRi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); case SystemZ::ATOMIC_LOAD_NILLi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true); case SystemZ::ATOMIC_LOAD_NILHi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true); + case SystemZ::ATOMIC_LOAD_NILFi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true); + case SystemZ::ATOMIC_LOAD_NGRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); + case SystemZ::ATOMIC_LOAD_NILL64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true); + case SystemZ::ATOMIC_LOAD_NILH64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true); case SystemZ::ATOMIC_LOAD_NIHLi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true); case SystemZ::ATOMIC_LOAD_NIHHi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true); - case SystemZ::ATOMIC_LOAD_NILFi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true); + case SystemZ::ATOMIC_LOAD_NILF64i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true); case SystemZ::ATOMIC_LOAD_NIHFi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true); diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index b594f0e..a104329 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1477,3 +1477,19 @@ class Alias pattern> let isPseudo = 1; let isCodeGenOnly = 1; } + +// An alias of a BinaryRI, but with different register sizes. +class BinaryAliasRI + : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} + +// An alias of a BinaryRIL, but with different register sizes. 
+class BinaryAliasRIL + : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2), + [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { + let Constraints = "$R1 = $R1src"; +} diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index d20487a..6d19bdd 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -535,14 +535,14 @@ namespace { static LogicOp interpretAndImmediate(unsigned Opcode) { switch (Opcode) { - case SystemZ::NILL32: return LogicOp(32, 0, 16); - case SystemZ::NILH32: return LogicOp(32, 16, 16); - case SystemZ::NILL: return LogicOp(64, 0, 16); - case SystemZ::NILH: return LogicOp(64, 16, 16); + case SystemZ::NILL: return LogicOp(32, 0, 16); + case SystemZ::NILH: return LogicOp(32, 16, 16); + case SystemZ::NILL64: return LogicOp(64, 0, 16); + case SystemZ::NILH64: return LogicOp(64, 16, 16); case SystemZ::NIHL: return LogicOp(64, 32, 16); case SystemZ::NIHH: return LogicOp(64, 48, 16); - case SystemZ::NILF32: return LogicOp(32, 0, 32); - case SystemZ::NILF: return LogicOp(64, 0, 32); + case SystemZ::NILF: return LogicOp(32, 0, 32); + case SystemZ::NILF64: return LogicOp(64, 0, 32); case SystemZ::NIHF: return LogicOp(64, 32, 32); default: return LogicOp(); } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index ec59e2d..d2aa276 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -570,12 +570,10 @@ defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>; // Insertions of a 16-bit immediate, leaving other bits unaffected. // We don't have or_as_insert equivalents of these operations because // OI is available instead. -let isCodeGenOnly = 1 in { - def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>; - def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>; -} -def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>; -def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>; +def IILL : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>; +def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>; +def IILL64 : BinaryAliasRI; +def IILH64 : BinaryAliasRI; def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>; def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>; @@ -583,11 +581,9 @@ def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>; // full-width move. (We use IILF rather than something like LLILF // for 32-bit moves because IILF leaves the upper 32 bits of the // GR64 unchanged.) -let isCodeGenOnly = 1, isAsCheapAsAMove = 1, isMoveImm = 1, - isReMaterializable = 1 in { - def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; -} -def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>; +let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in + def IILF : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; +def IILF64 : BinaryAliasRIL; def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>; // An alternative model of inserthf, with the first operand being @@ -730,21 +726,19 @@ let Defs = [CC] in { let isConvertibleToThreeAddress = 1 in { // ANDs of a 16-bit immediate, leaving other bits unaffected. // The CC result only reflects the 16-bit field, not the full register. 
- let isCodeGenOnly = 1 in { - def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; - def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; - } - def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>; - def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>; + def NILL : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; + def NILH : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + def NILL64 : BinaryAliasRI; + def NILH64 : BinaryAliasRI; def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>; def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; // ANDs of a 32-bit immediate, leaving other bits unaffected. // The CC result only reflects the 32-bit field, which means we can // use it as a zero indicator for i32 operations but not otherwise. - let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in - def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; - def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in + def NILF : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; + def NILF64 : BinaryAliasRIL; def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; } @@ -777,21 +771,19 @@ let Defs = [CC] in { // ORs of a 16-bit immediate, leaving other bits unaffected. // The CC result only reflects the 16-bit field, not the full register. - let isCodeGenOnly = 1 in { - def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; - def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; - } - def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>; - def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>; + def OILL : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; + def OILH : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; + def OILL64 : BinaryAliasRI; + def OILH64 : BinaryAliasRI; def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>; def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>; // ORs of a 32-bit immediate, leaving other bits unaffected. // The CC result only reflects the 32-bit field, which means we can // use it as a zero indicator for i32 operations but not otherwise. - let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in - def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; - def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in + def OILF : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; + def OILF64 : BinaryAliasRIL; def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; // ORs of memory. @@ -824,9 +816,9 @@ let Defs = [CC] in { // XORs of a 32-bit immediate, leaving other bits unaffected. // The CC result only reflects the 32-bit field, which means we can // use it as a zero indicator for i32 operations but not otherwise. - let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in - def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; - def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in + def XILF : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; + def XILF64 : BinaryAliasRIL; def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; // XORs of memory. @@ -1036,18 +1028,16 @@ let mayLoad = 1, Defs = [CC], Uses = [R0W] in // Test under mask. 
let Defs = [CC] in { - let isCodeGenOnly = 1 in { - def TMLL32 : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>; - def TMLH32 : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>; - } + def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>; + def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>; - def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR64, imm64ll16>; - def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR64, imm64lh16>; def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GR64, imm64hl16>; def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GR64, imm64hh16>; defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>; } +def : CompareGR64RI; +def : CompareGR64RI; //===----------------------------------------------------------------------===// // Prefetch @@ -1080,58 +1070,58 @@ def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64; def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg; def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm; def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_NILL32 : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NILH32 : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NILF32 : AtomicLoadBinaryImm32; +def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32; +def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32; +def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32; def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm64; +def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64; +def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64; def ATOMIC_LOAD_NIHL : AtomicLoadBinaryImm64; def ATOMIC_LOAD_NIHH : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm64; +def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64; def ATOMIC_LOAD_NIHF : AtomicLoadBinaryImm64; def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg; def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm; def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_OILL32 : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_OILH32 : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_OILF32 : AtomicLoadBinaryImm32; +def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32; +def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32; +def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32; def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm64; +def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64; +def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64; def ATOMIC_LOAD_OIHL : AtomicLoadBinaryImm64; def ATOMIC_LOAD_OIHH : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm64; +def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64; def ATOMIC_LOAD_OIHF : AtomicLoadBinaryImm64; def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg; def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm; def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_XILF32 : AtomicLoadBinaryImm32; +def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32; def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm64; +def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64; def ATOMIC_LOAD_XIHF : AtomicLoadBinaryImm64; def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg; def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm; def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32; +def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32; def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm64; def 
ATOMIC_LOAD_NIHLi : AtomicLoadBinaryImm64; def ATOMIC_LOAD_NIHHi : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm64; def ATOMIC_LOAD_NIHFi : AtomicLoadBinaryImm64; diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index a1344a3..203247c 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -112,6 +112,14 @@ multiclass CondStores64; } +// INSN performs a comparison between a 32-bit register and a constant. +// Record that it is equivalent to comparing the low word of a GR64 with IMM. +class CompareGR64RI + : Pat<(compare GR64:$R1, imm:$I2), + (insn (EXTRACT_SUBREG GR64:$R1, subreg_32bit), + (imm.OperandTransform imm:$I2))>; + // Try to use MVC instruction INSN for a load of type LOAD followed by a store // of the same size. VT is the type of the intermediate (legalized) value and // LENGTH is the number of bytes loaded by LOAD. diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index 526ae5c..b480864 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -119,7 +119,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock *MBB) { MBBE = MBB->rend(); MBBI != MBBE; ++MBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); - if (Opcode == SystemZ::IILF32) + if (Opcode == SystemZ::IILF) Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL, SystemZ::LLILH); unsigned UsedLow = 0; -- cgit v1.1 From 070156437752179833b1e5fddd50caa03fd7c12f Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 25 Sep 2013 18:14:12 +0000 Subject: Mark the x86 machine model as incomplete. PR17367. Ideally, the machine model is added at the time the instructions are defined. But many instructions in X86InstrSSE.td still need a model. Without this workaround the scheduler asserts because x86 already has itinerary classes for these instructions, indicating they should be modeled by the scheduler. Since we use the new machine model for other instructions, it expects a new machine model for these too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191391 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86SchedHaswell.td | 4 ++++ lib/Target/X86/X86SchedSandyBridge.td | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 62ba2bc..9748261 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -19,6 +19,10 @@ def HaswellModel : SchedMachineModel { let MicroOpBufferSize = 192; // Based on the reorder buffer. let LoadLatency = 4; let MispredictPenalty = 16; + + // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow + // the scheduler to assign a default model to unrecognized opcodes. + let CompleteModel = 0; } let SchedModel = HaswellModel in { diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index 52ead94..3011c6d 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -20,6 +20,10 @@ def SandyBridgeModel : SchedMachineModel { let MicroOpBufferSize = 168; // Based on the reorder buffer. let LoadLatency = 4; let MispredictPenalty = 16; + + // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow + // the scheduler to assign a default model to unrecognized opcodes.
+ let CompleteModel = 0; } let SchedModel = SandyBridgeModel in { -- cgit v1.1 From 55d016886cc695aa14dfa3cc8b9e1d602cca149c Mon Sep 17 00:00:00 2001 From: Reed Kotler Date: Wed, 25 Sep 2013 20:58:50 +0000 Subject: Fix a bad typo in the inline assembly code for mips16 pic fp stubs and make one cosmetic cleanup to make it look the same as gcc in this area; adjusting test cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191400 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16HardFloat.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 14a9424..4d728bd 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -421,7 +421,7 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, std::string Name = F->getName(); std::string SectionName = ".mips16.fn." + Name; std::string StubName = "__fn_stub_" + Name; - std::string LocalName = "__fn_local_" + Name; + std::string LocalName = "$$__fn_local_" + Name; Function *FStub = Function::Create (F->getFunctionType(), Function::InternalLinkage, StubName, M); @@ -436,14 +436,14 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, IAH.Out(" .set macro"); if (PicMode) { IAH.Out(".set noreorder"); - IAH.Out(".cpload $$2"); + IAH.Out(".cpload $$25"); IAH.Out(".set reorder"); IAH.Out(".reloc 0,R_MIPS_NONE," + Name); IAH.Out("la $$25," + LocalName); } else { IAH.Out(".set reorder"); - IAH.Out("la $$25, " + Name); + IAH.Out("la $$25," + Name); } swapFPIntParams(PV, M, IAH, LE, false); IAH.Out("jr $$25"); -- cgit v1.1 From 541681c8485c18b564970c80180a798b2c1663e8 Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Wed, 25 Sep 2013 23:12:06 +0000 Subject: Fix PR 17368: disable vector mul distribution for square of add/sub for ARM Generally, it is desirable to distribute (a + b) * c to a*c + b*c for ARM with VMLx forwarding, where a, b and c are vectors. However, for (a + b)*(a + b), distribution will result in one extra instruction. With distribution: x = a + b (add) y = a * x (mul) z = y + b * x (mla) Without distribution: x = a + b (add) z = x * x (mul) This patch checks if a mul is a square of add/sub. If yes, skip distribution.
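As a hedged illustration (our own example, not part of the patch; the function name is made up), the kind of NEON source that hits this combine is a square of a vector sum, which now keeps the compact form:

#include <arm_neon.h>

// (a + b) * (a + b): with the new N0 == N1 guard in PerformVMULCombine,
// the multiply is left undistributed, so this lowers to one vadd plus one
// vmul instead of vadd + vmul + vmla.
float32x4_t square_of_sum(float32x4_t a, float32x4_t b) {
  float32x4_t x = vaddq_f32(a, b); // x = a + b (vadd)
  return vmulq_f32(x, x);          // x * x     (single vmul)
}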
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191410 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c83f7b1..773b710 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -8342,6 +8342,13 @@ static SDValue PerformSUBCombine(SDNode *N, /// is faster than /// vadd d3, d0, d1 /// vmul d3, d3, d2 +// However, for (A + B) * (A + B), +// vadd d2, d0, d1 +// vmul d3, d0, d2 +// vmla d3, d1, d2 +// is slower than +// vadd d2, d0, d1 +// vmul d3, d2, d2 static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -8361,6 +8368,9 @@ static SDValue PerformVMULCombine(SDNode *N, std::swap(N0, N1); } + if (N0 == N1) + return SDValue(); + EVT VT = N->getValueType(0); SDLoc DL(N); SDValue N00 = N0->getOperand(0); -- cgit v1.1 From 825e5583b6c3161bb02074dc48cd07dafdf9545c Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 25 Sep 2013 23:42:03 +0000 Subject: [mips][msa] Updates encoding of 2RF instructions to match the latest revision of the MSA spec (1.06). This only changes internal encodings and doesn't affect output. Patch by Matheus Almeida git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191411 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 3fd52c6..740e7f2 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -654,11 +654,11 @@ class FSUN_D_ENC : MSA_3RF_FMT<0b1001, 0b1, 0b011010>; class FSUNE_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011100>; class FSUNE_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011100>; -class FTRUNC_S_W_ENC : MSA_2RF_FMT<0b110100000, 0b0, 0b011110>; -class FTRUNC_S_D_ENC : MSA_2RF_FMT<0b110100000, 0b1, 0b011110>; +class FTRUNC_S_W_ENC : MSA_2RF_FMT<0b110010001, 0b0, 0b011110>; +class FTRUNC_S_D_ENC : MSA_2RF_FMT<0b110010001, 0b1, 0b011110>; -class FTRUNC_U_W_ENC : MSA_2RF_FMT<0b110100001, 0b0, 0b011110>; -class FTRUNC_U_D_ENC : MSA_2RF_FMT<0b110100001, 0b1, 0b011110>; +class FTRUNC_U_W_ENC : MSA_2RF_FMT<0b110010010, 0b0, 0b011110>; +class FTRUNC_U_D_ENC : MSA_2RF_FMT<0b110010010, 0b1, 0b011110>; class FTINT_S_W_ENC : MSA_2RF_FMT<0b110011100, 0b0, 0b011110>; class FTINT_S_D_ENC : MSA_2RF_FMT<0b110011100, 0b1, 0b011110>; -- cgit v1.1 From 42d9ca629934d0c20ac19949399ce4faa9a7bbb3 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 25 Sep 2013 23:50:44 +0000 Subject: [mips][msa] Direct Object Emission support for the MSA instruction set. In more detail, this patch adds the ability to parse, encode and decode MSA registers ($w0-$w31). The format of 2RF instructions (MipsMSAInstrFormats.td) was updated so that we could attach a test case to this patch i.e., the test case parses, encodes and decodes 2 MSA instructions. Following patches will add the remainder of the instructions. Note that DecodeMSA128BRegisterClass is missing from MipsDisassembler.cpp because it's not yet required at this stage and having it would cause a compiler warning (unused function).
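Restating the register-name rule that the parser hunk below implements (the standalone form and include are our own), an MSA register name is the letter 'w' followed by a decimal index in the range 0-31:

#include "llvm/ADT/StringRef.h"

// Mirrors matchMSA128RegisterName from the diff that follows: reject
// anything that is not 'w' plus a decimal number, then range-check the
// index. Name is assumed non-empty, as it is at the parser's call site.
static int matchMSA128RegisterName(llvm::StringRef Name) {
  unsigned IntVal;
  if (Name.front() != 'w' || Name.drop_front(1).getAsInteger(10, IntVal))
    return -1; // getAsInteger returns true on parse failure.
  return IntVal > 31 ? -1 : (int)IntVal;
}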
Patch by Matheus Almeida git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191412 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 133 +++++++++++++++++++++- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 51 +++++++++ lib/Target/Mips/MipsMSAInstrFormats.td | 5 + lib/Target/Mips/MipsMSAInstrInfo.td | 17 ++- lib/Target/Mips/MipsRegisterInfo.td | 37 ++++++ 5 files changed, 235 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 3033fef..5b77d87 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -82,6 +82,10 @@ class MipsAsmParser : public MCTargetAsmParser { int RegKind); MipsAsmParser::OperandMatchResultTy + parseMSARegs(SmallVectorImpl &Operands, + int RegKind); + + MipsAsmParser::OperandMatchResultTy parseMemOperand(SmallVectorImpl &Operands); bool parsePtrReg(SmallVectorImpl &Operands, int RegKind); @@ -128,6 +132,18 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseCOP2(SmallVectorImpl &Operands); + MipsAsmParser::OperandMatchResultTy + parseMSA128BRegs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseMSA128HRegs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseMSA128WRegs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseMSA128DRegs(SmallVectorImpl &Operands); + bool searchSymbolAlias(SmallVectorImpl &Operands, unsigned RegKind); @@ -199,6 +215,8 @@ class MipsAsmParser : public MCTargetAsmParser { int matchACRegisterName(StringRef Name); + int matchMSA128RegisterName(StringRef Name); + int regKindToRegClass(int RegKind); unsigned getReg(int RC, int RegNo); @@ -243,7 +261,11 @@ public: Kind_ACC64DSP, Kind_LO32DSP, Kind_HI32DSP, - Kind_COP2 + Kind_COP2, + Kind_MSA128BRegs, + Kind_MSA128HRegs, + Kind_MSA128WRegs, + Kind_MSA128DRegs }; private: @@ -465,6 +487,22 @@ public: return Kind == k_Register && Reg.Kind == Kind_COP2; } + bool isMSA128BAsm() const { + return Kind == k_Register && Reg.Kind == Kind_MSA128BRegs; + } + + bool isMSA128HAsm() const { + return Kind == k_Register && Reg.Kind == Kind_MSA128HRegs; + } + + bool isMSA128WAsm() const { + return Kind == k_Register && Reg.Kind == Kind_MSA128WRegs; + } + + bool isMSA128DAsm() const { + return Kind == k_Register && Reg.Kind == Kind_MSA128DRegs; + } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; @@ -914,6 +952,18 @@ int MipsAsmParser::matchACRegisterName(StringRef Name) { return -1; } +int MipsAsmParser::matchMSA128RegisterName(StringRef Name) { + unsigned IntVal; + + if (Name.front() != 'w' || Name.drop_front(1).getAsInteger(10, IntVal)) + return -1; + + if (IntVal > 31) + return -1; + + return IntVal; +} + int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { int CC; @@ -921,10 +971,12 @@ int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { if (CC != -1) return matchRegisterByNumber(CC, is64BitReg ? Mips::GPR64RegClassID : Mips::GPR32RegClassID); - CC= matchFPURegisterName(Name); + CC = matchFPURegisterName(Name); //TODO: decide about fpu register class - return matchRegisterByNumber(CC, isFP64() ? Mips::FGR64RegClassID - : Mips::FGR32RegClassID); + if (CC != -1) + return matchRegisterByNumber(CC, isFP64() ? 
Mips::FGR64RegClassID + : Mips::FGR32RegClassID); + return matchMSA128RegisterName(Name); } int MipsAsmParser::regKindToRegClass(int RegKind) { @@ -940,6 +992,10 @@ int MipsAsmParser::regKindToRegClass(int RegKind) { case MipsOperand::Kind_CCRRegs: return Mips::CCRRegClassID; case MipsOperand::Kind_ACC64DSP: return Mips::ACC64DSPRegClassID; case MipsOperand::Kind_FCCRegs: return Mips::FCCRegClassID; + case MipsOperand::Kind_MSA128BRegs: return Mips::MSA128BRegClassID; + case MipsOperand::Kind_MSA128HRegs: return Mips::MSA128HRegClassID; + case MipsOperand::Kind_MSA128WRegs: return Mips::MSA128WRegClassID; + case MipsOperand::Kind_MSA128DRegs: return Mips::MSA128DRegClassID; default :return -1; } @@ -1480,6 +1536,51 @@ MipsAsmParser::parseRegs(SmallVectorImpl &Operands, } MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseMSARegs(SmallVectorImpl &Operands, + int RegKind) { + MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind; + SMLoc S = Parser.getTok().getLoc(); + std::string RegName; + + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + switch (RegKind) { + default: + return MatchOperand_ParseFail; + case MipsOperand::Kind_MSA128BRegs: + case MipsOperand::Kind_MSA128HRegs: + case MipsOperand::Kind_MSA128WRegs: + case MipsOperand::Kind_MSA128DRegs: + break; + } + + Parser.Lex(); // Eat the '$'. + if (getLexer().getKind() == AsmToken::Identifier) + RegName = Parser.getTok().getString().lower(); + else + return MatchOperand_ParseFail; + + int RegNum = matchMSA128RegisterName(RegName); + + if (RegNum < 0 || RegNum > 31) + return MatchOperand_ParseFail; + + int RegVal = getReg(regKindToRegClass(Kind), RegNum); + if (RegVal == -1) + return MatchOperand_ParseFail; + + MipsOperand *Op = MipsOperand::CreateReg(RegVal, S, + Parser.getTok().getLoc()); + Op->setRegKind(Kind); + Operands.push_back(Op); + + Parser.Lex(); // Eat the register identifier. 
+ + return MatchOperand_Success; +} + +MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseGPR64(SmallVectorImpl &Operands) { if (!isMips64()) @@ -1619,6 +1720,30 @@ MipsAsmParser::parseCOP2(SmallVectorImpl &Operands) { return MatchOperand_Success; } +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseMSA128BRegs( + SmallVectorImpl &Operands) { + return parseMSARegs(Operands, (int) MipsOperand::Kind_MSA128BRegs); +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseMSA128HRegs( + SmallVectorImpl &Operands) { + return parseMSARegs(Operands, (int) MipsOperand::Kind_MSA128HRegs); +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseMSA128WRegs( + SmallVectorImpl &Operands) { + return parseMSARegs(Operands, (int) MipsOperand::Kind_MSA128WRegs); +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseMSA128DRegs( + SmallVectorImpl &Operands) { + return parseMSARegs(Operands, (int) MipsOperand::Kind_MSA128DRegs); +} + bool MipsAsmParser::searchSymbolAlias( SmallVectorImpl &Operands, unsigned RegKind) { diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index e2b3fa4..9fabc3a 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -170,6 +170,21 @@ static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, @@ -620,6 +635,42 @@ static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::MSA128HRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::MSA128WRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::MSA128DRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index b011674..9b63ef0 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -52,8 +52,13 @@ class MSA_2R_FMT major, bits<2> df, bits<6> minor>: MSAInst { } class MSA_2RF_FMT major, bits<1> df, bits<6> minor>: MSAInst { + bits<5> ws; + bits<5> wd; + let Inst{25-17} = major; let Inst{16} = df; + let Inst{15-11} = ws; + let Inst{10-6} = 
wd; let Inst{5-0} = minor; } diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 740e7f2..881335f 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1132,6 +1132,15 @@ class MSA_2RF_DESC_BASE : MSA_2R_DESC_BASE; +class MSA_2RF_RO_DESC_BASE { + dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWS:$ws); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws"); + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws))]; + InstrItinClass Itinerary = itin; +} class MSA_3R_DESC_BASE, class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", vfsetoeq_v2f64, MSA128D>, IsCommutable; -class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, - MSA128W>; -class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, - MSA128D>; +class FCLASS_W_DESC : MSA_2RF_RO_DESC_BASE<"fclass.w", int_mips_fclass_w, + MSA128WOpnd>; +class FCLASS_D_DESC : MSA_2RF_RO_DESC_BASE<"fclass.d", int_mips_fclass_d, + MSA128DOpnd>; class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", vfsetole_v4f32, MSA128W>; class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", vfsetole_v2f64, MSA128D>; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 0b88e0a..94bfbf5 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -436,6 +436,26 @@ def FCCRegsAsmOperand : MipsAsmRegOperand { let ParserMethod = "parseFCCRegs"; } +def MSA128BAsmOperand : MipsAsmRegOperand { + let Name = "MSA128BAsm"; + let ParserMethod = "parseMSA128BRegs"; +} + +def MSA128HAsmOperand : MipsAsmRegOperand { + let Name = "MSA128HAsm"; + let ParserMethod = "parseMSA128HRegs"; +} + +def MSA128WAsmOperand : MipsAsmRegOperand { + let Name = "MSA128WAsm"; + let ParserMethod = "parseMSA128WRegs"; +} + +def MSA128DAsmOperand : MipsAsmRegOperand { + let Name = "MSA128DAsm"; + let ParserMethod = "parseMSA128DRegs"; +} + def GPR32Opnd : RegisterOperand { let ParserMatchClass = GPR32AsmOperand; } @@ -501,3 +521,20 @@ def ACC64DSPOpnd : RegisterOperand { def COP2Opnd : RegisterOperand { let ParserMatchClass = COP2AsmOperand; } + +def MSA128BOpnd : RegisterOperand { + let ParserMatchClass = MSA128BAsmOperand; +} + +def MSA128HOpnd : RegisterOperand { + let ParserMatchClass = MSA128HAsmOperand; +} + +def MSA128WOpnd : RegisterOperand { + let ParserMatchClass = MSA128WAsmOperand; +} + +def MSA128DOpnd : RegisterOperand { + let ParserMatchClass = MSA128DAsmOperand; +} + -- cgit v1.1 From 4167b88cf5754597e5a7e53aa0cbba26c18b6162 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Wed, 25 Sep 2013 23:56:25 +0000 Subject: [mips][msa] Direct Object Emission for 2RF instructions. 
Patch by Matheus Almeida git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191413 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 77 +++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 41 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 881335f..08b1afb 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1128,13 +1128,8 @@ class MSA_2R_FILL_DESC_BASE : - MSA_2R_DESC_BASE; - -class MSA_2RF_RO_DESC_BASE { + RegisterOperand ROWD, RegisterOperand ROWS = ROWD, + InstrItinClass itin = NoItinerary> { dag OutOperandList = (outs ROWD:$wd); dag InOperandList = (ins ROWS:$ws); string AsmString = !strconcat(instr_asm, "\t$wd, $ws"); @@ -1621,10 +1616,10 @@ class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", vfsetoeq_v4f32, MSA128W>, class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", vfsetoeq_v2f64, MSA128D>, IsCommutable; -class FCLASS_W_DESC : MSA_2RF_RO_DESC_BASE<"fclass.w", int_mips_fclass_w, - MSA128WOpnd>; -class FCLASS_D_DESC : MSA_2RF_RO_DESC_BASE<"fclass.d", int_mips_fclass_d, - MSA128DOpnd>; +class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, + MSA128WOpnd>; +class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, + MSA128DOpnd>; class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", vfsetole_v4f32, MSA128W>; class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", vfsetole_v2f64, MSA128D>; @@ -1679,34 +1674,34 @@ class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", int_mips_fexp2_w, MSA128W>; class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", int_mips_fexp2_d, MSA128D>; class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w, - MSA128W, MSA128H>; + MSA128WOpnd, MSA128HOpnd>; class FEXUPL_D_DESC : MSA_2RF_DESC_BASE<"fexupl.d", int_mips_fexupl_d, - MSA128D, MSA128W>; + MSA128DOpnd, MSA128WOpnd>; class FEXUPR_W_DESC : MSA_2RF_DESC_BASE<"fexupr.w", int_mips_fexupr_w, - MSA128W, MSA128H>; + MSA128WOpnd, MSA128HOpnd>; class FEXUPR_D_DESC : MSA_2RF_DESC_BASE<"fexupr.d", int_mips_fexupr_d, - MSA128D, MSA128W>; + MSA128DOpnd, MSA128WOpnd>; class FFINT_S_W_DESC : MSA_2RF_DESC_BASE<"ffint_s.w", int_mips_ffint_s_w, - MSA128W>; + MSA128WOpnd>; class FFINT_S_D_DESC : MSA_2RF_DESC_BASE<"ffint_s.d", int_mips_ffint_s_d, - MSA128D>; + MSA128DOpnd>; class FFINT_U_W_DESC : MSA_2RF_DESC_BASE<"ffint_u.w", int_mips_ffint_u_w, - MSA128W>; + MSA128WOpnd>; class FFINT_U_D_DESC : MSA_2RF_DESC_BASE<"ffint_u.d", int_mips_ffint_u_d, - MSA128D>; + MSA128DOpnd>; class FFQL_W_DESC : MSA_2RF_DESC_BASE<"ffql.w", int_mips_ffql_w, - MSA128W, MSA128H>; + MSA128WOpnd, MSA128HOpnd>; class FFQL_D_DESC : MSA_2RF_DESC_BASE<"ffql.d", int_mips_ffql_d, - MSA128D, MSA128W>; + MSA128DOpnd, MSA128WOpnd>; class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w, - MSA128W, MSA128H>; + MSA128WOpnd, MSA128HOpnd>; class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d, - MSA128D, MSA128W>; + MSA128DOpnd, MSA128WOpnd>; class FILL_B_DESC : MSA_2R_FILL_DESC_BASE<"fill.b", v16i8, vsplati8, MSA128B, GPR32>; @@ -1715,8 +1710,8 @@ class FILL_H_DESC : MSA_2R_FILL_DESC_BASE<"fill.h", v8i16, vsplati16, MSA128H, class FILL_W_DESC : MSA_2R_FILL_DESC_BASE<"fill.w", v4i32, vsplati32, MSA128W, GPR32>; -class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128W>; -class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128D>; +class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128WOpnd>; +class FLOG2_D_DESC : 
MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128DOpnd>; class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, MSA128W>; @@ -1747,16 +1742,16 @@ class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", int_mips_fmsub_d, class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", fmul, MSA128W>; class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", fmul, MSA128D>; -class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", frint, MSA128W>; -class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", frint, MSA128D>; +class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", frint, MSA128WOpnd>; +class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", frint, MSA128DOpnd>; -class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, MSA128W>; -class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, MSA128D>; +class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, MSA128WOpnd>; +class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, MSA128DOpnd>; class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w, - MSA128W>; + MSA128WOpnd>; class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d, - MSA128D>; + MSA128DOpnd>; class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, MSA128W>; class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, MSA128D>; @@ -1776,8 +1771,8 @@ class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, MSA128D>; class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, MSA128W>; class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, MSA128D>; -class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", fsqrt, MSA128W>; -class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", fsqrt, MSA128D>; +class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", fsqrt, MSA128WOpnd>; +class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", fsqrt, MSA128DOpnd>; class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", fsub, MSA128W>; class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", fsub, MSA128D>; @@ -1798,24 +1793,24 @@ class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w, MSA128W>; class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d, MSA128D>; class FTRUNC_S_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.w", int_mips_ftrunc_s_w, - MSA128W>; + MSA128WOpnd>; class FTRUNC_S_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.d", int_mips_ftrunc_s_d, - MSA128D>; + MSA128DOpnd>; class FTRUNC_U_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.w", int_mips_ftrunc_u_w, - MSA128W>; + MSA128WOpnd>; class FTRUNC_U_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.d", int_mips_ftrunc_u_d, - MSA128D>; + MSA128DOpnd>; class FTINT_S_W_DESC : MSA_2RF_DESC_BASE<"ftint_s.w", int_mips_ftint_s_w, - MSA128W>; + MSA128WOpnd>; class FTINT_S_D_DESC : MSA_2RF_DESC_BASE<"ftint_s.d", int_mips_ftint_s_d, - MSA128D>; + MSA128DOpnd>; class FTINT_U_W_DESC : MSA_2RF_DESC_BASE<"ftint_u.w", int_mips_ftint_u_w, - MSA128W>; + MSA128WOpnd>; class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d, - MSA128D>; + MSA128DOpnd>; class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, MSA128H, MSA128W, MSA128W>; -- cgit v1.1 From afee613bf390b255eef9cd46fd336fecc7663a1a Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Thu, 26 Sep 2013 00:02:44 +0000 Subject: [mips][msa] Updates encoding of 3R instructions to match the latest revision of the MSA spec (1.06). Internal changes only. 
Patch by Matheus Almeida git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191414 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 08b1afb..529be49 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -961,15 +961,15 @@ class SUBS_U_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010001>; class SUBS_U_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010001>; class SUBS_U_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010001>; -class SUBSUS_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010001>; -class SUBSUS_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010001>; -class SUBSUS_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010001>; -class SUBSUS_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010001>; - -class SUBSUU_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010001>; -class SUBSUU_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010001>; -class SUBSUU_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010001>; -class SUBSUU_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010001>; +class SUBSUS_U_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010001>; +class SUBSUS_U_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010001>; +class SUBSUS_U_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010001>; +class SUBSUS_U_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010001>; +class SUBSUU_S_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010001>; +class SUBSUU_S_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010001>; +class SUBSUU_S_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010001>; +class SUBSUU_S_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010001>; class SUBV_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b001110>; class SUBV_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b001110>; -- cgit v1.1 From 9fa81ab83898314d1a6608e8303dc57253292796 Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Thu, 26 Sep 2013 00:09:46 +0000 Subject: [mips][msa] Direct Object Emission for 3R instructions. This is the first set of instructions with a ".b" modifier, so we need to add the required code to disassemble an MSA128B register class.
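As a hedged aside (the struct and helper names below are our own, not from the patch), the MSA_3R_FMT layout filled in by the MipsMSAInstrFormats.td hunk that follows, wt in Inst{20-16}, ws in Inst{15-11} and wd in Inst{10-6}, corresponds to this shift-and-mask arithmetic on a 32-bit instruction word:

#include <cstdint>

// Illustrative only: extract the three 5-bit register fields of a
// 3R-format MSA instruction, i.e. the values the generated disassembler
// hands to callbacks such as DecodeMSA128BRegisterClass.
struct MSA3RFields {
  unsigned wt, ws, wd;
};

static MSA3RFields extractMSA3RFields(uint32_t Insn) {
  MSA3RFields F;
  F.wt = (Insn >> 16) & 0x1f; // Inst{20-16}: second source register $wt
  F.ws = (Insn >> 11) & 0x1f; // Inst{15-11}: first source register $ws
  F.wd = (Insn >> 6) & 0x1f;  // Inst{10-6}:  destination register $wd
  return F;
}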
Patch by Matheus Almeida git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191415 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 17 + lib/Target/Mips/MipsMSAInstrFormats.td | 7 + lib/Target/Mips/MipsMSAInstrInfo.td | 708 ++++++++++++---------- 3 files changed, 402 insertions(+), 330 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 9fabc3a..c8d0d88 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -170,6 +170,11 @@ static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -635,6 +640,18 @@ static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::MSA128BRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index 9b63ef0..ddd3110 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -63,8 +63,15 @@ class MSA_2RF_FMT major, bits<1> df, bits<6> minor>: MSAInst { } class MSA_3R_FMT major, bits<2> df, bits<6> minor>: MSAInst { + bits<5> wt; + bits<5> ws; + bits<5> wd; + let Inst{25-23} = major; let Inst{22-21} = df; + let Inst{20-16} = wt; + let Inst{15-11} = ws; + let Inst{10-6} = wd; let Inst{5-0} = minor; } diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 529be49..fb02d3f 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1138,38 +1138,38 @@ class MSA_2RF_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, RCWT:$wt); + dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWS:$ws, ROWT:$wt); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, RCWT:$wt))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))]; InstrItinClass Itinerary = itin; } -class MSA_3R_VSHF_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWD:$wd_in, RCWS:$ws, RCWT:$wt); + dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ROWT:$wt); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); - list Pattern = [(set RCWD:$wd, (MipsVSHF RCWD:$wd_in, RCWS:$ws, - RCWT:$wt))]; + list Pattern = [(set ROWD:$wd, (MipsVSHF ROWD:$wd_in, ROWS:$ws, + ROWT:$wt))]; string Constraints = "$wd = $wd_in"; InstrItinClass Itinerary = itin; } class MSA_3R_4R_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWD:$wd_in, RCWS:$ws, RCWT:$wt); + dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ROWT:$wt); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); - list Pattern = [(set 
RCWD:$wd, - (OpNode RCWD:$wd_in, RCWS:$ws, RCWT:$wt))]; + list Pattern = [(set ROWD:$wd, + (OpNode ROWD:$wd_in, ROWS:$ws, ROWT:$wt))]; InstrItinClass Itinerary = itin; string Constraints = "$wd = $wd_in"; } @@ -1177,14 +1177,26 @@ class MSA_3R_4R_DESC_BASE : - MSA_3R_DESC_BASE; + InstrItinClass itin = NoItinerary> { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, RCWT:$wt); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, RCWT:$wt))]; + InstrItinClass Itinerary = itin; +} class MSA_3RF_4RF_DESC_BASE : - MSA_3R_4R_DESC_BASE; + InstrItinClass itin = NoItinerary> { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWD:$wd_in, RCWS:$ws, RCWT:$wt); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); + list Pattern = [(set RCWD:$wd, + (OpNode RCWD:$wd_in, RCWS:$ws, RCWT:$wt))]; + InstrItinClass Itinerary = itin; + string Constraints = "$wd = $wd_in"; +} class MSA_CBRANCH_DESC_BASE { dag OutOperandList = (outs); @@ -1241,46 +1253,46 @@ class MSA_VEC_PSEUDO_BASE; -class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, MSA128B>, +class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, MSA128BOpnd>, IsCommutable; -class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, MSA128H>, +class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, MSA128HOpnd>, IsCommutable; -class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, MSA128W>, +class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, MSA128WOpnd>, IsCommutable; -class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, MSA128D>, +class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, MSA128DOpnd>, IsCommutable; -class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b, MSA128B>, - IsCommutable; -class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h, MSA128H>, - IsCommutable; -class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w, MSA128W>, - IsCommutable; -class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d, MSA128D>, - IsCommutable; - -class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b, MSA128B>, - IsCommutable; -class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h, MSA128H>, - IsCommutable; -class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w, MSA128W>, - IsCommutable; -class ADDS_S_D_DESC : MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d, MSA128D>, - IsCommutable; - -class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b", int_mips_adds_u_b, MSA128B>, - IsCommutable; -class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h, MSA128H>, - IsCommutable; -class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w, MSA128W>, - IsCommutable; -class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d, MSA128D>, - IsCommutable; - -class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", add, MSA128B>, IsCommutable; -class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", add, MSA128H>, IsCommutable; -class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", add, MSA128W>, IsCommutable; -class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", add, MSA128D>, IsCommutable; +class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b, + MSA128BOpnd>, IsCommutable; +class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h, + MSA128HOpnd>, IsCommutable; +class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w, + 
MSA128WOpnd>, IsCommutable; +class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d, + MSA128DOpnd>, IsCommutable; + +class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b, + MSA128BOpnd>, IsCommutable; +class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h, + MSA128HOpnd>, IsCommutable; +class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w, + MSA128WOpnd>, IsCommutable; +class ADDS_S_D_DESC : MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d, + MSA128DOpnd>, IsCommutable; + +class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b", int_mips_adds_u_b, + MSA128BOpnd>, IsCommutable; +class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h, + MSA128HOpnd>, IsCommutable; +class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w, + MSA128WOpnd>, IsCommutable; +class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d, + MSA128DOpnd>, IsCommutable; + +class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", add, MSA128BOpnd>, IsCommutable; +class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", add, MSA128HOpnd>, IsCommutable; +class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", add, MSA128WOpnd>, IsCommutable; +class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", add, MSA128DOpnd>, IsCommutable; class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", add, vsplati8_uimm5, MSA128B>; class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", add, vsplati16_uimm5, MSA128H>; @@ -1294,66 +1306,74 @@ class AND_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", and, vsplati8_uimm8, MSA128B>; -class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, MSA128B>; -class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, MSA128H>; -class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w, MSA128W>; -class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d, MSA128D>; - -class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b, MSA128B>; -class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h, MSA128H>; -class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w, MSA128W>; -class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d, MSA128D>; - -class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, MSA128B>, +class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, + MSA128BOpnd>; +class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, + MSA128HOpnd>; +class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w, + MSA128WOpnd>; +class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d, + MSA128DOpnd>; + +class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b, + MSA128BOpnd>; +class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h, + MSA128HOpnd>; +class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w, + MSA128WOpnd>; +class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d, + MSA128DOpnd>; + +class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, MSA128BOpnd>, IsCommutable; -class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, MSA128H>, +class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, MSA128HOpnd>, IsCommutable; -class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, MSA128W>, +class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, MSA128WOpnd>, IsCommutable; -class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, MSA128D>, +class AVE_S_D_DESC : 
MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, MSA128DOpnd>, IsCommutable; -class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, MSA128B>, +class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, MSA128BOpnd>, IsCommutable; -class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, MSA128H>, +class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, MSA128HOpnd>, IsCommutable; -class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, MSA128W>, +class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, MSA128WOpnd>, IsCommutable; -class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, MSA128D>, +class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, MSA128DOpnd>, IsCommutable; -class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b, MSA128B>, - IsCommutable; -class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h, MSA128H>, - IsCommutable; -class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w, MSA128W>, - IsCommutable; -class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d, MSA128D>, - IsCommutable; - -class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b, MSA128B>, - IsCommutable; -class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h, MSA128H>, - IsCommutable; -class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w, MSA128W>, - IsCommutable; -class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d, MSA128D>, - IsCommutable; - -class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, MSA128B>; -class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, MSA128H>; -class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, MSA128W>; -class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, MSA128D>; +class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b, + MSA128BOpnd>, IsCommutable; +class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h, + MSA128HOpnd>, IsCommutable; +class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w, + MSA128WOpnd>, IsCommutable; +class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d, + MSA128DOpnd>, IsCommutable; + +class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b, + MSA128BOpnd>, IsCommutable; +class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h, + MSA128HOpnd>, IsCommutable; +class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w, + MSA128WOpnd>, IsCommutable; +class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d, + MSA128DOpnd>, IsCommutable; + +class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, MSA128BOpnd>; +class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, MSA128HOpnd>; +class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, MSA128WOpnd>; +class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, MSA128DOpnd>; class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", int_mips_bclri_b, MSA128B>; class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", int_mips_bclri_h, MSA128H>; class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", int_mips_bclri_w, MSA128W>; class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", int_mips_bclri_d, MSA128D>; -class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, MSA128B>; -class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, MSA128H>; -class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, MSA128W>; -class BINSL_D_DESC : 
MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, MSA128D>; +class BINSL_B_DESC : MSA_3R_DESC_BASE<"binsl.b", int_mips_binsl_b, MSA128BOpnd>; +class BINSL_H_DESC : MSA_3R_DESC_BASE<"binsl.h", int_mips_binsl_h, MSA128HOpnd>; +class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, MSA128WOpnd>; +class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, MSA128DOpnd>; class BINSLI_B_DESC : MSA_BIT_B_DESC_BASE<"binsli.b", int_mips_binsli_b, MSA128B>; @@ -1364,10 +1384,10 @@ class BINSLI_W_DESC : MSA_BIT_W_DESC_BASE<"binsli.w", int_mips_binsli_w, class BINSLI_D_DESC : MSA_BIT_D_DESC_BASE<"binsli.d", int_mips_binsli_d, MSA128D>; -class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, MSA128B>; -class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, MSA128H>; -class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, MSA128W>; -class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, MSA128D>; +class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, MSA128BOpnd>; +class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, MSA128HOpnd>; +class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, MSA128WOpnd>; +class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, MSA128DOpnd>; class BINSRI_B_DESC : MSA_BIT_B_DESC_BASE<"binsri.b", int_mips_binsri_b, MSA128B>; @@ -1386,10 +1406,10 @@ class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, MSA128B>; class BMZI_B_DESC : MSA_I8_X_DESC_BASE<"bmzi.b", int_mips_bmzi_b, MSA128B>; -class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, MSA128B>; -class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, MSA128H>; -class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, MSA128W>; -class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, MSA128D>; +class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, MSA128BOpnd>; +class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, MSA128HOpnd>; +class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", int_mips_bneg_w, MSA128WOpnd>; +class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", int_mips_bneg_d, MSA128DOpnd>; class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", int_mips_bnegi_b, MSA128B>; class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", int_mips_bnegi_h, MSA128H>; @@ -1424,10 +1444,10 @@ class BSELI_B_DESC { string Constraints = "$wd = $wd_in"; } -class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, MSA128B>; -class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128H>; -class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, MSA128W>; -class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, MSA128D>; +class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, MSA128BOpnd>; +class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128HOpnd>; +class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", int_mips_bset_w, MSA128WOpnd>; +class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", int_mips_bset_d, MSA128DOpnd>; class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", int_mips_bseti_b, MSA128B>; class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", int_mips_bseti_h, MSA128H>; @@ -1441,13 +1461,13 @@ class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128D>; class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128B>; -class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", vseteq_v16i8, MSA128B>, +class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", vseteq_v16i8, MSA128BOpnd>, IsCommutable; -class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", vseteq_v8i16, MSA128H>, +class 
CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", vseteq_v8i16, MSA128HOpnd>, IsCommutable; -class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", vseteq_v4i32, MSA128W>, +class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", vseteq_v4i32, MSA128WOpnd>, IsCommutable; -class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", vseteq_v2i64, MSA128D>, +class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", vseteq_v2i64, MSA128DOpnd>, IsCommutable; class CEQI_B_DESC : MSA_I5_DESC_BASE<"ceqi.b", vseteq_v16i8, vsplati8_simm5, @@ -1467,15 +1487,15 @@ class CFCMSA_DESC { bit hasSideEffects = 1; } -class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", vsetle_v16i8, MSA128B>; -class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", vsetle_v8i16, MSA128H>; -class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", vsetle_v4i32, MSA128W>; -class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", vsetle_v2i64, MSA128D>; +class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", vsetle_v16i8, MSA128BOpnd>; +class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", vsetle_v8i16, MSA128HOpnd>; +class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", vsetle_v4i32, MSA128WOpnd>; +class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", vsetle_v2i64, MSA128DOpnd>; -class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", vsetule_v16i8, MSA128B>; -class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", vsetule_v8i16, MSA128H>; -class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", vsetule_v4i32, MSA128W>; -class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", vsetule_v2i64, MSA128D>; +class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", vsetule_v16i8, MSA128BOpnd>; +class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", vsetule_v8i16, MSA128HOpnd>; +class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", vsetule_v4i32, MSA128WOpnd>; +class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", vsetule_v2i64, MSA128DOpnd>; class CLEI_S_B_DESC : MSA_I5_DESC_BASE<"clei_s.b", vsetle_v16i8, vsplati8_simm5, MSA128B>; @@ -1495,15 +1515,15 @@ class CLEI_U_W_DESC : MSA_I5_DESC_BASE<"clei_u.w", vsetule_v4i32, class CLEI_U_D_DESC : MSA_I5_DESC_BASE<"clei_u.d", vsetule_v2i64, vsplati64_uimm5, MSA128D>; -class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", vsetlt_v16i8, MSA128B>; -class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", vsetlt_v8i16, MSA128H>; -class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", vsetlt_v4i32, MSA128W>; -class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", vsetlt_v2i64, MSA128D>; +class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", vsetlt_v16i8, MSA128BOpnd>; +class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", vsetlt_v8i16, MSA128HOpnd>; +class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", vsetlt_v4i32, MSA128WOpnd>; +class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", vsetlt_v2i64, MSA128DOpnd>; -class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", vsetult_v16i8, MSA128B>; -class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", vsetult_v8i16, MSA128H>; -class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", vsetult_v4i32, MSA128W>; -class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", vsetult_v2i64, MSA128D>; +class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", vsetult_v16i8, MSA128BOpnd>; +class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", vsetult_v8i16, MSA128HOpnd>; +class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", vsetult_v4i32, MSA128WOpnd>; +class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", vsetult_v2i64, MSA128DOpnd>; class CLTI_S_B_DESC : MSA_I5_DESC_BASE<"clti_s.b", vsetlt_v16i8, vsplati8_simm5, MSA128B>; @@ -1545,63 +1565,75 @@ class CTCMSA_DESC { bit hasSideEffects = 1; } -class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", sdiv, MSA128B>; -class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", sdiv, 
MSA128H>; -class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", sdiv, MSA128W>; -class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", sdiv, MSA128D>; - -class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", udiv, MSA128B>; -class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", udiv, MSA128H>; -class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", udiv, MSA128W>; -class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", udiv, MSA128D>; - -class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, MSA128H, - MSA128B, MSA128B>, IsCommutable; -class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, MSA128W, - MSA128H, MSA128H>, IsCommutable; -class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, MSA128D, - MSA128W, MSA128W>, IsCommutable; - -class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, MSA128H, - MSA128B, MSA128B>, IsCommutable; -class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, MSA128W, - MSA128H, MSA128H>, IsCommutable; -class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, MSA128D, - MSA128W, MSA128W>, IsCommutable; +class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", sdiv, MSA128BOpnd>; +class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", sdiv, MSA128HOpnd>; +class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", sdiv, MSA128WOpnd>; +class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", sdiv, MSA128DOpnd>; + +class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", udiv, MSA128BOpnd>; +class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", udiv, MSA128HOpnd>; +class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", udiv, MSA128WOpnd>; +class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", udiv, MSA128DOpnd>; + +class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h, + MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>, + IsCommutable; +class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w, + MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>, + IsCommutable; +class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d, + MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>, + IsCommutable; + +class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h, + MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>, + IsCommutable; +class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w, + MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>, + IsCommutable; +class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d, + MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>, + IsCommutable; class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h, - MSA128H, MSA128B, MSA128B>, - IsCommutable; + MSA128HOpnd, MSA128BOpnd, + MSA128BOpnd>, IsCommutable; class DPADD_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.w", int_mips_dpadd_s_w, - MSA128W, MSA128H, MSA128H>, - IsCommutable; + MSA128WOpnd, MSA128HOpnd, + MSA128HOpnd>, IsCommutable; class DPADD_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.d", int_mips_dpadd_s_d, - MSA128D, MSA128W, MSA128W>, - IsCommutable; + MSA128DOpnd, MSA128WOpnd, + MSA128WOpnd>, IsCommutable; class DPADD_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.h", int_mips_dpadd_u_h, - MSA128H, MSA128B, MSA128B>, - IsCommutable; + MSA128HOpnd, MSA128BOpnd, + MSA128BOpnd>, IsCommutable; class DPADD_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.w", int_mips_dpadd_u_w, - MSA128W, MSA128H, MSA128H>, - IsCommutable; + MSA128WOpnd, MSA128HOpnd, + MSA128HOpnd>, IsCommutable; class DPADD_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.d", int_mips_dpadd_u_d, - MSA128D, MSA128W, MSA128W>, - IsCommutable; + MSA128DOpnd, MSA128WOpnd, + MSA128WOpnd>, IsCommutable; class DPSUB_S_H_DESC : 
MSA_3R_4R_DESC_BASE<"dpsub_s.h", int_mips_dpsub_s_h, - MSA128H, MSA128B, MSA128B>; + MSA128HOpnd, MSA128BOpnd, + MSA128BOpnd>; class DPSUB_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.w", int_mips_dpsub_s_w, - MSA128W, MSA128H, MSA128H>; + MSA128WOpnd, MSA128HOpnd, + MSA128HOpnd>; class DPSUB_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.d", int_mips_dpsub_s_d, - MSA128D, MSA128W, MSA128W>; + MSA128DOpnd, MSA128WOpnd, + MSA128WOpnd>; class DPSUB_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.h", int_mips_dpsub_u_h, - MSA128H, MSA128B, MSA128B>; + MSA128HOpnd, MSA128BOpnd, + MSA128BOpnd>; class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w, - MSA128W, MSA128H, MSA128H>; + MSA128WOpnd, MSA128HOpnd, + MSA128HOpnd>; class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, - MSA128D, MSA128W, MSA128W>; + MSA128DOpnd, MSA128WOpnd, + MSA128WOpnd>; class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", fadd, MSA128W>, IsCommutable; class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", fadd, MSA128D>, IsCommutable; @@ -1817,53 +1849,53 @@ class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w, MSA128W, MSA128D, MSA128D>; -class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h, MSA128H, - MSA128B, MSA128B>; -class HADD_S_W_DESC : MSA_3R_DESC_BASE<"hadd_s.w", int_mips_hadd_s_w, MSA128W, - MSA128H, MSA128H>; -class HADD_S_D_DESC : MSA_3R_DESC_BASE<"hadd_s.d", int_mips_hadd_s_d, MSA128D, - MSA128W, MSA128W>; - -class HADD_U_H_DESC : MSA_3R_DESC_BASE<"hadd_u.h", int_mips_hadd_u_h, MSA128H, - MSA128B, MSA128B>; -class HADD_U_W_DESC : MSA_3R_DESC_BASE<"hadd_u.w", int_mips_hadd_u_w, MSA128W, - MSA128H, MSA128H>; -class HADD_U_D_DESC : MSA_3R_DESC_BASE<"hadd_u.d", int_mips_hadd_u_d, MSA128D, - MSA128W, MSA128W>; - -class HSUB_S_H_DESC : MSA_3R_DESC_BASE<"hsub_s.h", int_mips_hsub_s_h, MSA128H, - MSA128B, MSA128B>; -class HSUB_S_W_DESC : MSA_3R_DESC_BASE<"hsub_s.w", int_mips_hsub_s_w, MSA128W, - MSA128H, MSA128H>; -class HSUB_S_D_DESC : MSA_3R_DESC_BASE<"hsub_s.d", int_mips_hsub_s_d, MSA128D, - MSA128W, MSA128W>; - -class HSUB_U_H_DESC : MSA_3R_DESC_BASE<"hsub_u.h", int_mips_hsub_u_h, MSA128H, - MSA128B, MSA128B>; -class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w, MSA128W, - MSA128H, MSA128H>; -class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d, MSA128D, - MSA128W, MSA128W>; - -class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", MipsILVEV, MSA128B>; -class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", MipsILVEV, MSA128H>; -class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", MipsILVEV, MSA128W>; -class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", MipsILVEV, MSA128D>; - -class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", MipsILVL, MSA128B>; -class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", MipsILVL, MSA128H>; -class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", MipsILVL, MSA128W>; -class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", MipsILVL, MSA128D>; - -class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", MipsILVOD, MSA128B>; -class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", MipsILVOD, MSA128H>; -class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", MipsILVOD, MSA128W>; -class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", MipsILVOD, MSA128D>; - -class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", MipsILVR, MSA128B>; -class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", MipsILVR, MSA128H>; -class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", MipsILVR, MSA128W>; -class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", MipsILVR, MSA128D>; +class 
HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h, + MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>; +class HADD_S_W_DESC : MSA_3R_DESC_BASE<"hadd_s.w", int_mips_hadd_s_w, + MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>; +class HADD_S_D_DESC : MSA_3R_DESC_BASE<"hadd_s.d", int_mips_hadd_s_d, + MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>; + +class HADD_U_H_DESC : MSA_3R_DESC_BASE<"hadd_u.h", int_mips_hadd_u_h, + MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>; +class HADD_U_W_DESC : MSA_3R_DESC_BASE<"hadd_u.w", int_mips_hadd_u_w, + MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>; +class HADD_U_D_DESC : MSA_3R_DESC_BASE<"hadd_u.d", int_mips_hadd_u_d, + MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>; + +class HSUB_S_H_DESC : MSA_3R_DESC_BASE<"hsub_s.h", int_mips_hsub_s_h, + MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>; +class HSUB_S_W_DESC : MSA_3R_DESC_BASE<"hsub_s.w", int_mips_hsub_s_w, + MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>; +class HSUB_S_D_DESC : MSA_3R_DESC_BASE<"hsub_s.d", int_mips_hsub_s_d, + MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>; + +class HSUB_U_H_DESC : MSA_3R_DESC_BASE<"hsub_u.h", int_mips_hsub_u_h, + MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>; +class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w, + MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>; +class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d, + MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>; + +class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", MipsILVEV, MSA128BOpnd>; +class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", MipsILVEV, MSA128HOpnd>; +class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", MipsILVEV, MSA128WOpnd>; +class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", MipsILVEV, MSA128DOpnd>; + +class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", MipsILVL, MSA128BOpnd>; +class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", MipsILVL, MSA128HOpnd>; +class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", MipsILVL, MSA128WOpnd>; +class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", MipsILVL, MSA128DOpnd>; + +class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", MipsILVOD, MSA128BOpnd>; +class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", MipsILVOD, MSA128HOpnd>; +class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", MipsILVOD, MSA128WOpnd>; +class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", MipsILVOD, MSA128DOpnd>; + +class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", MipsILVR, MSA128BOpnd>; +class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", MipsILVR, MSA128HOpnd>; +class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", MipsILVR, MSA128WOpnd>; +class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", MipsILVR, MSA128DOpnd>; class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", vinsert_v16i8, MSA128B, GPR32>; @@ -1924,25 +1956,29 @@ class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h, class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w, MSA128W>; -class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b, MSA128B>; -class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", int_mips_maddv_h, MSA128H>; -class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", int_mips_maddv_w, MSA128W>; -class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", int_mips_maddv_d, MSA128D>; +class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b, + MSA128BOpnd>; +class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", int_mips_maddv_h, + MSA128HOpnd>; +class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", int_mips_maddv_w, + MSA128WOpnd>; +class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", int_mips_maddv_d, + MSA128DOpnd>; -class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", 
int_mips_max_a_b, MSA128B>; -class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, MSA128H>; -class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, MSA128W>; -class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, MSA128D>; +class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, MSA128BOpnd>; +class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, MSA128HOpnd>; +class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, MSA128WOpnd>; +class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, MSA128DOpnd>; -class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", MipsVSMax, MSA128B>; -class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", MipsVSMax, MSA128H>; -class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", MipsVSMax, MSA128W>; -class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", MipsVSMax, MSA128D>; +class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", MipsVSMax, MSA128BOpnd>; +class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", MipsVSMax, MSA128HOpnd>; +class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", MipsVSMax, MSA128WOpnd>; +class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", MipsVSMax, MSA128DOpnd>; -class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", MipsVUMax, MSA128B>; -class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", MipsVUMax, MSA128H>; -class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", MipsVUMax, MSA128W>; -class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", MipsVUMax, MSA128D>; +class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", MipsVUMax, MSA128BOpnd>; +class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", MipsVUMax, MSA128HOpnd>; +class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", MipsVUMax, MSA128WOpnd>; +class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", MipsVUMax, MSA128DOpnd>; class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8_simm5, MSA128B>; @@ -1962,20 +1998,20 @@ class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32_uimm5, class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64_uimm5, MSA128D>; -class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B>; -class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128H>; -class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, MSA128W>; -class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, MSA128D>; +class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128BOpnd>; +class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128HOpnd>; +class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, MSA128WOpnd>; +class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, MSA128DOpnd>; -class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", MipsVSMin, MSA128B>; -class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", MipsVSMin, MSA128H>; -class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", MipsVSMin, MSA128W>; -class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", MipsVSMin, MSA128D>; +class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", MipsVSMin, MSA128BOpnd>; +class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", MipsVSMin, MSA128HOpnd>; +class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", MipsVSMin, MSA128WOpnd>; +class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", MipsVSMin, MSA128DOpnd>; -class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", MipsVUMin, MSA128B>; -class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", MipsVUMin, MSA128H>; -class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", MipsVUMin, MSA128W>; -class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", MipsVUMin, MSA128D>; +class 
MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", MipsVUMin, MSA128BOpnd>; +class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", MipsVUMin, MSA128HOpnd>; +class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", MipsVUMin, MSA128WOpnd>; +class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", MipsVUMin, MSA128DOpnd>; class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8_simm5, MSA128B>; @@ -1995,15 +2031,15 @@ class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32_uimm5, class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64_uimm5, MSA128D>; -class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; -class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H>; -class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", int_mips_mod_s_w, MSA128W>; -class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", int_mips_mod_s_d, MSA128D>; +class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128BOpnd>; +class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128HOpnd>; +class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", int_mips_mod_s_w, MSA128WOpnd>; +class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", int_mips_mod_s_d, MSA128DOpnd>; -class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", int_mips_mod_u_b, MSA128B>; -class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", int_mips_mod_u_h, MSA128H>; -class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, MSA128W>; -class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, MSA128D>; +class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", int_mips_mod_u_b, MSA128BOpnd>; +class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", int_mips_mod_u_h, MSA128HOpnd>; +class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", int_mips_mod_u_w, MSA128WOpnd>; +class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", int_mips_mod_u_d, MSA128DOpnd>; class MOVE_V_DESC { dag OutOperandList = (outs MSA128B:$wd); @@ -2023,10 +2059,14 @@ class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h, class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w, MSA128W>; -class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, MSA128B>; -class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, MSA128H>; -class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, MSA128W>; -class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, MSA128D>; +class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, + MSA128BOpnd>; +class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", int_mips_msubv_h, + MSA128HOpnd>; +class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, + MSA128WOpnd>; +class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, + MSA128DOpnd>; class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, MSA128H>; class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w, MSA128W>; @@ -2036,10 +2076,10 @@ class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h, class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w, MSA128W>; -class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", mul, MSA128B>; -class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", mul, MSA128H>; -class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", mul, MSA128W>; -class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", mul, MSA128D>; +class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", mul, MSA128BOpnd>; +class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", mul, MSA128HOpnd>; +class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", mul, 
MSA128WOpnd>; +class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", mul, MSA128DOpnd>; class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, MSA128B>; class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, MSA128H>; @@ -2066,15 +2106,15 @@ class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8_uimm8, MSA128B>; -class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", MipsPCKEV, MSA128B>; -class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", MipsPCKEV, MSA128H>; -class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", MipsPCKEV, MSA128W>; -class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", MipsPCKEV, MSA128D>; +class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", MipsPCKEV, MSA128BOpnd>; +class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", MipsPCKEV, MSA128HOpnd>; +class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", MipsPCKEV, MSA128WOpnd>; +class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", MipsPCKEV, MSA128DOpnd>; -class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", MipsPCKOD, MSA128B>; -class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", MipsPCKOD, MSA128H>; -class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", MipsPCKOD, MSA128W>; -class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", MipsPCKOD, MSA128D>; +class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", MipsPCKOD, MSA128BOpnd>; +class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", MipsPCKOD, MSA128HOpnd>; +class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", MipsPCKOD, MSA128WOpnd>; +class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", MipsPCKOD, MSA128DOpnd>; class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", ctpop, MSA128B>; class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128H>; @@ -2095,20 +2135,20 @@ class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128B>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128H>; class SHF_W_DESC : MSA_I8_SHF_DESC_BASE<"shf.w", MSA128W>; -class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B>; -class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>; -class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, MSA128W>; -class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, MSA128D>; +class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128BOpnd>; +class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128HOpnd>; +class SLD_W_DESC : MSA_3R_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>; +class SLD_D_DESC : MSA_3R_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>; class SLDI_B_DESC : MSA_BIT_B_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128B>; class SLDI_H_DESC : MSA_BIT_H_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128H>; class SLDI_W_DESC : MSA_BIT_W_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128W>; class SLDI_D_DESC : MSA_BIT_D_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128D>; -class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128B>; -class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128H>; -class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128W>; -class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128D>; +class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>; +class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; +class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128WOpnd>; +class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128DOpnd>; class SLLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.b", shl, vsplati8_uimm3, MSA128B>; @@ -2119,14 +2159,14 @@ class SLLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.w", shl, vsplati32_uimm5, class SLLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.d", shl, vsplati64_uimm6, MSA128D>; -class 
SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128B, - MSA128B, GPR32>; -class SPLAT_H_DESC : MSA_3R_DESC_BASE<"splat.h", int_mips_splat_h, MSA128H, - MSA128H, GPR32>; -class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, MSA128W, - MSA128W, GPR32>; -class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, MSA128D, - MSA128D, GPR32>; +class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128BOpnd, + MSA128BOpnd, GPR32Opnd>; +class SPLAT_H_DESC : MSA_3R_DESC_BASE<"splat.h", int_mips_splat_h, MSA128HOpnd, + MSA128HOpnd, GPR32Opnd>; +class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, MSA128WOpnd, + MSA128WOpnd, GPR32Opnd>; +class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, MSA128DOpnd, + MSA128DOpnd, GPR32Opnd>; class SPLATI_B_DESC : MSA_BIT_B_DESC_BASE<"splati.b", int_mips_splati_b, MSA128B>; @@ -2137,10 +2177,10 @@ class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w, class SPLATI_D_DESC : MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d, MSA128D>; -class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", sra, MSA128B>; -class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128H>; -class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128W>; -class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128D>; +class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", sra, MSA128BOpnd>; +class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128HOpnd>; +class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128WOpnd>; +class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128DOpnd>; class SRAI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.b", sra, vsplati8_uimm3, MSA128B>; @@ -2151,20 +2191,20 @@ class SRAI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.w", sra, vsplati32_uimm5, class SRAI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.d", sra, vsplati64_uimm6, MSA128D>; -class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128B>; -class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128H>; -class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128W>; -class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128D>; +class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128BOpnd>; +class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>; +class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; +class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; class SRARI_B_DESC : MSA_BIT_B_DESC_BASE<"srari.b", int_mips_srari_b, MSA128B>; class SRARI_H_DESC : MSA_BIT_H_DESC_BASE<"srari.h", int_mips_srari_h, MSA128H>; class SRARI_W_DESC : MSA_BIT_W_DESC_BASE<"srari.w", int_mips_srari_w, MSA128W>; class SRARI_D_DESC : MSA_BIT_D_DESC_BASE<"srari.d", int_mips_srari_d, MSA128D>; -class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128B>; -class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128H>; -class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128W>; -class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128D>; +class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; +class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; +class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128WOpnd>; +class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128DOpnd>; class SRLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.b", srl, vsplati8_uimm3, MSA128B>; @@ -2175,10 +2215,10 @@ class SRLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.w", srl, vsplati32_uimm5, class SRLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.d", srl, vsplati64_uimm6, 
MSA128D>; -class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128B>; -class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128H>; -class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128W>; -class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128D>; +class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128BOpnd>; +class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>; +class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>; +class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; class SRLRI_B_DESC : MSA_BIT_B_DESC_BASE<"srlri.b", int_mips_srlri_b, MSA128B>; class SRLRI_H_DESC : MSA_BIT_H_DESC_BASE<"srlri.h", int_mips_srlri_h, MSA128H>; @@ -2217,48 +2257,56 @@ class STX_H_DESC : STX_DESC_BASE<"stx.h", store, v8i16, MSA128H>; class STX_W_DESC : STX_DESC_BASE<"stx.w", store, v4i32, MSA128W>; class STX_D_DESC : STX_DESC_BASE<"stx.d", store, v2i64, MSA128D>; -class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b, MSA128B>; -class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h, MSA128H>; -class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w, MSA128W>; -class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d, MSA128D>; - -class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b, MSA128B>; -class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h, MSA128H>; -class SUBS_U_W_DESC : MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w, MSA128W>; -class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d, MSA128D>; +class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b, + MSA128BOpnd>; +class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h, + MSA128HOpnd>; +class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w, + MSA128WOpnd>; +class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d, + MSA128DOpnd>; + +class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b, + MSA128BOpnd>; +class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h, + MSA128HOpnd>; +class SUBS_U_W_DESC : MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w, + MSA128WOpnd>; +class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d, + MSA128DOpnd>; class SUBSUS_U_B_DESC : MSA_3R_DESC_BASE<"subsus_u.b", int_mips_subsus_u_b, - MSA128B>; + MSA128BOpnd>; class SUBSUS_U_H_DESC : MSA_3R_DESC_BASE<"subsus_u.h", int_mips_subsus_u_h, - MSA128H>; + MSA128HOpnd>; class SUBSUS_U_W_DESC : MSA_3R_DESC_BASE<"subsus_u.w", int_mips_subsus_u_w, - MSA128W>; + MSA128WOpnd>; class SUBSUS_U_D_DESC : MSA_3R_DESC_BASE<"subsus_u.d", int_mips_subsus_u_d, - MSA128D>; + MSA128DOpnd>; class SUBSUU_S_B_DESC : MSA_3R_DESC_BASE<"subsuu_s.b", int_mips_subsuu_s_b, - MSA128B>; + MSA128BOpnd>; class SUBSUU_S_H_DESC : MSA_3R_DESC_BASE<"subsuu_s.h", int_mips_subsuu_s_h, - MSA128H>; + MSA128HOpnd>; class SUBSUU_S_W_DESC : MSA_3R_DESC_BASE<"subsuu_s.w", int_mips_subsuu_s_w, - MSA128W>; + MSA128WOpnd>; class SUBSUU_S_D_DESC : MSA_3R_DESC_BASE<"subsuu_s.d", int_mips_subsuu_s_d, - MSA128D>; + MSA128DOpnd>; -class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", sub, MSA128B>; -class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", sub, MSA128H>; -class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", sub, MSA128W>; -class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", sub, MSA128D>; +class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", sub, MSA128BOpnd>; +class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", 
sub, MSA128HOpnd>; +class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", sub, MSA128WOpnd>; +class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", sub, MSA128DOpnd>; class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", sub, vsplati8_uimm5, MSA128B>; class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", sub, vsplati16_uimm5, MSA128H>; class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32_uimm5, MSA128W>; class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64_uimm5, MSA128D>; -class VSHF_B_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.b", MSA128B>; -class VSHF_H_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.h", MSA128H>; -class VSHF_W_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.w", MSA128W>; -class VSHF_D_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.d", MSA128D>; +class VSHF_B_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.b", MSA128BOpnd>; +class VSHF_H_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.h", MSA128HOpnd>; +class VSHF_W_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.w", MSA128WOpnd>; +class VSHF_D_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.d", MSA128DOpnd>; class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", xor, MSA128B>; class XOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -- cgit v1.1 From 11c2b15c0a8282cfdc1c74968ebaba92f1fdae34 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 26 Sep 2013 04:11:24 +0000 Subject: PPC: Add support for fctid and fctiw Encodings were checked against the Power ISA documents and double checked against binutils. This fixes PR17350. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191419 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.h | 8 ++++---- lib/Target/PowerPC/PPCInstr64Bit.td | 3 +++ lib/Target/PowerPC/PPCInstrInfo.td | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index df3af35..b988199 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -42,10 +42,10 @@ namespace llvm { /// unsigned integers and single-precision outputs. FCFIDU, FCFIDS, FCFIDUS, - /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 - /// operand, producing an f64 value containing the integer representation - /// of that FP value. - FCTIDZ, FCTIWZ, + /// FCTI[D,W]Z? - The FCTID, FCTIDZ, FCTIW and FCTIWZ instructions, + /// taking an f32 or f64 operand, producing an f64 value containing the + /// integer representation of that FP value. + FCTID, FCTIDZ, FCTIW, FCTIWZ, /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for /// unsigned integers. 
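Semantically, fctid and fctiw convert using whatever rounding mode is currently set in FPSCR, while the already-supported fctidz/fctiwz always round toward zero. A sketch of the distinction in C++ terms (illustrative only; the real instructions deposit the integer result in an FPR and differ on NaN/overflow edge cases):

    #include <cmath>
    #include <cstdint>

    int64_t like_fctid(double x)  { return std::llrint(x); }          // current rounding mode
    int64_t like_fctidz(double x) { return static_cast<int64_t>(x); } // truncate toward zero
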
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index c9e70c8..6025afb 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -968,6 +968,9 @@ let PPC970_Unit = 3, neverHasSideEffects = 1, defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), "fcfid", "$frD, $frB", FPGeneral, [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; +defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), + "fctid", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctid f64:$frB))]>, isPPC64; defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), "fctidz", "$frD, $frB", FPGeneral, [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a9c916f..9baa791 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -69,7 +69,9 @@ def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; +def PPCfctid : SDNode<"PPCISD::FCTID", SDTFPUnaryOp, []>; def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; +def PPCfctiw : SDNode<"PPCISD::FCTIW", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; @@ -1692,6 +1694,9 @@ let isCompare = 1, neverHasSideEffects = 1 in { let Uses = [RM] in { let neverHasSideEffects = 1 in { + defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiw", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiw f64:$frB))]>; defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), "fctiwz", "$frD, $frB", FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; -- cgit v1.1 From dd5cebdd74aaaefcf50eb1ea44bee8b02ab65a2e Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 26 Sep 2013 05:22:11 +0000 Subject: PPC: Do not introduce ISD nodes for fctid and fctiw git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191421 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.h | 8 ++++---- lib/Target/PowerPC/PPCInstr64Bit.td | 2 +- lib/Target/PowerPC/PPCInstrInfo.td | 4 +--- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b988199..df3af35 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -42,10 +42,10 @@ namespace llvm { /// unsigned integers and single-precision outputs. FCFIDU, FCFIDS, FCFIDUS, - /// FCTI[D,W]Z? - The FCTID, FCTIDZ, FCTIW and FCTIWZ instructions, - /// taking an f32 or f64 operand, producing an f64 value containing the - /// integer representation of that FP value. - FCTID, FCTIDZ, FCTIW, FCTIWZ, + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 + /// operand, producing an f64 value containing the integer representation + /// of that FP value. + FCTIDZ, FCTIWZ, /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for /// unsigned integers. 
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 6025afb..46aafbe 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -970,7 +970,7 @@ defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), "fctid", "$frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctid f64:$frB))]>, isPPC64; + []>, isPPC64; defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), "fctidz", "$frD, $frB", FPGeneral, [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 9baa791..2bd3aad 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -69,9 +69,7 @@ def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; -def PPCfctid : SDNode<"PPCISD::FCTID", SDTFPUnaryOp, []>; def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; -def PPCfctiw : SDNode<"PPCISD::FCTIW", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; @@ -1696,7 +1694,7 @@ let Uses = [RM] in { let neverHasSideEffects = 1 in { defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), "fctiw", "$frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctiw f64:$frB))]>; + []>; defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), "fctiwz", "$frD, $frB", FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; -- cgit v1.1 From b6ac11cd03e9dd97b45dc97787171f942ef8e344 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 26 Sep 2013 05:53:35 +0000 Subject: Added temp flag -misched-bench for staging in default changes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191423 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetSubtargetInfo.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index f624c32..10e8db5 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; @@ -22,6 +23,21 @@ TargetSubtargetInfo::TargetSubtargetInfo() {} TargetSubtargetInfo::~TargetSubtargetInfo() {} +// Temporary option to compare overall performance change when moving from the +// SD scheduler to the MachineScheduler pass pipeline. It should be removed +// before 3.4. The normal way to enable/disable the MachineScheduling pass +// itself is by using -enable-misched. For targets that already use MI sched +// (via MySubTarget::enableMachineScheduler()) -misched-bench=false negates the +// subtarget hook. 
+static cl::opt BenchMachineSched("misched-bench", cl::Hidden, + cl::desc("Migrate from the target's default SD scheduler to MI scheduler")); + +bool TargetSubtargetInfo::useMachineScheduler() const { + if (BenchMachineSched.getNumOccurrences()) + return BenchMachineSched; + return enableMachineScheduler(); +} + bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } @@ -38,4 +54,3 @@ bool TargetSubtargetInfo::enablePostRAScheduler( bool TargetSubtargetInfo::useAA() const { return false; } - -- cgit v1.1 From 9637da60835a60f8ccd8289d04c60b2dcd4b9b5a Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 26 Sep 2013 09:18:48 +0000 Subject: PPC: Allow partial fills in writeNopData() When asked to pad an irregular number of bytes, we should fill with zeros. This is consistent with the behavior specified in the AIX Assembler Language Reference as well as other LLVM and binutils assemblers. N.B. There is a small deviation from binutils' PPC assembler: when handling pads which are greater than 4 bytes but not mod 4, binutils will not emit any NOP sequences at all and only use zeros. This may or may not be a bug but there is no excellent rationale as to why that behavior is important to emulate. If that behavior is needed, we can change writeNopData() to behave in the same way. This fixes PR17352. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191426 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 91f1160..0d42081 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -132,14 +132,17 @@ public: } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // Can't emit NOP with size not multiple of 32-bits - if (Count % 4 != 0) - return false; - uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) OW->Write32(0x60000000); + switch (Count % 4) { + default: break; // No leftover bytes to write + case 1: OW->Write8(0); break; + case 2: OW->Write16(0); break; + case 3: OW->Write16(0); OW->Write8(0); break; + } + return true; } -- cgit v1.1 From 268c743a3ba44ada364938bc5ff9b1be219df54f Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 26 Sep 2013 12:22:36 +0000 Subject: [ARM] Use the load-acquire/store-release instructions optimally in AArch32. Patch by Artyom Skrobov. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191428 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 156 ++++++++--- lib/Target/ARM/ARMISelLowering.cpp | 283 +++++++++++--------- lib/Target/ARM/ARMISelLowering.h | 18 +- lib/Target/ARM/ARMInstrInfo.td | 332 +++++++++++++++--------- lib/Target/ARM/ARMInstrThumb2.td | 9 + lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 2 +- 6 files changed, 495 insertions(+), 305 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index f6b3827..87d1522 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -253,7 +253,7 @@ private: SDNode *SelectConcatVector(SDNode *N); - SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); + SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. @@ -2361,23 +2361,36 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } -SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { +SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, + unsigned Op16,unsigned Op32, + unsigned Op64) { + // Mostly direct translation to the given operations, except that we preserve + // the AtomicOrdering for use later on. + AtomicSDNode *AN = cast(Node); + EVT VT = AN->getMemoryVT(); + + unsigned Op; + SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other); + if (VT == MVT::i8) + Op = Op8; + else if (VT == MVT::i16) + Op = Op16; + else if (VT == MVT::i32) + Op = Op32; + else if (VT == MVT::i64) { + Op = Op64; + VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other); + } else + llvm_unreachable("Unexpected atomic operation"); + SmallVector Ops; - Ops.push_back(Node->getOperand(1)); // Ptr - Ops.push_back(Node->getOperand(2)); // Low part of Val1 - Ops.push_back(Node->getOperand(3)); // High part of Val1 - if (Opc == ARM::ATOMCMPXCHG6432) { - Ops.push_back(Node->getOperand(4)); // Low part of Val2 - Ops.push_back(Node->getOperand(5)); // High part of Val2 - } - Ops.push_back(Node->getOperand(0)); // Chain - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(Node)->getMemOperand(); - SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), - MVT::i32, MVT::i32, MVT::Other, - Ops); - cast(ResNode)->setMemRefs(MemOp, MemOp + 1); - return ResNode; + for (unsigned i = 1; i < AN->getNumOperands(); ++i) + Ops.push_back(AN->getOperand(i)); + + Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); + Ops.push_back(AN->getOperand(0)); // Chain moves to the end + + return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size()); } SDNode *ARMDAGToDAGISel::Select(SDNode *N) { @@ -3251,31 +3264,90 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); - case ARMISD::ATOMOR64_DAG: - return SelectAtomic64(N, ARM::ATOMOR6432); - case ARMISD::ATOMXOR64_DAG: - return SelectAtomic64(N, ARM::ATOMXOR6432); - case ARMISD::ATOMADD64_DAG: - return SelectAtomic64(N, ARM::ATOMADD6432); - case ARMISD::ATOMSUB64_DAG: - return SelectAtomic64(N, ARM::ATOMSUB6432); - case ARMISD::ATOMNAND64_DAG: - return SelectAtomic64(N, ARM::ATOMNAND6432); - case ARMISD::ATOMAND64_DAG: - return SelectAtomic64(N, ARM::ATOMAND6432); - case ARMISD::ATOMSWAP64_DAG: - return SelectAtomic64(N, 
ARM::ATOMSWAP6432); - case ARMISD::ATOMCMPXCHG64_DAG: - return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); - - case ARMISD::ATOMMIN64_DAG: - return SelectAtomic64(N, ARM::ATOMMIN6432); - case ARMISD::ATOMUMIN64_DAG: - return SelectAtomic64(N, ARM::ATOMUMIN6432); - case ARMISD::ATOMMAX64_DAG: - return SelectAtomic64(N, ARM::ATOMMAX6432); - case ARMISD::ATOMUMAX64_DAG: - return SelectAtomic64(N, ARM::ATOMUMAX6432); + case ISD::ATOMIC_LOAD: + if (cast(N)->getMemoryVT() == MVT::i64) + return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64); + else + break; + + case ISD::ATOMIC_STORE: + if (cast(N)->getMemoryVT() == MVT::i64) + return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64); + else + break; + + case ISD::ATOMIC_LOAD_ADD: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_ADD_I8, + ARM::ATOMIC_LOAD_ADD_I16, + ARM::ATOMIC_LOAD_ADD_I32, + ARM::ATOMIC_LOAD_ADD_I64); + case ISD::ATOMIC_LOAD_SUB: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_SUB_I8, + ARM::ATOMIC_LOAD_SUB_I16, + ARM::ATOMIC_LOAD_SUB_I32, + ARM::ATOMIC_LOAD_SUB_I64); + case ISD::ATOMIC_LOAD_AND: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_AND_I8, + ARM::ATOMIC_LOAD_AND_I16, + ARM::ATOMIC_LOAD_AND_I32, + ARM::ATOMIC_LOAD_AND_I64); + case ISD::ATOMIC_LOAD_OR: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_OR_I8, + ARM::ATOMIC_LOAD_OR_I16, + ARM::ATOMIC_LOAD_OR_I32, + ARM::ATOMIC_LOAD_OR_I64); + case ISD::ATOMIC_LOAD_XOR: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_XOR_I8, + ARM::ATOMIC_LOAD_XOR_I16, + ARM::ATOMIC_LOAD_XOR_I32, + ARM::ATOMIC_LOAD_XOR_I64); + case ISD::ATOMIC_LOAD_NAND: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_NAND_I8, + ARM::ATOMIC_LOAD_NAND_I16, + ARM::ATOMIC_LOAD_NAND_I32, + ARM::ATOMIC_LOAD_NAND_I64); + case ISD::ATOMIC_LOAD_MIN: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_MIN_I8, + ARM::ATOMIC_LOAD_MIN_I16, + ARM::ATOMIC_LOAD_MIN_I32, + ARM::ATOMIC_LOAD_MIN_I64); + case ISD::ATOMIC_LOAD_MAX: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_MAX_I8, + ARM::ATOMIC_LOAD_MAX_I16, + ARM::ATOMIC_LOAD_MAX_I32, + ARM::ATOMIC_LOAD_MAX_I64); + case ISD::ATOMIC_LOAD_UMIN: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_UMIN_I8, + ARM::ATOMIC_LOAD_UMIN_I16, + ARM::ATOMIC_LOAD_UMIN_I32, + ARM::ATOMIC_LOAD_UMIN_I64); + case ISD::ATOMIC_LOAD_UMAX: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_UMAX_I8, + ARM::ATOMIC_LOAD_UMAX_I16, + ARM::ATOMIC_LOAD_UMAX_I32, + ARM::ATOMIC_LOAD_UMAX_I64); + case ISD::ATOMIC_SWAP: + return SelectAtomic(N, + ARM::ATOMIC_SWAP_I8, + ARM::ATOMIC_SWAP_I16, + ARM::ATOMIC_SWAP_I32, + ARM::ATOMIC_SWAP_I64); + case ISD::ATOMIC_CMP_SWAP: + return SelectAtomic(N, + ARM::ATOMIC_CMP_SWAP_I8, + ARM::ATOMIC_CMP_SWAP_I16, + ARM::ATOMIC_CMP_SWAP_I32, + ARM::ATOMIC_CMP_SWAP_I64); } return SelectCode(N); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 773b710..96942ec 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -769,8 +769,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); - // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. - setInsertFencesForAtomic(true); + // On v8, we have particularly efficient implementations of atomic fences + // if they can be combined with nearby atomic loads and stores. + if (!Subtarget->hasV8Ops()) { + // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. 
+ setInsertFencesForAtomic(true); + } + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); + //setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); } else { // Set them all for expansion, which will force libcalls. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); @@ -909,6 +915,44 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } +static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, + bool isThumb2, unsigned &LdrOpc, + unsigned &StrOpc) { + static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB}, + {ARM::LDREXH, ARM::t2LDREXH}, + {ARM::LDREX, ARM::t2LDREX}, + {ARM::LDREXD, ARM::t2LDREXD}}; + static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB}, + {ARM::LDAEXH, ARM::t2LDAEXH}, + {ARM::LDAEX, ARM::t2LDAEX}, + {ARM::LDAEXD, ARM::t2LDAEXD}}; + static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB}, + {ARM::STREXH, ARM::t2STREXH}, + {ARM::STREX, ARM::t2STREX}, + {ARM::STREXD, ARM::t2STREXD}}; + static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB}, + {ARM::STLEXH, ARM::t2STLEXH}, + {ARM::STLEX, ARM::t2STLEX}, + {ARM::STLEXD, ARM::t2STLEXD}}; + + const unsigned (*LoadOps)[2], (*StoreOps)[2]; + if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) + LoadOps = LoadAcqs; + else + LoadOps = LoadBares; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + StoreOps = StoreRels; + else + StoreOps = StoreBares; + + assert(isPowerOf2_32(Size) && Size <= 8 && + "unsupported size for atomic binary op!"); + + LdrOpc = LoadOps[Log2_32(Size)][isThumb2]; + StrOpc = StoreOps[Log2_32(Size)][isThumb2]; +} + // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset if DPR_VFP2. 
Consequently, @@ -1094,19 +1138,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; - - case ARMISD::ATOMADD64_DAG: return "ATOMADD64_DAG"; - case ARMISD::ATOMSUB64_DAG: return "ATOMSUB64_DAG"; - case ARMISD::ATOMOR64_DAG: return "ATOMOR64_DAG"; - case ARMISD::ATOMXOR64_DAG: return "ATOMXOR64_DAG"; - case ARMISD::ATOMAND64_DAG: return "ATOMAND64_DAG"; - case ARMISD::ATOMNAND64_DAG: return "ATOMNAND64_DAG"; - case ARMISD::ATOMSWAP64_DAG: return "ATOMSWAP64_DAG"; - case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG"; - case ARMISD::ATOMMIN64_DAG: return "ATOMMIN64_DAG"; - case ARMISD::ATOMUMIN64_DAG: return "ATOMUMIN64_DAG"; - case ARMISD::ATOMMAX64_DAG: return "ATOMMAX64_DAG"; - case ARMISD::ATOMUMAX64_DAG: return "ATOMUMAX64_DAG"; } } @@ -5922,32 +5953,28 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { static void ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl& Results, - SelectionDAG &DAG, unsigned NewOp) { + SelectionDAG &DAG) { SDLoc dl(Node); assert (Node->getValueType(0) == MVT::i64 && "Only know how to expand i64 atomics"); + AtomicSDNode *AN = cast(Node); SmallVector Ops; Ops.push_back(Node->getOperand(0)); // Chain Ops.push_back(Node->getOperand(1)); // Ptr - // Low part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(0))); - // High part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(1))); - if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { - // High part of Val1 + for(unsigned i=2; igetNumOperands(); i++) { + // Low part Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(3), DAG.getIntPtrConstant(0))); - // High part of Val2 + Node->getOperand(i), DAG.getIntPtrConstant(0))); + // High part Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(3), DAG.getIntPtrConstant(1))); + Node->getOperand(i), DAG.getIntPtrConstant(1))); } SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, - cast(Node)->getMemOperand()); + DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), + cast(Node)->getMemOperand(), AN->getOrdering(), + AN->getSynchScope()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); Results.push_back(Result.getValue(2)); @@ -6073,41 +6100,21 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; + case ISD::ATOMIC_STORE: + case ISD::ATOMIC_LOAD: case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); - return; case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); - return; case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); - return; case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); - return; case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); - return; case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); - return; case ISD::ATOMIC_SWAP: - ReplaceATOMIC_OP_64(N, 
Results, DAG, ARMISD::ATOMSWAP64_DAG); - return; case ISD::ATOMIC_CMP_SWAP: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); - return; case ISD::ATOMIC_LOAD_MIN: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG); - return; case ISD::ATOMIC_LOAD_UMIN: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); - return; case ISD::ATOMIC_LOAD_MAX: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); - return; case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); + ReplaceATOMIC_OP_64(N, Results, DAG); return; } if (Res.getNode()) @@ -6127,6 +6134,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, unsigned oldval = MI->getOperand(2).getReg(); unsigned newval = MI->getOperand(3).getReg(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + AtomicOrdering Ord = static_cast(MI->getOperand(4).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6142,21 +6150,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, } unsigned ldrOpc, strOpc; - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - break; - case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - break; - case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; - break; - } + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -6236,6 +6230,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6243,24 +6238,11 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, if (isThumb2) { MRI.constrainRegClass(dest, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(incr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc; - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - break; - case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - break; - case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; - break; - } + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -6344,6 +6326,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); unsigned oldval = dest; + AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6351,24 +6334,20 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, if (isThumb2) { MRI.constrainRegClass(dest, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(incr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc, extendOpc; + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; break; case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; break; case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; extendOpc = 0; break; } @@ -6412,7 +6391,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // Sign extend the value, if necessary. if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(&ARM::GPRRegClass); + oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass + : &ARM::GPRnopcRegClass); + if (!isThumb2) + MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) .addReg(dest) .addImm(0)); @@ -6450,7 +6432,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, unsigned Op1, unsigned Op2, bool NeedsCarry, bool IsCmpxchg, bool IsMinMax, ARMCC::CondCodes CC) const { - // This also handles ATOMIC_SWAP, indicated by Op1==0. + // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -6458,11 +6440,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MachineFunction::iterator It = BB; ++It; + bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); + unsigned offset = (isStore ? -2 : 0); unsigned destlo = MI->getOperand(0).getReg(); unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - unsigned vallo = MI->getOperand(3).getReg(); - unsigned valhi = MI->getOperand(4).getReg(); + unsigned ptr = MI->getOperand(offset+2).getReg(); + unsigned vallo = MI->getOperand(offset+3).getReg(); + unsigned valhi = MI->getOperand(offset+4).getReg(); + unsigned OrdIdx = offset + (IsCmpxchg ? 
7 : 5); + AtomicOrdering Ord = static_cast(MI->getOperand(OrdIdx).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6475,6 +6461,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); } + unsigned ldrOpc, strOpc; + getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; if (IsCmpxchg || IsMinMax) @@ -6514,21 +6503,23 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // fallthrough --> exitMBB BB = loopMBB; - // Load - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) - .addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr)); - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); + if (!isStore) { + // Load + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr)); + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(GPRPair0, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } } unsigned StoreLo, StoreHi; @@ -6582,7 +6573,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, if (isThumb2) { MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); } else { // Marshal a pair... 
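
Every exclusive load/store opcode used above now comes from the getExclusiveOperation() helper introduced earlier in this patch. A small usage sketch; the expected opcodes are read off the helper's lookup tables, not taken from test output:

  unsigned LdrOpc, StrOpc;
  // 4-byte, sequentially consistent, ARM (not Thumb2) mode: the helper
  // picks the acquire/release flavours added for v8.
  getExclusiveOperation(4, SequentiallyConsistent, /*isThumb2=*/false,
                        LdrOpc, StrOpc);
  // LdrOpc == ARM::LDAEX, StrOpc == ARM::STLEX
  // A monotonic access keeps the bare exclusives instead:
  getExclusiveOperation(4, Monotonic, /*isThumb2=*/false, LdrOpc, StrOpc);
  // LdrOpc == ARM::LDREX, StrOpc == ARM::STREX
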
@@ -6600,7 +6591,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addImm(ARM::gsub_1); // ...and store it - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) .addReg(StorePair).addReg(ptr)); } // Cmp+jump @@ -6621,6 +6612,51 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + unsigned destlo = MI->getOperand(0).getReg(); + unsigned desthi = MI->getOperand(1).getReg(); + unsigned ptr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + if (isThumb2) { + MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); + MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); + MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + } + unsigned ldrOpc, strOpc; + getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); + + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); + + if (isThumb2) { + MIB.addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr); + + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); + + // Copy GPRPair0 into dest. (This copy will normally be coalesced.) + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } + AddDefaultPred(MIB); + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering:: @@ -7594,46 +7630,49 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + case ARM::ATOMIC_LOAD_I64: + return EmitAtomicLoad64(MI, BB); - case ARM::ATOMADD6432: + case ARM::ATOMIC_LOAD_ADD_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, /*NeedsCarry*/ true); - case ARM::ATOMSUB6432: + case ARM::ATOMIC_LOAD_SUB_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true); - case ARM::ATOMOR6432: + case ARM::ATOMIC_LOAD_OR_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMXOR6432: + case ARM::ATOMIC_LOAD_XOR_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMAND6432: + case ARM::ATOMIC_LOAD_AND_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMSWAP6432: + case ARM::ATOMIC_STORE_I64: + case ARM::ATOMIC_SWAP_I64: return EmitAtomicBinary64(MI, BB, 0, 0, false); - case ARM::ATOMCMPXCHG6432: + case ARM::ATOMIC_CMP_SWAP_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ false, /*IsCmpxchg*/true); - case ARM::ATOMMIN6432: + case ARM::ATOMIC_LOAD_MIN_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::LT); - case ARM::ATOMMAX6432: + case ARM::ATOMIC_LOAD_MAX_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::GE); - case ARM::ATOMUMIN6432: + case ARM::ATOMIC_LOAD_UMIN_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::LO); - case ARM::ATOMUMAX6432: + case ARM::ATOMIC_LOAD_UMAX_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index fca9e0e..6131a26 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -223,21 +223,7 @@ namespace llvm { VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD, - - // 64-bit atomic ops (value split into two registers) - ATOMADD64_DAG, - ATOMSUB64_DAG, - ATOMOR64_DAG, - ATOMXOR64_DAG, - ATOMAND64_DAG, - ATOMNAND64_DAG, - ATOMSWAP64_DAG, - ATOMCMPXCHG64_DAG, - ATOMMIN64_DAG, - ATOMUMIN64_DAG, - ATOMMAX64_DAG, - ATOMUMAX64_DAG + VST4LN_UPD }; } @@ -574,6 +560,8 @@ namespace llvm { unsigned Size, bool signExtend, ARMCC::CondCodes Cond) const; + MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI, + MachineBasicBlock *BB) const; void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 80226ac..59d2f7a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1677,48 +1677,6 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } -// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops. -// (These pseudos use a hand-written selection code). 
-let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in { -def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, - GPR:$set1, GPR:$set2), - NoItinerary, []>; -def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -} - def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary, "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { bits<3> imm; @@ -4329,124 +4287,219 @@ def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } +let usesCustomInserter = 1, Defs = [CPSR] in { + // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in -def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; + def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; -let usesCustomInserter = 1 in { - let Defs = [CPSR] in { +// Atomic pseudo-insts which will be lowered to ldrex/strex loops. +// (64-bit pseudos use a hand-written selection code). 
+ let mayLoad = 1, mayStore = 1 in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I8 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I8 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), 
NoItinerary, - [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I16 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I16 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins 
GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>; - - def ATOMIC_SWAP_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>; - def ATOMIC_SWAP_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_SWAP_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>; - - def ATOMIC_CMP_SWAP_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>; - def ATOMIC_CMP_SWAP_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_CMP_SWAP_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>; -} + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_ADD_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_SUB_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_AND_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_OR_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_XOR_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_NAND_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_MIN_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_MAX_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_UMIN_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_UMAX_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, 
GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, + GPR:$set1, GPR:$set2, i32imm:$ordering), + NoItinerary, []>; + } + let mayLoad = 1 in + def ATOMIC_LOAD_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, i32imm:$ordering), + NoItinerary, []>; + let mayStore = 1 in + def ATOMIC_STORE_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; } let usesCustomInserter = 1 in { @@ -4560,6 +4613,35 @@ def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), (STREXH GPR:$Rt, addr_offset_none:$addr)>; +class acquiring_load + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + AtomicOrdering Ordering = cast(N)->getOrdering(); + return Ordering == Acquire || Ordering == SequentiallyConsistent; +}]>; + +def atomic_load_acquire_8 : acquiring_load; +def atomic_load_acquire_16 : acquiring_load; +def atomic_load_acquire_32 : acquiring_load; + +class releasing_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getOrdering(); + return Ordering == Release || Ordering == SequentiallyConsistent; +}]>; + +def atomic_store_release_8 : releasing_store; +def atomic_store_release_16 : releasing_store; +def atomic_store_release_32 : releasing_store; + +let AddedComplexity = 8 in { + def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>; +} + // SWP/SWPB are deprecated in V6/V7. 
let mayLoad = 1, mayStore = 1 in { def SWP : AIswp<0, (outs GPRnopc:$Rt), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index fcc8f86..07baf2d 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4216,6 +4216,15 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val), def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val), (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>; +let AddedComplexity = 8 in { + def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>; + def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>; + def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>; +} + //===----------------------------------------------------------------------===// // Assembler aliases diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 64001b4..94069cd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -94,7 +94,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { unsigned SubVer = TT[Idx]; if (SubVer == '8') { // FIXME: Parse v8 features - ARMArchFeature = "+v8"; + ARMArchFeature = "+v8,+db"; } else if (SubVer == '7') { if (Len >= Idx+2 && TT[Idx+1] == 'm') { isThumb = true; -- cgit v1.1 From 30ec8a3658b1f06bb94d392c55feb7f107517bf8 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Thu, 26 Sep 2013 15:11:00 +0000 Subject: [Sparc] Implements exception handling in SPARC with DwarfCFI. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191432 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 2 ++ lib/Target/Sparc/SparcFrameLowering.cpp | 19 +++++++++++++++++++ lib/Target/Sparc/SparcISelLowering.cpp | 6 ++---- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 45cfe03..06b1df6 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -36,6 +36,8 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { HasLEB128 = true; SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::DwarfCFI; + SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 536e466..1f9cac5 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -70,6 +70,25 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6) .addReg(SP::O6).addReg(SP::G1); } + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(SP::PROLOG_LABEL)).addSym(FrameLabel); + + unsigned regFP = MRI->getDwarfRegNum(SP::I6, true); + + // Emit ".cfi_def_cfa_register 30". + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(FrameLabel, + regFP)); + // Emit ".cfi_window_save". + MMI.addFrameInst(MCCFIInstruction::createWindowSave(FrameLabel)); + + unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true); + unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true); + // Emit ".cfi_register 15, 31". + MMI.addFrameInst(MCCFIInstruction::createRegister(FrameLabel, + regOutRA, + regInRA)); } void SparcFrameLowering:: diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 86bac7e..cdba998 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1432,8 +1432,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); - // VASTART needs to be custom lowered to use the VarArgsFrameIndex. setOperationAction(ISD::VASTART , MVT::Other, Custom); // VAARG needs to be lowered to not do unaligned accesses for doubles. @@ -1446,8 +1444,8 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); - // No debug info support yet. - setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + setExceptionPointerRegister(SP::I0); + setExceptionSelectorRegister(SP::I1); setStackPointerRegisterToSaveRestore(SP::O6); -- cgit v1.1 From daf6b948b98b886f5f0fba130e91e01c9ca7c2f2 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 26 Sep 2013 17:09:28 +0000 Subject: [PowerPC] Fix PR17354: Generate nop after local calls for PIC code. When generating code for shared libraries, even local calls may be intercepted, so we need a nop after the call for the linker to fix up the TOC. Test case adapted from the one provided in PR17354. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191440 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 5e12664..8da5f05 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3487,7 +3487,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) { + } else if ((CallOpc == PPCISD::CALL) && + (!isLocalCall(Callee) || + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; } -- cgit v1.1 From 82f36241c2484a72ba11b7ae5af3f485504a7b6e Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Thu, 26 Sep 2013 17:25:10 +0000 Subject: Fix PR 17372: Emitting PLD for stack address for ARM Thumb2 t2PLDi12, t2PLDi8, t2PLDs was omitted in Thumb2InstrInfo. This patch fixes it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191441 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Thumb2InstrInfo.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 286eaa0..82c57df 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -334,6 +334,7 @@ negativeOffsetOpcode(unsigned opcode) case ARM::t2STRi12: return ARM::t2STRi8; case ARM::t2STRBi12: return ARM::t2STRBi8; case ARM::t2STRHi12: return ARM::t2STRHi8; + case ARM::t2PLDi12: return ARM::t2PLDi8; case ARM::t2LDRi8: case ARM::t2LDRHi8: @@ -343,6 +344,7 @@ negativeOffsetOpcode(unsigned opcode) case ARM::t2STRi8: case ARM::t2STRBi8: case ARM::t2STRHi8: + case ARM::t2PLDi8: return opcode; default: @@ -364,6 +366,7 @@ positiveOffsetOpcode(unsigned opcode) case ARM::t2STRi8: return ARM::t2STRi12; case ARM::t2STRBi8: return ARM::t2STRBi12; case ARM::t2STRHi8: return ARM::t2STRHi12; + case ARM::t2PLDi8: return ARM::t2PLDi12; case ARM::t2LDRi12: case ARM::t2LDRHi12: @@ -373,6 +376,7 @@ positiveOffsetOpcode(unsigned opcode) case ARM::t2STRi12: case ARM::t2STRBi12: case ARM::t2STRHi12: + case ARM::t2PLDi12: return opcode; default: @@ -394,6 +398,7 @@ immediateOffsetOpcode(unsigned opcode) case ARM::t2STRs: return ARM::t2STRi12; case ARM::t2STRBs: return ARM::t2STRBi12; case ARM::t2STRHs: return ARM::t2STRHi12; + case ARM::t2PLDs: return ARM::t2PLDi12; case ARM::t2LDRi12: case ARM::t2LDRHi12: @@ -403,6 +408,7 @@ immediateOffsetOpcode(unsigned opcode) case ARM::t2STRi12: case ARM::t2STRBi12: case ARM::t2STRHi12: + case ARM::t2PLDi12: case ARM::t2LDRi8: case ARM::t2LDRHi8: case ARM::t2LDRBi8: @@ -411,6 +417,7 @@ immediateOffsetOpcode(unsigned opcode) case ARM::t2STRi8: case ARM::t2STRBi8: case ARM::t2STRHi8: + case ARM::t2PLDi8: return opcode; default: -- cgit v1.1 From 00a5b53e148cd0c840650504fcd6e92dee132c1a Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Thu, 26 Sep 2013 21:18:57 +0000 Subject: [mips][msa] Updates encoding of 3RF instructions to match the latest revision of the MSA spec (1.06). This does not affect any of the existing output. 
Patch by Matheus Almeida git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191460 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrInfo.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index fb02d3f..af0e5e4 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -813,8 +813,8 @@ class MSUBV_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010010>; class MSUBV_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010010>; class MSUBV_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010010>; -class MUL_Q_H_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011100>; -class MUL_Q_W_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011100>; +class MUL_Q_H_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011100>; +class MUL_Q_W_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011100>; class MULR_Q_H_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011100>; class MULR_Q_W_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011100>; -- cgit v1.1 From 1327c089221da78b1bfd61067162023e520085ed Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Thu, 26 Sep 2013 21:31:43 +0000 Subject: [mips][msa] Direct Object Emission for 3RF instructions. Patch by Matheus Almeida git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191461 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMSAInstrFormats.td | 7 + lib/Target/Mips/MipsMSAInstrInfo.td | 229 +++++++++++++++++---------------- 2 files changed, 124 insertions(+), 112 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index ddd3110..5667890 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -76,8 +76,15 @@ class MSA_3R_FMT major, bits<2> df, bits<6> minor>: MSAInst { } class MSA_3RF_FMT major, bits<1> df, bits<6> minor>: MSAInst { + bits<5> wt; + bits<5> ws; + bits<5> wd; + let Inst{25-22} = major; let Inst{21} = df; + let Inst{20-16} = wt; + let Inst{15-11} = ws; + let Inst{10-6} = wd; let Inst{5-0} = minor; } diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index af0e5e4..76fcdcc 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1175,28 +1175,16 @@ class MSA_3R_4R_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, RCWT:$wt); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, RCWT:$wt))]; - InstrItinClass Itinerary = itin; -} + RegisterOperand ROWD, RegisterOperand ROWS = ROWD, + RegisterOperand ROWT = ROWD, + InstrItinClass itin = NoItinerary> : + MSA_3R_DESC_BASE; class MSA_3RF_4RF_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWD:$wd_in, RCWS:$ws, RCWT:$wt); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt"); - list Pattern = [(set RCWD:$wd, - (OpNode RCWD:$wd_in, RCWS:$ws, RCWT:$wt))]; - InstrItinClass Itinerary = itin; - string Constraints = "$wd = $wd_in"; -} + RegisterOperand ROWD, RegisterOperand ROWS = ROWD, + RegisterOperand ROWT = ROWD, + InstrItinClass itin = NoItinerary> : + MSA_3R_4R_DESC_BASE; class MSA_CBRANCH_DESC_BASE { dag OutOperandList = (outs); @@ -1635,17 +1623,19 @@ class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d, MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>; -class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", fadd, MSA128W>, IsCommutable; -class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", fadd, MSA128D>, 
IsCommutable; +class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", fadd, MSA128WOpnd>, + IsCommutable; +class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", fadd, MSA128DOpnd>, + IsCommutable; -class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, MSA128W>, +class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, MSA128WOpnd>, IsCommutable; -class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, MSA128D>, +class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, MSA128DOpnd>, IsCommutable; -class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", vfsetoeq_v4f32, MSA128W>, +class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", vfsetoeq_v4f32, MSA128WOpnd>, IsCommutable; -class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", vfsetoeq_v2f64, MSA128D>, +class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", vfsetoeq_v2f64, MSA128DOpnd>, IsCommutable; class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, @@ -1653,57 +1643,59 @@ class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w, class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d, MSA128DOpnd>; -class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", vfsetole_v4f32, MSA128W>; -class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", vfsetole_v2f64, MSA128D>; +class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", vfsetole_v4f32, MSA128WOpnd>; +class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", vfsetole_v2f64, MSA128DOpnd>; -class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", vfsetolt_v4f32, MSA128W>; -class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", vfsetolt_v2f64, MSA128D>; +class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", vfsetolt_v4f32, MSA128WOpnd>; +class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", vfsetolt_v2f64, MSA128DOpnd>; -class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", vfsetone_v4f32, MSA128W>, +class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", vfsetone_v4f32, MSA128WOpnd>, IsCommutable; -class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", vfsetone_v2f64, MSA128D>, +class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", vfsetone_v2f64, MSA128DOpnd>, IsCommutable; -class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", vfsetord_v4f32, MSA128W>, +class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", vfsetord_v4f32, MSA128WOpnd>, IsCommutable; -class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", vfsetord_v2f64, MSA128D>, +class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", vfsetord_v2f64, MSA128DOpnd>, IsCommutable; -class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", vfsetueq_v4f32, MSA128W>, +class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", vfsetueq_v4f32, MSA128WOpnd>, IsCommutable; -class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", vfsetueq_v2f64, MSA128D>, +class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", vfsetueq_v2f64, MSA128DOpnd>, IsCommutable; -class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", vfsetule_v4f32, MSA128W>, +class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", vfsetule_v4f32, MSA128WOpnd>, IsCommutable; -class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", vfsetule_v2f64, MSA128D>, +class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", vfsetule_v2f64, MSA128DOpnd>, IsCommutable; -class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", vfsetult_v4f32, MSA128W>, +class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", vfsetult_v4f32, MSA128WOpnd>, IsCommutable; -class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", vfsetult_v2f64, MSA128D>, +class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", vfsetult_v2f64, MSA128DOpnd>, IsCommutable; -class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", vfsetun_v4f32, MSA128W>, +class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", 
vfsetun_v4f32, MSA128WOpnd>, IsCommutable; -class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", vfsetun_v2f64, MSA128D>, +class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", vfsetun_v2f64, MSA128DOpnd>, IsCommutable; -class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", vfsetune_v4f32, MSA128W>, +class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", vfsetune_v4f32, MSA128WOpnd>, IsCommutable; -class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", vfsetune_v2f64, MSA128D>, +class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", vfsetune_v2f64, MSA128DOpnd>, IsCommutable; -class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", fdiv, MSA128W>; -class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", fdiv, MSA128D>; +class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", fdiv, MSA128WOpnd>; +class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", fdiv, MSA128DOpnd>; class FEXDO_H_DESC : MSA_3RF_DESC_BASE<"fexdo.h", int_mips_fexdo_h, - MSA128H, MSA128W, MSA128W>; + MSA128HOpnd, MSA128WOpnd, MSA128WOpnd>; class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w, - MSA128W, MSA128D, MSA128D>; + MSA128WOpnd, MSA128DOpnd, MSA128DOpnd>; -class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", int_mips_fexp2_w, MSA128W>; -class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", int_mips_fexp2_d, MSA128D>; +class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", int_mips_fexp2_w, + MSA128WOpnd>; +class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", int_mips_fexp2_d, + MSA128DOpnd>; class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w, MSA128WOpnd, MSA128HOpnd>; @@ -1746,33 +1738,33 @@ class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128WOpnd>; class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128DOpnd>; class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", int_mips_fmadd_w, - MSA128W>; + MSA128WOpnd>; class FMADD_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.d", int_mips_fmadd_d, - MSA128D>; + MSA128DOpnd>; -class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, MSA128W>; -class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, MSA128D>; +class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, MSA128WOpnd>; +class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, MSA128DOpnd>; class FMAX_A_W_DESC : MSA_3RF_DESC_BASE<"fmax_a.w", int_mips_fmax_a_w, - MSA128W>; + MSA128WOpnd>; class FMAX_A_D_DESC : MSA_3RF_DESC_BASE<"fmax_a.d", int_mips_fmax_a_d, - MSA128D>; + MSA128DOpnd>; -class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, MSA128W>; -class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, MSA128D>; +class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, MSA128WOpnd>; +class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, MSA128DOpnd>; class FMIN_A_W_DESC : MSA_3RF_DESC_BASE<"fmin_a.w", int_mips_fmin_a_w, - MSA128W>; + MSA128WOpnd>; class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d, - MSA128D>; + MSA128DOpnd>; class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", int_mips_fmsub_w, - MSA128W>; + MSA128WOpnd>; class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", int_mips_fmsub_d, - MSA128D>; + MSA128DOpnd>; -class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", fmul, MSA128W>; -class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", fmul, MSA128D>; +class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", fmul, MSA128WOpnd>; +class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", fmul, MSA128DOpnd>; class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", frint, MSA128WOpnd>; class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", frint, MSA128DOpnd>; @@ -1785,44 +1777,54 @@ class 
FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w, class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d, MSA128DOpnd>; -class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, MSA128W>; -class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, MSA128D>; +class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, MSA128WOpnd>; +class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, MSA128DOpnd>; -class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, MSA128W>; -class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, MSA128D>; +class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, MSA128WOpnd>; +class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, MSA128DOpnd>; -class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, MSA128W>; -class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, MSA128D>; +class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, MSA128WOpnd>; +class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, MSA128DOpnd>; -class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, MSA128W>; -class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, MSA128D>; +class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, MSA128WOpnd>; +class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, MSA128DOpnd>; -class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, MSA128W>; -class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, MSA128D>; +class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, MSA128WOpnd>; +class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, MSA128DOpnd>; -class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, MSA128W>; -class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, MSA128D>; +class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, MSA128WOpnd>; +class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, MSA128DOpnd>; class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", fsqrt, MSA128WOpnd>; class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", fsqrt, MSA128DOpnd>; -class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", fsub, MSA128W>; -class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", fsub, MSA128D>; +class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", fsub, MSA128WOpnd>; +class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", fsub, MSA128DOpnd>; -class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w, MSA128W>; -class FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d, MSA128D>; +class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w, + MSA128WOpnd>; +class FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d, + MSA128DOpnd>; -class FSULE_W_DESC : MSA_3RF_DESC_BASE<"fsule.w", int_mips_fsule_w, MSA128W>; -class FSULE_D_DESC : MSA_3RF_DESC_BASE<"fsule.d", int_mips_fsule_d, MSA128D>; +class FSULE_W_DESC : MSA_3RF_DESC_BASE<"fsule.w", int_mips_fsule_w, + MSA128WOpnd>; +class FSULE_D_DESC : MSA_3RF_DESC_BASE<"fsule.d", int_mips_fsule_d, + MSA128DOpnd>; -class FSULT_W_DESC : MSA_3RF_DESC_BASE<"fsult.w", int_mips_fsult_w, MSA128W>; -class FSULT_D_DESC : MSA_3RF_DESC_BASE<"fsult.d", int_mips_fsult_d, MSA128D>; +class FSULT_W_DESC : MSA_3RF_DESC_BASE<"fsult.w", int_mips_fsult_w, + MSA128WOpnd>; +class FSULT_D_DESC : MSA_3RF_DESC_BASE<"fsult.d", int_mips_fsult_d, + MSA128DOpnd>; -class FSUN_W_DESC : MSA_3RF_DESC_BASE<"fsun.w", int_mips_fsun_w, MSA128W>; -class FSUN_D_DESC : MSA_3RF_DESC_BASE<"fsun.d", 
int_mips_fsun_d, MSA128D>; +class FSUN_W_DESC : MSA_3RF_DESC_BASE<"fsun.w", int_mips_fsun_w, + MSA128WOpnd>; +class FSUN_D_DESC : MSA_3RF_DESC_BASE<"fsun.d", int_mips_fsun_d, + MSA128DOpnd>; -class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w, MSA128W>; -class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d, MSA128D>; +class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w, + MSA128WOpnd>; +class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d, + MSA128DOpnd>; class FTRUNC_S_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.w", int_mips_ftrunc_s_w, MSA128WOpnd>; @@ -1845,9 +1847,9 @@ class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d, MSA128DOpnd>; class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h, - MSA128H, MSA128W, MSA128W>; + MSA128HOpnd, MSA128WOpnd, MSA128WOpnd>; class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w, - MSA128W, MSA128D, MSA128D>; + MSA128WOpnd, MSA128DOpnd, MSA128DOpnd>; class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h, MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>; @@ -1947,14 +1949,14 @@ class LDX_W_DESC : LDX_DESC_BASE<"ldx.w", load, v4i32, MSA128W>; class LDX_D_DESC : LDX_DESC_BASE<"ldx.d", load, v2i64, MSA128D>; class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h, - MSA128H>; + MSA128HOpnd>; class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w, - MSA128W>; + MSA128WOpnd>; class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h, - MSA128H>; + MSA128HOpnd>; class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w, - MSA128W>; + MSA128WOpnd>; class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", int_mips_maddv_b, MSA128BOpnd>; @@ -2050,14 +2052,14 @@ class MOVE_V_DESC { } class MSUB_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.h", int_mips_msub_q_h, - MSA128H>; + MSA128HOpnd>; class MSUB_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.w", int_mips_msub_q_w, - MSA128W>; + MSA128WOpnd>; class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h, - MSA128H>; + MSA128HOpnd>; class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w, - MSA128W>; + MSA128WOpnd>; class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", int_mips_msubv_b, MSA128BOpnd>; @@ -2068,13 +2070,15 @@ class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", int_mips_msubv_w, class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", int_mips_msubv_d, MSA128DOpnd>; -class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, MSA128H>; -class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w, MSA128W>; +class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h, + MSA128HOpnd>; +class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w, + MSA128WOpnd>; class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h, - MSA128H>; + MSA128HOpnd>; class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w, - MSA128W>; + MSA128WOpnd>; class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", mul, MSA128BOpnd>; class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", mul, MSA128HOpnd>; @@ -3121,19 +3125,20 @@ def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addrRegImm:$addr), def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrRegImm:$addr), (ST_D MSA128D:$ws, addrRegImm:$addr)>; -class MSA_FABS_PSEUDO_DESC_BASE<RegisterClass RCWD, - RegisterClass RCWS = RCWD, +class MSA_FABS_PSEUDO_DESC_BASE<RegisterOperand ROWD, + RegisterOperand ROWS = ROWD, InstrItinClass itin = NoItinerary> : - MipsPseudo<(outs RCWD:$wd), - (ins RCWS:$ws), - [(set RCWD:$wd, (fabs RCWS:$ws))]> { + MipsPseudo<(outs ROWD:$wd), + (ins ROWS:$ws), + [(set ROWD:$wd, (fabs ROWS:$ws))]> { 
InstrItinClass Itinerary = itin; } -def FABS_W : MSA_FABS_PSEUDO_DESC_BASE<MSA128W>, - PseudoInstExpansion<(FMAX_A_W MSA128W:$wd, MSA128W:$ws, - MSA128W:$ws)>; -def FABS_D : MSA_FABS_PSEUDO_DESC_BASE<MSA128D>, - PseudoInstExpansion<(FMAX_A_D MSA128D:$wd, MSA128D:$ws, - MSA128D:$ws)>; +def FABS_W : MSA_FABS_PSEUDO_DESC_BASE<MSA128WOpnd>, + PseudoInstExpansion<(FMAX_A_W MSA128WOpnd:$wd, MSA128WOpnd:$ws, + MSA128WOpnd:$ws)>; +def FABS_D : MSA_FABS_PSEUDO_DESC_BASE<MSA128DOpnd>, + PseudoInstExpansion<(FMAX_A_D MSA128DOpnd:$wd, MSA128DOpnd:$ws, + MSA128DOpnd:$ws)>; class MSABitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC, list<Predicate> preds = [HasMSA]> : -- cgit v1.1 From d7f5fac1117613ff6dd0e49308d2954ac10b4f1f Mon Sep 17 00:00:00 2001 From: Yunzhong Gao Date: Fri, 27 Sep 2013 01:44:23 +0000 Subject: Fixing Intel format of the vshufpd instruction. Phabricator code review is located at: http://llvm-reviews.chandlerc.com/D1759 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191481 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index be0f6c5..8de1336 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2553,10 +2553,10 @@ defm VSHUFPSY : sse12_shuffle, TB, VEX_4V, VEX_L; defm VSHUFPD : sse12_shuffle, TB, OpSize, VEX_4V; defm VSHUFPDY : sse12_shuffle, TB, OpSize, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { -- cgit v1.1 From 11989c2685bfaa1ba639436292d067fb32eda990 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 27 Sep 2013 07:11:17 +0000 Subject: Remove some duplicate patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191488 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 8 -------- 1 file changed, 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8de1336..abc99f6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4710,8 +4710,6 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; let Predicates = [UseAVX], AddedComplexity = 20 in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (VMOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), (VMOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), @@ -4719,8 +4717,6 @@ let Predicates = [UseAVX], AddedComplexity = 20 in { } let Predicates = [UseSSE2], AddedComplexity = 20 in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (MOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), (MOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; @@ -4772,14 +4768,10 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), let AddedComplexity = 20 in { let Predicates = [UseAVX] in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (VMOVZPQILo2PQIrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (VMOVZPQILo2PQIrr VR128:$src)>; } let Predicates = [UseSSE2] in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (MOVZPQILo2PQIrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>; } -- cgit v1.1 From 1f31a3794ef01d51efa23b892e76565ac23f3029 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 27 Sep 2013 07:16:24 +0000 Subject: Switch HasAVX to UseAVX in one spot to ensure that 
AVX512 form of VINSERTPS is used in AVX512 mode. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191489 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index abc99f6..3d86549 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6299,7 +6299,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1, } let ExeDomain = SSEPackedSingle in { - let Predicates = [HasAVX] in + let Predicates = [UseAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>; -- cgit v1.1 From 62f6fac1c500c9d6cb287db395d9ab3cbd985de9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 27 Sep 2013 07:20:47 +0000 Subject: Put HasAVX512 predicate on some patterns to properly disable them when AVX512 isn't enabled. Currently it works simply because the SSE and AVX versions of the same patterns are checked first in the DAG isel table. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191490 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 884d725..c9901bc 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1997,15 +1997,17 @@ def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))], IIC_SSE_MOV_LH>, EVEX_4V; -// MOVLHPS patterns -def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)), - (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>; -def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)), - (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>; - -// MOVHLPS patterns -def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)), - (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>; +let Predicates = [HasAVX512] in { + // MOVLHPS patterns + def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)), + (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>; + def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)), + (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>; + + // MOVHLPS patterns + def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)), + (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>; +} //===----------------------------------------------------------------------===// // FMA - Fused Multiply Operations -- cgit v1.1 From 86f309b4d1426b54f23d6ba53d3f5c8a1aa3985b Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 27 Sep 2013 09:25:29 +0000 Subject: [mips][msa] Added missing check in performSRACombine Reviewers: jacksprat, dsanders Reviewed By: dsanders Differential Revision: http://llvm-reviews.chandlerc.com/D1755 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191495 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEISelLowering.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index e86f4cb..721b58a9 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -609,6 +609,10 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, if (!ShAmount) return SDValue(); + if (Op0Op0->getOpcode() 
!= MipsISD::VEXTRACT_SEXT_ELT && + Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) + return SDValue(); + EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); -- cgit v1.1 From d2a31a124f3bebbdfc4d886afe33a116893aa689 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 27 Sep 2013 09:44:59 +0000 Subject: [mips][msa] Expand all truncstores and loadexts for MSA as well as DSP git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191496 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEISelLowering.cpp | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 721b58a9..2c316e3 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -41,6 +41,22 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) if (HasMips64) addRegisterClass(MVT::i64, &Mips::GPR64RegClass); + if (Subtarget->hasDSP() || Subtarget->hasMSA()) { + // Expand all truncating stores and extending loads. + unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; + + for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) { + for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1) + setTruncStoreAction((MVT::SimpleValueType)VT0, + (MVT::SimpleValueType)VT1, Expand); + + setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand); + setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand); + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand); + } + } + if (Subtarget->hasDSP()) { MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; @@ -58,20 +74,6 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BITCAST, VecTys[i], Legal); } - // Expand all truncating stores and extending loads. - unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; - - for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) { - for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1) - setTruncStoreAction((MVT::SimpleValueType)VT0, - (MVT::SimpleValueType)VT1, Expand); - - setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand); - setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand); - setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand); - } - setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); -- cgit v1.1 From f1ef27e6e308435035ffec112a6474ed5e009484 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 27 Sep 2013 10:08:31 +0000 Subject: [mips][msa] MSA requires FR=1 mode (64-bit FPU register file). Report fatal error when using it in FR=0 mode. 
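A usage sketch, not part of the original commit (the CPU name and input file below are placeholders; only the -mattr pairing comes from the error message in the patch): an MSA build now has to enable the 64-bit register file explicitly, e.g.

    llc -march=mips -mcpu=mips32r2 -mattr=+msa,+fp64 test.ll

Requesting +msa without +fp64 hits the new report_fatal_error instead of proceeding with an unsupported FR=0 register file.
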
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191498 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSubtarget.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 91e2535..3a12270 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -89,6 +89,11 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, (hasMips64() && (isABI_N32() || isABI_N64()))) && "Invalid Arch & ABI pair."); + if (hasMSA() && !isFP64bit()) + report_fatal_error("MSA requires a 64-bit FPU register file (FR=1 mode). " + "See -mattr=+fp64.", + false); + // Is the target system Linux ? if (TT.find("linux") == std::string::npos) IsLinux = false; -- cgit v1.1 From ba616ef0236a11239a0a2c174627dcdc4ab63434 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 27 Sep 2013 10:25:41 +0000 Subject: [mips][msa] Tidy up: lowerMSABinaryIntr, lowerMSABinaryImmIntr, lowerMSABranchIntr, and lowerMSAUnaryIntr were trivially small functions. Inlined them into their callers. lowerMSASplatImm now takes its caller's SDLoc instead of making a new one. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191503 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEISelLowering.cpp | 250 ++++++++++++++++----------------- 1 file changed, 119 insertions(+), 131 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 2c316e3..51682f0 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1025,35 +1025,6 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return DAG.getMergeValues(Vals, 2, DL); } -static SDValue lowerMSABinaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { - SDLoc DL(Op); - SDValue LHS = Op->getOperand(1); - SDValue RHS = Op->getOperand(2); - EVT ResTy = Op->getValueType(0); - - SDValue Result = DAG.getNode(Opc, DL, ResTy, LHS, RHS); - - return Result; -} - -static SDValue lowerMSABinaryImmIntr(SDValue Op, SelectionDAG &DAG, - unsigned Opc, SDValue RHS) { - SDValue LHS = Op->getOperand(1); - EVT ResTy = Op->getValueType(0); - - return DAG.getNode(Opc, SDLoc(Op), ResTy, LHS, RHS); -} - -static SDValue lowerMSABranchIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { - SDLoc DL(Op); - SDValue Value = Op->getOperand(1); - EVT ResTy = Op->getValueType(0); - - SDValue Result = DAG.getNode(Opc, DL, ResTy, Value); - - return Result; -} - // Lower an MSA copy intrinsic into the specified SelectionDAG node static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { SDLoc DL(Op); @@ -1081,12 +1052,11 @@ static SDValue lowerMSAInsertIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } -static SDValue lowerMSASplatImm(SDValue Op, SDValue ImmOp, SelectionDAG &DAG) { - EVT ResTy = Op->getValueType(0); +static SDValue +lowerMSASplatImm(SDLoc DL, EVT ResTy, SDValue ImmOp, SelectionDAG &DAG) { EVT ViaVecTy = ResTy; SmallVector<SDValue, 16> Ops; SDValue ImmHiOp; - SDLoc DL(Op); if (ViaVecTy == MVT::v2i64) { ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp, @@ -1112,21 +1082,14 @@ static SDValue lowerMSASplatImm(SDValue Op, SDValue ImmOp, SelectionDAG &DAG) { static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { - return lowerMSASplatImm(Op, Op->getOperand(ImmOp), DAG); -} - -static SDValue lowerMSAUnaryIntr(SDValue Op, 
SelectionDAG &DAG, unsigned Opc) { - SDLoc DL(Op); - SDValue Value = Op->getOperand(1); - EVT ResTy = Op->getValueType(0); - - SDValue Result = DAG.getNode(Opc, DL, ResTy, Value); - - return Result; + return lowerMSASplatImm(SDLoc(Op), Op->getValueType(0), + Op->getOperand(ImmOp), DAG); } SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { default: return SDValue(); @@ -1166,99 +1129,105 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_addv_b: case Intrinsic::mips_addv_h: case Intrinsic::mips_addv_w: case Intrinsic::mips_addv_d: - return lowerMSABinaryIntr(Op, DAG, ISD::ADD); + return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_addvi_b: case Intrinsic::mips_addvi_h: case Intrinsic::mips_addvi_w: case Intrinsic::mips_addvi_d: - return lowerMSABinaryImmIntr(Op, DAG, ISD::ADD, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_and_v: - return lowerMSABinaryIntr(Op, DAG, ISD::AND); + return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_andi_b: - return lowerMSABinaryImmIntr(Op, DAG, ISD::AND, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: case Intrinsic::mips_bnz_d: - return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_NONZERO); + return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), + Op->getOperand(1)); case Intrinsic::mips_bnz_v: - return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_NONZERO); + return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), + Op->getOperand(1)); case Intrinsic::mips_bsel_v: - return DAG.getNode(ISD::VSELECT, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); case Intrinsic::mips_bseli_b: - return DAG.getNode(ISD::VSELECT, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), lowerMSASplatImm(Op, 3, DAG)); case Intrinsic::mips_bz_b: case Intrinsic::mips_bz_h: case Intrinsic::mips_bz_w: case Intrinsic::mips_bz_d: - return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_ZERO); + return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), + Op->getOperand(1)); case Intrinsic::mips_bz_v: - return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_ZERO); + return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), + Op->getOperand(1)); case Intrinsic::mips_ceq_b: case Intrinsic::mips_ceq_h: case Intrinsic::mips_ceq_w: case Intrinsic::mips_ceq_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETEQ); case Intrinsic::mips_ceqi_b: case Intrinsic::mips_ceqi_h: case Intrinsic::mips_ceqi_w: case Intrinsic::mips_ceqi_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ); case Intrinsic::mips_cle_s_b: case Intrinsic::mips_cle_s_h: case Intrinsic::mips_cle_s_w: case Intrinsic::mips_cle_s_d: - return DAG.getSetCC(SDLoc(Op), 
Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETLE); case Intrinsic::mips_clei_s_b: case Intrinsic::mips_clei_s_h: case Intrinsic::mips_clei_s_w: case Intrinsic::mips_clei_s_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG), ISD::SETLE); case Intrinsic::mips_cle_u_b: case Intrinsic::mips_cle_u_h: case Intrinsic::mips_cle_u_w: case Intrinsic::mips_cle_u_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETULE); case Intrinsic::mips_clei_u_b: case Intrinsic::mips_clei_u_h: case Intrinsic::mips_clei_u_w: case Intrinsic::mips_clei_u_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); case Intrinsic::mips_clt_s_b: case Intrinsic::mips_clt_s_h: case Intrinsic::mips_clt_s_w: case Intrinsic::mips_clt_s_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETLT); case Intrinsic::mips_clti_s_b: case Intrinsic::mips_clti_s_h: case Intrinsic::mips_clti_s_w: case Intrinsic::mips_clti_s_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG), ISD::SETLT); case Intrinsic::mips_clt_u_b: case Intrinsic::mips_clt_u_h: case Intrinsic::mips_clt_u_w: case Intrinsic::mips_clt_u_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETULT); case Intrinsic::mips_clti_u_b: case Intrinsic::mips_clti_u_h: case Intrinsic::mips_clti_u_w: case Intrinsic::mips_clti_u_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); case Intrinsic::mips_copy_s_b: case Intrinsic::mips_copy_s_h: @@ -1272,59 +1241,63 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_div_s_h: case Intrinsic::mips_div_s_w: case Intrinsic::mips_div_s_d: - return lowerMSABinaryIntr(Op, DAG, ISD::SDIV); + return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_div_u_b: case Intrinsic::mips_div_u_h: case Intrinsic::mips_div_u_w: case Intrinsic::mips_div_u_d: - return lowerMSABinaryIntr(Op, DAG, ISD::UDIV); + return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_fadd_w: case Intrinsic::mips_fadd_d: - return lowerMSABinaryIntr(Op, DAG, ISD::FADD); + return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away case Intrinsic::mips_fceq_w: case Intrinsic::mips_fceq_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETOEQ); case Intrinsic::mips_fcle_w: case Intrinsic::mips_fcle_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return 
DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETOLE); case Intrinsic::mips_fclt_w: case Intrinsic::mips_fclt_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETOLT); case Intrinsic::mips_fcne_w: case Intrinsic::mips_fcne_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETONE); case Intrinsic::mips_fcor_w: case Intrinsic::mips_fcor_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETO); case Intrinsic::mips_fcueq_w: case Intrinsic::mips_fcueq_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETUEQ); case Intrinsic::mips_fcule_w: case Intrinsic::mips_fcule_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETULE); case Intrinsic::mips_fcult_w: case Intrinsic::mips_fcult_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETULT); case Intrinsic::mips_fcun_w: case Intrinsic::mips_fcun_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETUO); case Intrinsic::mips_fcune_w: case Intrinsic::mips_fcune_d: - return DAG.getSetCC(SDLoc(Op), Op->getValueType(0), Op->getOperand(1), + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETUNE); case Intrinsic::mips_fdiv_w: case Intrinsic::mips_fdiv_d: - return lowerMSABinaryIntr(Op, DAG, ISD::FDIV); + return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_fill_b: case Intrinsic::mips_fill_h: case Intrinsic::mips_fill_w: { @@ -1334,47 +1307,49 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) Ops.push_back(Op->getOperand(1)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), ResTy, &Ops[0], + return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size()); } case Intrinsic::mips_flog2_w: case Intrinsic::mips_flog2_d: - return lowerMSAUnaryIntr(Op, DAG, ISD::FLOG2); + return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_fmul_w: case Intrinsic::mips_fmul_d: - return lowerMSABinaryIntr(Op, DAG, ISD::FMUL); + return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_frint_w: case Intrinsic::mips_frint_d: - return lowerMSAUnaryIntr(Op, DAG, ISD::FRINT); + return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_fsqrt_w: case Intrinsic::mips_fsqrt_d: - return lowerMSAUnaryIntr(Op, DAG, ISD::FSQRT); + return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_fsub_w: case Intrinsic::mips_fsub_d: - return lowerMSABinaryIntr(Op, DAG, ISD::FSUB); + return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case 
Intrinsic::mips_ilvev_b: case Intrinsic::mips_ilvev_h: case Intrinsic::mips_ilvev_w: case Intrinsic::mips_ilvev_d: - return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_ilvl_b: case Intrinsic::mips_ilvl_h: case Intrinsic::mips_ilvl_w: case Intrinsic::mips_ilvl_d: - return DAG.getNode(MipsISD::ILVL, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_ilvod_b: case Intrinsic::mips_ilvod_h: case Intrinsic::mips_ilvod_w: case Intrinsic::mips_ilvod_d: - return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_ilvr_b: case Intrinsic::mips_ilvr_h: case Intrinsic::mips_ilvr_w: case Intrinsic::mips_ilvr_d: - return DAG.getNode(MipsISD::ILVR, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_insert_b: case Intrinsic::mips_insert_h: @@ -1389,147 +1364,160 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_max_s_h: case Intrinsic::mips_max_s_w: case Intrinsic::mips_max_s_d: - return lowerMSABinaryIntr(Op, DAG, MipsISD::VSMAX); + return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_max_u_b: case Intrinsic::mips_max_u_h: case Intrinsic::mips_max_u_w: case Intrinsic::mips_max_u_d: - return lowerMSABinaryIntr(Op, DAG, MipsISD::VUMAX); + return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_maxi_s_b: case Intrinsic::mips_maxi_s_h: case Intrinsic::mips_maxi_s_w: case Intrinsic::mips_maxi_s_d: - return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VSMAX, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_maxi_u_b: case Intrinsic::mips_maxi_u_h: case Intrinsic::mips_maxi_u_w: case Intrinsic::mips_maxi_u_d: - return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VUMAX, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_min_s_b: case Intrinsic::mips_min_s_h: case Intrinsic::mips_min_s_w: case Intrinsic::mips_min_s_d: - return lowerMSABinaryIntr(Op, DAG, MipsISD::VSMIN); + return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_min_u_b: case Intrinsic::mips_min_u_h: case Intrinsic::mips_min_u_w: case Intrinsic::mips_min_u_d: - return lowerMSABinaryIntr(Op, DAG, MipsISD::VUMIN); + return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_mini_s_b: case Intrinsic::mips_mini_s_h: case Intrinsic::mips_mini_s_w: case Intrinsic::mips_mini_s_d: - return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VSMIN, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_mini_u_b: case Intrinsic::mips_mini_u_h: case Intrinsic::mips_mini_u_w: case Intrinsic::mips_mini_u_d: - return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VUMIN, - 
lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_mulv_b: case Intrinsic::mips_mulv_h: case Intrinsic::mips_mulv_w: case Intrinsic::mips_mulv_d: - return lowerMSABinaryIntr(Op, DAG, ISD::MUL); + return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_nlzc_b: case Intrinsic::mips_nlzc_h: case Intrinsic::mips_nlzc_w: case Intrinsic::mips_nlzc_d: - return lowerMSAUnaryIntr(Op, DAG, ISD::CTLZ); + return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_nor_v: { - SDValue Res = lowerMSABinaryIntr(Op, DAG, ISD::OR); - return DAG.getNOT(SDLoc(Op), Res, Res->getValueType(0)); + SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); } case Intrinsic::mips_nori_b: { - SDValue Res = lowerMSABinaryImmIntr(Op, DAG, ISD::OR, - lowerMSASplatImm(Op, 2, DAG)); - return DAG.getNOT(SDLoc(Op), Res, Res->getValueType(0)); + SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), + Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); } case Intrinsic::mips_or_v: - return lowerMSABinaryIntr(Op, DAG, ISD::OR); + return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_ori_b: - return lowerMSABinaryImmIntr(Op, DAG, ISD::OR, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(ISD::OR, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_pckev_b: case Intrinsic::mips_pckev_h: case Intrinsic::mips_pckev_w: case Intrinsic::mips_pckev_d: - return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_pckod_b: case Intrinsic::mips_pckod_h: case Intrinsic::mips_pckod_w: case Intrinsic::mips_pckod_d: - return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_pcnt_b: case Intrinsic::mips_pcnt_h: case Intrinsic::mips_pcnt_w: case Intrinsic::mips_pcnt_d: - return lowerMSAUnaryIntr(Op, DAG, ISD::CTPOP); + return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_shf_b: case Intrinsic::mips_shf_h: case Intrinsic::mips_shf_w: - return DAG.getNode(MipsISD::SHF, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), Op->getOperand(2), Op->getOperand(1)); case Intrinsic::mips_sll_b: case Intrinsic::mips_sll_h: case Intrinsic::mips_sll_w: case Intrinsic::mips_sll_d: - return lowerMSABinaryIntr(Op, DAG, ISD::SHL); + return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_slli_b: case Intrinsic::mips_slli_h: case Intrinsic::mips_slli_w: case Intrinsic::mips_slli_d: - return lowerMSABinaryImmIntr(Op, DAG, ISD::SHL, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_sra_b: case Intrinsic::mips_sra_h: case Intrinsic::mips_sra_w: case Intrinsic::mips_sra_d: - return lowerMSABinaryIntr(Op, DAG, ISD::SRA); + return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 
+ Op->getOperand(2)); case Intrinsic::mips_srai_b: case Intrinsic::mips_srai_h: case Intrinsic::mips_srai_w: case Intrinsic::mips_srai_d: - return lowerMSABinaryImmIntr(Op, DAG, ISD::SRA, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_srl_b: case Intrinsic::mips_srl_h: case Intrinsic::mips_srl_w: case Intrinsic::mips_srl_d: - return lowerMSABinaryIntr(Op, DAG, ISD::SRL); + return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_srli_b: case Intrinsic::mips_srli_h: case Intrinsic::mips_srli_w: case Intrinsic::mips_srli_d: - return lowerMSABinaryImmIntr(Op, DAG, ISD::SRL, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_subv_b: case Intrinsic::mips_subv_h: case Intrinsic::mips_subv_w: case Intrinsic::mips_subv_d: - return lowerMSABinaryIntr(Op, DAG, ISD::SUB); + return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_subvi_b: case Intrinsic::mips_subvi_h: case Intrinsic::mips_subvi_w: case Intrinsic::mips_subvi_d: - return lowerMSABinaryImmIntr(Op, DAG, ISD::SUB, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_vshf_b: case Intrinsic::mips_vshf_h: case Intrinsic::mips_vshf_w: case Intrinsic::mips_vshf_d: - return DAG.getNode(MipsISD::VSHF, SDLoc(Op), Op->getValueType(0), + return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); case Intrinsic::mips_xor_v: - return lowerMSABinaryIntr(Op, DAG, ISD::XOR); + return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); case Intrinsic::mips_xori_b: - return lowerMSABinaryImmIntr(Op, DAG, ISD::XOR, - lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); } } -- cgit v1.1 From 6b968eccd79409b0986f394fa597101cf79433d8 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Fri, 27 Sep 2013 10:30:18 +0000 Subject: ARM: Teach assembler to enforce constraint for Thumb2 LDRD (literal/immediate) destination register operands. LDRD<c> <Rt>, <Rt2>, <label>
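As a hedged illustration of the enforced constraint (assuming the ARM ARM rule that the two destination registers of a Thumb2 LDRD must be distinct, and that neither may be SP or PC), the assembler should accept the first line below and reject the second:

    ldrd r0, r1, [r2]   @ OK: Rt and Rt2 are distinct
    ldrd r0, r0, [r2]   @ rejected: Rt == Rt2 is unpredictable

The register-pair rule, not the exact diagnostic wording, is what this sketch illustrates.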