Diffstat (limited to 'lib/Target/Hexagon')
61 files changed, 12481 insertions, 11336 deletions
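The recurring pattern in this patch: Hexagon passes and the asm printer stop caching a HexagonTargetMachine/HexagonSubtarget at construction time and instead query the subtarget from the MachineFunction they are run on. Below is a minimal sketch of that pattern, not part of the patch itself; the pass name HexagonExamplePass is hypothetical, while getSubtarget<HexagonSubtarget>() and getInstrInfo() are the accessors used throughout the diff.

// Sketch only -- illustrates the per-function subtarget query this patch
// moves to; the pass is hypothetical and not part of the change.
#include "HexagonSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

namespace {
class HexagonExamplePass : public MachineFunctionPass {
public:
  static char ID;
  // No TargetMachine or Subtarget is captured in the constructor any more.
  HexagonExamplePass() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override {
    // Query the subtarget (and through it the instruction info) per function.
    const HexagonSubtarget &ST = Fn.getSubtarget<HexagonSubtarget>();
    const HexagonInstrInfo *TII = ST.getInstrInfo();
    (void)TII; // real passes use this to build or rewrite instructions
    return false;
  }
};
char HexagonExamplePass::ID = 0;
}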
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index af7914f..eaa8bef 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -13,7 +13,6 @@ add_public_tablegen_target(HexagonCommonTableGen) add_llvm_target(HexagonCodeGen HexagonAsmPrinter.cpp - HexagonCallingConvLower.cpp HexagonCFGOptimizer.cpp HexagonCopyToCombine.cpp HexagonExpandPredSpillCode.cpp diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index bc64be1..669af8c 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" - #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCExpr.h" @@ -18,14 +18,13 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/Endian.h" - -#include <vector> +#include "llvm/Support/raw_ostream.h" #include <array> +#include <vector> using namespace llvm; @@ -48,6 +47,13 @@ public: }; } +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, void const *Decoder); + static const uint16_t IntRegDecoderTable[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, @@ -60,6 +66,16 @@ static const uint16_t IntRegDecoderTable[] = { static const uint16_t PredRegDecoderTable[] = { Hexagon::P0, Hexagon::P1, Hexagon::P2, Hexagon::P3 }; +static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, + const uint16_t Table[], size_t Size) { + if (RegNo < Size) { + Inst.addOperand(MCOperand::CreateReg(Table[RegNo])); + return MCDisassembler::Success; + } + else + return MCDisassembler::Fail; +} + static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, void const *Decoder) { @@ -71,6 +87,81 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, const void *Decoder) { + static const uint16_t CtrlRegDecoderTable[] = { + Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, + Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7, + Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, + Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH + }; + + if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0])) + return MCDisassembler::Fail; + + if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus 
DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, void const *Decoder) { + static const uint16_t CtrlReg64DecoderTable[] = { + Hexagon::C1_0, Hexagon::NoRegister, + Hexagon::C3_2, Hexagon::NoRegister, + Hexagon::NoRegister, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, + Hexagon::C9_8, Hexagon::NoRegister, + Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, + Hexagon::UPC, Hexagon::NoRegister + }; + + if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0])) + return MCDisassembler::Fail; + + if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlReg64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, const void *Decoder) { + unsigned Register = 0; + switch (RegNo) { + case 0: + Register = Hexagon::M0; + break; + case 1: + Register = Hexagon::M1; + break; + default: + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, const void *Decoder) { + static const uint16_t DoubleRegDecoderTable[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, + Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, + Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, + Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15 + }; + + return (DecodeRegisterClass(Inst, RegNo >> 1, + DoubleRegDecoderTable, + sizeof (DoubleRegDecoderTable))); +} + static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, void const *Decoder) { @@ -110,5 +201,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Remove parse bits. 
insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK); - return decodeInstruction(DecoderTable32, MI, insn, Address, this, STI); + DecodeStatus Result = decodeInstruction(DecoderTable32, MI, insn, Address, this, STI); + HexagonMCInstrInfo::AppendImplicitOperands(MI); + return Result; } diff --git a/lib/Target/Hexagon/Disassembler/LLVMBuild.txt b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt index 17ad11b..43bace7 100644 --- a/lib/Target/Hexagon/Disassembler/LLVMBuild.txt +++ b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = HexagonDisassembler parent = Hexagon -required_libraries = HexagonInfo MCDisassembler Support +required_libraries = HexagonDesc HexagonInfo MCDisassembler Support add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 64ae69c..e0a3b2f 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -21,26 +21,24 @@ namespace llvm { class FunctionPass; - class ModulePass; - class TargetMachine; - class MachineInstr; - class HexagonMCInst; class HexagonAsmPrinter; class HexagonTargetMachine; + class MachineInstr; + class MCInst; + class ModulePass; class raw_ostream; + class TargetMachine; FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM); FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM); FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); - FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM); + FunctionPass *createHexagonCFGOptimizer(); - FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM); - FunctionPass *createHexagonSplitConst32AndConst64( - const HexagonTargetMachine &TM); - FunctionPass *createHexagonExpandPredSpillCode( - const HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitTFRCondSets(); + FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonExpandPredSpillCode(); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); @@ -58,7 +56,7 @@ namespace llvm { TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &); */ - void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI, + void HexagonLowerToMC(MachineInstr const *MI, MCInst &MCI, HexagonAsmPrinter &AP); } // end namespace llvm; diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 5f4a6c6..f892c9f 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -21,35 +21,23 @@ include "llvm/Target/Target.td" // Hexagon Subtarget features. //===----------------------------------------------------------------------===// -// Hexagon Archtectures -def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2", - "Hexagon v2">; -def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3", - "Hexagon v3">; -def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4", - "Hexagon v4">; -def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5", - "Hexagon v5">; +// Hexagon Architectures +def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">; +def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// -def HasV2T : Predicate<"Subtarget.hasV2TOps()">; -def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">; -def NoV2T : Predicate<"!Subtarget.hasV2TOps()">; -def HasV3T : Predicate<"Subtarget.hasV3TOps()">; -def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">; -def NoV3T : Predicate<"!Subtarget.hasV3TOps()">; -def HasV4T : Predicate<"Subtarget.hasV4TOps()">; -def NoV4T : Predicate<"!Subtarget.hasV4TOps()">; -def HasV5T : Predicate<"Subtarget.hasV5TOps()">; -def NoV5T : Predicate<"!Subtarget.hasV5TOps()">; -def UseMEMOP : Predicate<"Subtarget.useMemOps()">; -def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def NoV5T : Predicate<"!Subtarget->hasV5TOps()">; +def UseMEMOP : Predicate<"Subtarget->useMemOps()">; +def IEEERndNearV5T : Predicate<"Subtarget->modeIEEERndNear()">; //===----------------------------------------------------------------------===// // Classes used for relation maps. //===----------------------------------------------------------------------===// + +class ImmRegShl; // PredRel - Filter class used to relate non-predicated instructions with their // predicated forms. class PredRel; @@ -137,7 +125,7 @@ def getPredOldOpcode : InstrMapping { // def getNewValueOpcode : InstrMapping { let FilterClass = "NewValueRel"; - let RowFields = ["BaseOpcode", "PredSense", "PNewValue"]; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"]; let ColFields = ["NValueST"]; let KeyCol = ["false"]; let ValueCols = [["true"]]; @@ -149,7 +137,7 @@ def getNewValueOpcode : InstrMapping { // def getNonNVStore : InstrMapping { let FilterClass = "NewValueRel"; - let RowFields = ["BaseOpcode", "PredSense", "PNewValue"]; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"]; let ColFields = ["NValueST"]; let KeyCol = ["true"]; let ValueCols = [["false"]]; @@ -180,6 +168,14 @@ def getRegForm : InstrMapping { let ValueCols = [["reg"]]; } +def getRegShlForm : InstrMapping { + let FilterClass = "ImmRegShl"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// @@ -200,8 +196,10 @@ class Proc<string Name, SchedMachineModel Model, list<SubtargetFeature> Features> : ProcessorModel<Name, Model, Features>; -def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>; -def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>; +def : Proc<"hexagonv4", HexagonModelV4, + [ArchV4]>; +def : Proc<"hexagonv5", HexagonModelV4, + [ArchV4, ArchV5]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 9240282..180762f 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -19,7 +19,7 @@ #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "MCTargetDesc/HexagonInstPrinter.h" -#include "MCTargetDesc/HexagonMCInst.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include 
"llvm/ADT/StringExtras.h" @@ -61,6 +61,10 @@ static cl::opt<bool> AlignCalls( "hexagon-align-calls", cl::Hidden, cl::init(true), cl::desc("Insert falign after call instruction for Hexagon target")); +HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {} + void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); @@ -174,7 +178,7 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, /// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MI->isBundle()) { - std::vector<const MachineInstr*> BundleMIs; + std::vector<MachineInstr const *> BundleMIs; const MachineBasicBlock *MBB = MI->getParent(); MachineBasicBlock::const_instr_iterator MII = MI; @@ -183,33 +187,35 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { while (MII != MBB->end() && MII->isInsideBundle()) { const MachineInstr *MInst = MII; if (MInst->getOpcode() == TargetOpcode::DBG_VALUE || - MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) { - IgnoreCount++; - ++MII; - continue; + MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) { + IgnoreCount++; + ++MII; + continue; } - //BundleMIs.push_back(&*MII); + // BundleMIs.push_back(&*MII); BundleMIs.push_back(MInst); ++MII; } unsigned Size = BundleMIs.size(); - assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); + assert((Size + IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); for (unsigned Index = 0; Index < Size; Index++) { - HexagonMCInst MCI; - MCI.setPacketStart(Index == 0); - MCI.setPacketEnd(Index == (Size-1)); + MCInst MCI; HexagonLowerToMC(BundleMIs[Index], MCI, *this); + HexagonMCInstrInfo::AppendImplicitOperands(MCI); + HexagonMCInstrInfo::setPacketBegin(MCI, Index == 0); + HexagonMCInstrInfo::setPacketEnd(MCI, Index == (Size - 1)); EmitToStreamer(OutStreamer, MCI); } } else { - HexagonMCInst MCI; + MCInst MCI; + HexagonLowerToMC(MI, MCI, *this); + HexagonMCInstrInfo::AppendImplicitOperands(MCI); if (MI->getOpcode() == Hexagon::ENDLOOP0) { - MCI.setPacketStart(true); - MCI.setPacketEnd(true); + HexagonMCInstrInfo::setPacketBegin(MCI, true); + HexagonMCInstrInfo::setPacketEnd(MCI, true); } - HexagonLowerToMC(MI, MCI, *this); EmitToStreamer(OutStreamer, MCI); } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h index 5f4c162..792fc8b 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -25,9 +25,12 @@ namespace llvm { const HexagonSubtarget *Subtarget; public: - explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<HexagonSubtarget>(); + explicit HexagonAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer); + + bool runOnMachineFunction(MachineFunction &Fn) override { + Subtarget = &Fn.getSubtarget<HexagonSubtarget>(); + return AsmPrinter::runOnMachineFunction(Fn); } const char *getPassName() const override { diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 8a4e02c..703e691 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -37,15 +37,11 @@ namespace { class HexagonCFGOptimizer : public MachineFunctionPass { private: - const HexagonTargetMachine& QTM; - const HexagonSubtarget &QST; - void 
InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); public: static char ID; - HexagonCFGOptimizer(const HexagonTargetMachine& TM) - : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + HexagonCFGOptimizer() : MachineFunctionPass(ID) { initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); } @@ -59,49 +55,49 @@ private: char HexagonCFGOptimizer::ID = 0; static bool IsConditionalBranch(int Opc) { - return (Opc == Hexagon::JMP_t) || (Opc == Hexagon::JMP_f) - || (Opc == Hexagon::JMP_tnew_t) || (Opc == Hexagon::JMP_fnew_t); + return (Opc == Hexagon::J2_jumpt) || (Opc == Hexagon::J2_jumpf) + || (Opc == Hexagon::J2_jumptnewpt) || (Opc == Hexagon::J2_jumpfnewpt); } static bool IsUnconditionalJump(int Opc) { - return (Opc == Hexagon::JMP); + return (Opc == Hexagon::J2_jump); } void HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, MachineBasicBlock* NewTarget) { - const HexagonInstrInfo *QII = QTM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = + MI->getParent()->getParent()->getSubtarget().getInstrInfo(); int NewOpcode = 0; switch(MI->getOpcode()) { - case Hexagon::JMP_t: - NewOpcode = Hexagon::JMP_f; + case Hexagon::J2_jumpt: + NewOpcode = Hexagon::J2_jumpf; break; - case Hexagon::JMP_f: - NewOpcode = Hexagon::JMP_t; + case Hexagon::J2_jumpf: + NewOpcode = Hexagon::J2_jumpt; break; - case Hexagon::JMP_tnew_t: - NewOpcode = Hexagon::JMP_fnew_t; + case Hexagon::J2_jumptnewpt: + NewOpcode = Hexagon::J2_jumpfnewpt; break; - case Hexagon::JMP_fnew_t: - NewOpcode = Hexagon::JMP_tnew_t; + case Hexagon::J2_jumpfnewpt: + NewOpcode = Hexagon::J2_jumptnewpt; break; default: llvm_unreachable("Cannot handle this case"); } - MI->setDesc(QII->get(NewOpcode)); + MI->setDesc(TII->get(NewOpcode)); MI->getOperand(1).setMBB(NewTarget); } bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { - // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { @@ -163,8 +159,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // The target of the unconditional branch must be JumpAroundTarget. // TODO: If not, we should not invert the unconditional branch. MachineBasicBlock* CondBranchTarget = nullptr; - if ((MI->getOpcode() == Hexagon::JMP_t) || - (MI->getOpcode() == Hexagon::JMP_f)) { + if ((MI->getOpcode() == Hexagon::J2_jumpt) || + (MI->getOpcode() == Hexagon::J2_jumpf)) { CondBranchTarget = MI->getOperand(1).getMBB(); } @@ -248,6 +244,6 @@ void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) { CALL_ONCE_INITIALIZATION(initializePassOnce) } -FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) { - return new HexagonCFGOptimizer(TM); +FunctionPass *llvm::createHexagonCFGOptimizer() { + return new HexagonCFGOptimizer(); } diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp deleted file mode 100644 index 8d78409..0000000 --- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ /dev/null @@ -1,206 +0,0 @@ -//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Hexagon_CCState class, used for lowering and -// implementing calling conventions. 
Adapted from the machine independent -// version of the class (CCState) but this handles calls to varargs functions -// -//===----------------------------------------------------------------------===// - -#include "HexagonCallingConvLower.h" -#include "Hexagon.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -using namespace llvm; - -Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg, - const TargetMachine &tm, - SmallVectorImpl<CCValAssign> &locs, - LLVMContext &c) - : CallingConv(CC), IsVarArg(isVarArg), TM(tm), Locs(locs), Context(c) { - // No stack is used. - StackOffset = 0; - - UsedRegs.resize( - (TM.getSubtargetImpl()->getRegisterInfo()->getNumRegs() + 31) / 32); -} - -// HandleByVal - Allocate a stack slot large enough to pass an argument by -// value. The size and alignment information of the argument is encoded in its -// parameter attribute. -void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, - ISD::ArgFlagsTy ArgFlags) { - unsigned Align = ArgFlags.getByValAlign(); - unsigned Size = ArgFlags.getByValSize(); - if (MinSize > (int)Size) - Size = MinSize; - if (MinAlign > (int)Align) - Align = MinAlign; - unsigned Offset = AllocateStack(Size, Align); - - addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset, - LocVT.getSimpleVT(), LocInfo)); -} - -/// MarkAllocated - Mark a register and all of its aliases as allocated. -void Hexagon_CCState::MarkAllocated(unsigned Reg) { - const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo(); - for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) - UsedRegs[*AI/32] |= 1 << (*AI&31); -} - -/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, -/// incorporating info about the formals into this state. -void -Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> - &Ins, - Hexagon_CCAssignFn Fn, - unsigned SretValueInRegs) { - unsigned NumArgs = Ins.size(); - unsigned i = 0; - - // If the function returns a small struct in registers, skip - // over the first (dummy) argument. - if (SretValueInRegs != 0) { - ++i; - } - - - for (; i != NumArgs; ++i) { - EVT ArgVT = Ins[i].VT; - ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) { - dbgs() << "Formal argument #" << i << " has unhandled type " - << ArgVT.getEVTString() << "\n"; - abort(); - } - } -} - -/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, -/// incorporating info about the result values into this state. -void -Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, - Hexagon_CCAssignFn Fn, - unsigned SretValueInRegs) { - - // For Hexagon, Return small structures in registers. - if (SretValueInRegs != 0) { - if (SretValueInRegs <= 32) { - unsigned Reg = Hexagon::R0; - addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32, - CCValAssign::Full)); - return; - } - if (SretValueInRegs <= 64) { - unsigned Reg = Hexagon::D0; - addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64, - CCValAssign::Full)); - return; - } - } - - - // Determine which register each value should be copied into. 
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].VT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){ - dbgs() << "Return operand #" << i << " has unhandled type " - << VT.getEVTString() << "\n"; - abort(); - } - } -} - - -/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info -/// about the passed values into this state. -void -Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> - &Outs, - Hexagon_CCAssignFn Fn, - int NonVarArgsParams, - unsigned SretValueSize) { - unsigned NumOps = Outs.size(); - - unsigned i = 0; - // If the called function returns a small struct in registers, skip - // the first actual parameter. We do not want to pass a pointer to - // the stack location. - if (SretValueSize != 0) { - ++i; - } - - for (; i != NumOps; ++i) { - EVT ArgVT = Outs[i].VT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, - NonVarArgsParams, i+1, false)) { - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString() << "\n"; - abort(); - } - } -} - -/// AnalyzeCallOperands - Same as above except it takes vectors of types -/// and argument flags. -void -Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &Flags, - Hexagon_CCAssignFn Fn) { - unsigned NumOps = ArgVTs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = ArgVTs[i]; - ISD::ArgFlagsTy ArgFlags = Flags[i]; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1, - false)) { - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString() << "\n"; - abort(); - } - } -} - -/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, -/// incorporating info about the passed values into this state. -void -Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, - Hexagon_CCAssignFn Fn, - unsigned SretValueInRegs) { - - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT VT = Ins[i].VT; - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) { - dbgs() << "Call result #" << i << " has unhandled type " - << VT.getEVTString() << "\n"; - abort(); - } - } -} - -/// AnalyzeCallResult - Same as above except it's specialized for calls which -/// produce a single value. -void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) { - if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1, - false)) { - dbgs() << "Call result has unhandled type " - << VT.getEVTString() << "\n"; - abort(); - } -} diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h deleted file mode 100644 index 738ed1a..0000000 --- a/lib/Target/Hexagon/HexagonCallingConvLower.h +++ /dev/null @@ -1,187 +0,0 @@ -//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the Hexagon_CCState class, used for lowering -// and implementing calling conventions. 
Adapted from the target independent -// version but this handles calls to varargs functions -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H -#define LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H - -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" - -// -// Need to handle varargs. -// -namespace llvm { - class TargetRegisterInfo; - class TargetMachine; - class Hexagon_CCState; - class SDNode; - struct EVT; - -/// Hexagon_CCAssignFn - This function assigns a location for Val, updating -/// State to reflect the change. -typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State, - int NonVarArgsParams, - int CurrentParam, - bool ForceMem); - - -/// CCState - This class holds information needed while lowering arguments and -/// return values. It captures which registers are already assigned and which -/// stack slots are used. It provides accessors to allocate these values. -class Hexagon_CCState { - CallingConv::ID CallingConv; - bool IsVarArg; - const TargetMachine &TM; - SmallVectorImpl<CCValAssign> &Locs; - LLVMContext &Context; - - unsigned StackOffset; - SmallVector<uint32_t, 16> UsedRegs; -public: - Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM, - SmallVectorImpl<CCValAssign> &locs, LLVMContext &c); - - void addLoc(const CCValAssign &V) { - Locs.push_back(V); - } - - LLVMContext &getContext() const { return Context; } - const TargetMachine &getTarget() const { return TM; } - unsigned getCallingConv() const { return CallingConv; } - bool isVarArg() const { return IsVarArg; } - - unsigned getNextStackOffset() const { return StackOffset; } - - /// isAllocated - Return true if the specified register (or an alias) is - /// allocated. - bool isAllocated(unsigned Reg) const { - return UsedRegs[Reg/32] & (1 << (Reg&31)); - } - - /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, - /// incorporating info about the formals into this state. - void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, - Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); - - /// AnalyzeReturn - Analyze the returned values of an ISD::RET node, - /// incorporating info about the result values into this state. - void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, - Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); - - /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info - /// about the passed values into this state. - void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, - Hexagon_CCAssignFn Fn, int NonVarArgsParams, - unsigned SretValueSize); - - /// AnalyzeCallOperands - Same as above except it takes vectors of types - /// and argument flags. - void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &Flags, - Hexagon_CCAssignFn Fn); - - /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, - /// incorporating info about the passed values into this state. - void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, - Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); - - /// AnalyzeCallResult - Same as above except it's specialized for calls which - /// produce a single value. 
- void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn); - - /// getFirstUnallocated - Return the first unallocated register in the set, or - /// NumRegs if they are all allocated. - unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const { - for (unsigned i = 0; i != NumRegs; ++i) - if (!isAllocated(Regs[i])) - return i; - return NumRegs; - } - - /// AllocateReg - Attempt to allocate one register. If it is not available, - /// return zero. Otherwise, return the register, marking it and any aliases - /// as allocated. - unsigned AllocateReg(unsigned Reg) { - if (isAllocated(Reg)) return 0; - MarkAllocated(Reg); - return Reg; - } - - /// Version of AllocateReg with extra register to be shadowed. - unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) { - if (isAllocated(Reg)) return 0; - MarkAllocated(Reg); - MarkAllocated(ShadowReg); - return Reg; - } - - /// AllocateReg - Attempt to allocate one of the specified registers. If none - /// are available, return zero. Otherwise, return the first one available, - /// marking it and any aliases as allocated. - unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) { - unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); - if (FirstUnalloc == NumRegs) - return 0; // Didn't find the reg. - - // Mark the register and any aliases as allocated. - unsigned Reg = Regs[FirstUnalloc]; - MarkAllocated(Reg); - return Reg; - } - - /// Version of AllocateReg with list of registers to be shadowed. - unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs, - unsigned NumRegs) { - unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); - if (FirstUnalloc == NumRegs) - return 0; // Didn't find the reg. - - // Mark the register and any aliases as allocated. - unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc]; - MarkAllocated(Reg); - MarkAllocated(ShadowReg); - return Reg; - } - - /// AllocateStack - Allocate a chunk of stack space with the specified size - /// and alignment. - unsigned AllocateStack(unsigned Size, unsigned Align) { - assert(Align && ((Align-1) & Align) == 0); // Align is power of 2. - StackOffset = ((StackOffset + Align-1) & ~(Align-1)); - unsigned Result = StackOffset; - StackOffset += Size; - return Result; - } - - // HandleByVal - Allocate a stack slot large enough to pass an argument by - // value. The size and alignment information of the argument is encoded in its - // parameter attribute. - void HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags); - -private: - /// MarkAllocated - Mark a register and all of its aliases as allocated. - void MarkAllocated(unsigned Reg); -}; - - - -} // end namespace llvm - -#endif diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 4e76698..dd193f9 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -114,7 +114,7 @@ static bool isCombinableInstType(MachineInstr *MI, const HexagonInstrInfo *TII, bool ShouldCombineAggressively) { switch(MI->getOpcode()) { - case Hexagon::TFR: { + case Hexagon::A2_tfr: { // A COPY instruction can be combined if its arguments are IntRegs (32bit). 
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isReg()); @@ -124,7 +124,7 @@ static bool isCombinableInstType(MachineInstr *MI, Hexagon::IntRegsRegClass.contains(SrcReg); } - case Hexagon::TFRI: { + case Hexagon::A2_tfrsi: { // A transfer-immediate can be combined if its argument is a signed 8bit // value. assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); @@ -158,11 +158,11 @@ static bool isCombinableInstType(MachineInstr *MI, } static bool isGreaterThan8BitTFRI(MachineInstr *I) { - return I->getOpcode() == Hexagon::TFRI && + return I->getOpcode() == Hexagon::A2_tfrsi && !isInt<8>(I->getOperand(1).getImm()); } static bool isGreaterThan6BitTFRI(MachineInstr *I) { - return I->getOpcode() == Hexagon::TFRI && + return I->getOpcode() == Hexagon::A2_tfrsi && !isUInt<6>(I->getOperand(1).getImm()); } @@ -171,26 +171,14 @@ static bool isGreaterThan6BitTFRI(MachineInstr *I) { static bool areCombinableOperations(const TargetRegisterInfo *TRI, MachineInstr *HighRegInst, MachineInstr *LowRegInst) { - assert((HighRegInst->getOpcode() == Hexagon::TFR || - HighRegInst->getOpcode() == Hexagon::TFRI || + assert((HighRegInst->getOpcode() == Hexagon::A2_tfr || + HighRegInst->getOpcode() == Hexagon::A2_tfrsi || HighRegInst->getOpcode() == Hexagon::TFRI_V4) && - (LowRegInst->getOpcode() == Hexagon::TFR || - LowRegInst->getOpcode() == Hexagon::TFRI || + (LowRegInst->getOpcode() == Hexagon::A2_tfr || + LowRegInst->getOpcode() == Hexagon::A2_tfrsi || LowRegInst->getOpcode() == Hexagon::TFRI_V4) && "Assume individual instructions are of a combinable type"); - const HexagonRegisterInfo *QRI = - static_cast<const HexagonRegisterInfo *>(TRI); - - // V4 added some combine variations (mixed immediate and register source - // operands), if we are on < V4 we can only combine 2 register-to-register - // moves and 2 immediate-to-register moves. We also don't have - // constant-extenders. - if (!QRI->Subtarget.hasV4TOps()) - return HighRegInst->getOpcode() == LowRegInst->getOpcode() && - !isGreaterThan8BitTFRI(HighRegInst) && - !isGreaterThan6BitTFRI(LowRegInst); - // There is no combine of two constant extended values. if ((HighRegInst->getOpcode() == Hexagon::TFRI_V4 || isGreaterThan8BitTFRI(HighRegInst)) && @@ -418,7 +406,7 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { // Get target info. TRI = MF.getSubtarget().getRegisterInfo(); - TII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); // Combine aggressively (for code size) ShouldCombineAggressively = @@ -563,14 +551,14 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, // Handle globals. if (HiOperand.isGlobal()) { - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), HiOperand.getTargetFlags()) .addImm(LoOperand.getImm()); return; } if (LoOperand.isGlobal()) { - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) .addImm(HiOperand.getImm()) .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), LoOperand.getTargetFlags()); @@ -580,7 +568,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, // Handle constant extended immediates. 
if (!isInt<8>(HiOperand.getImm())) { assert(isInt<8>(LoOperand.getImm())); - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) .addImm(HiOperand.getImm()) .addImm(LoOperand.getImm()); return; @@ -588,7 +576,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, if (!isUInt<6>(LoOperand.getImm())) { assert(isInt<8>(HiOperand.getImm())); - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_iI_V4), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineii), DoubleDestReg) .addImm(HiOperand.getImm()) .addImm(LoOperand.getImm()); return; @@ -596,7 +584,7 @@ void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, // Insert new combine instruction. // DoubleRegDest = combine #HiImm, #LoImm - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ii), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combineii), DoubleDestReg) .addImm(HiOperand.getImm()) .addImm(LoOperand.getImm()); } @@ -613,7 +601,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, // Handle global. if (HiOperand.isGlobal()) { - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) .addGlobalAddress(HiOperand.getGlobal(), HiOperand.getOffset(), HiOperand.getTargetFlags()) .addReg(LoReg, LoRegKillFlag); @@ -621,7 +609,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, } // Insert new combine instruction. // DoubleRegDest = combine #HiImm, LoReg - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_Ir_V4), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineir), DoubleDestReg) .addImm(HiOperand.getImm()) .addReg(LoReg, LoRegKillFlag); } @@ -638,7 +626,7 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, // Handle global. if (LoOperand.isGlobal()) { - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) .addReg(HiReg, HiRegKillFlag) .addGlobalAddress(LoOperand.getGlobal(), LoOperand.getOffset(), LoOperand.getTargetFlags()); @@ -647,7 +635,7 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, // Insert new combine instruction. // DoubleRegDest = combine HiReg, #LoImm - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rI_V4), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A4_combineri), DoubleDestReg) .addReg(HiReg, HiRegKillFlag) .addImm(LoOperand.getImm()); } @@ -666,7 +654,7 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, // Insert new combine instruction. 
// DoubleRegDest = combine HiReg, LoReg - BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::COMBINE_rr), DoubleDestReg) + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg) .addReg(HiReg, HiRegKillFlag) .addReg(LoReg, LoRegKillFlag); } diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index 8ef4c3a..8176598 100644 --- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -20,7 +20,6 @@ #include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/MachineDominators.h" @@ -49,13 +48,9 @@ namespace llvm { namespace { class HexagonExpandPredSpillCode : public MachineFunctionPass { - const HexagonTargetMachine& QTM; - const HexagonSubtarget &QST; - public: static char ID; - HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) : - MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + HexagonExpandPredSpillCode() : MachineFunctionPass(ID) { PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeHexagonExpandPredSpillCodePass(Registry); } @@ -72,7 +67,8 @@ char HexagonExpandPredSpillCode::ID = 0; bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { - const HexagonInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo(); + const HexagonSubtarget &QST = Fn.getSubtarget<HexagonSubtarget>(); + const HexagonInstrInfo *TII = QST.getInstrInfo(); // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); @@ -86,45 +82,43 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { if (Opc == Hexagon::STriw_pred) { // STriw_pred [R30], ofst, SrcReg; unsigned FP = MI->getOperand(0).getReg(); - assert( - FP == - QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() && - "Not a Frame Pointer, Nor a Spill Slot"); + assert(FP == QST.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); assert(MI->getOperand(1).isImm() && "Not an offset"); int Offset = MI->getOperand(1).getImm(); int SrcReg = MI->getOperand(2).getReg(); assert(Hexagon::PredRegsRegClass.contains(SrcReg) && "Not a predicate register"); - if (!TII->isValidOffset(Hexagon::STriw_indexed, Offset)) { - if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) { + if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::CONST32_Int_Real), HEXAGON_RESERVED_REG_1).addImm(Offset); BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add), HEXAGON_RESERVED_REG_1) .addReg(FP).addReg(HEXAGON_RESERVED_REG_1); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), HEXAGON_RESERVED_REG_2).addReg(SrcReg); BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::STriw_indexed)) + TII->get(Hexagon::S2_storeri_io)) .addReg(HEXAGON_RESERVED_REG_1) .addImm(0).addReg(HEXAGON_RESERVED_REG_2); } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), 
HEXAGON_RESERVED_REG_2).addReg(SrcReg); BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::STriw_indexed)) + TII->get(Hexagon::S2_storeri_io)) .addReg(HEXAGON_RESERVED_REG_1) .addImm(0) .addReg(HEXAGON_RESERVED_REG_2); } } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), HEXAGON_RESERVED_REG_2).addReg(SrcReg); BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::STriw_indexed)). + TII->get(Hexagon::S2_storeri_io)). addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2); } MII = MBB->erase(MI); @@ -135,14 +129,12 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { assert(Hexagon::PredRegsRegClass.contains(DstReg) && "Not a predicate register"); unsigned FP = MI->getOperand(1).getReg(); - assert( - FP == - QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() && - "Not a Frame Pointer, Nor a Spill Slot"); + assert(FP == QST.getRegisterInfo()->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); assert(MI->getOperand(2).isImm() && "Not an offset"); int Offset = MI->getOperand(2).getImm(); - if (!TII->isValidOffset(Hexagon::LDriw, Offset)) { - if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) { + if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::CONST32_Int_Real), HEXAGON_RESERVED_REG_1).addImm(Offset); @@ -150,26 +142,26 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { HEXAGON_RESERVED_REG_1) .addReg(FP) .addReg(HEXAGON_RESERVED_REG_1); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), HEXAGON_RESERVED_REG_2) .addReg(HEXAGON_RESERVED_REG_1) .addImm(0); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), DstReg).addReg(HEXAGON_RESERVED_REG_2); } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), HEXAGON_RESERVED_REG_2) .addReg(HEXAGON_RESERVED_REG_1) .addImm(0); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), DstReg).addReg(HEXAGON_RESERVED_REG_2); } } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), DstReg).addReg(HEXAGON_RESERVED_REG_2); } MII = MBB->erase(MI); @@ -200,6 +192,6 @@ void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) { } FunctionPass* -llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) { - return new HexagonExpandPredSpillCode(TM); +llvm::createHexagonExpandPredSpillCode() { + return new HexagonExpandPredSpillCode(); } diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 5f9b927..e8d8f14 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ 
b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -81,8 +81,8 @@ FunctionPass *llvm::createHexagonFixupHwLoops() { /// \brief Returns true if the instruction is a hardware loop instruction. static bool isHardwareLoop(const MachineInstr *MI) { - return MI->getOpcode() == Hexagon::LOOP0_r || - MI->getOpcode() == Hexagon::LOOP0_i; + return MI->getOpcode() == Hexagon::J2_loop0r || + MI->getOpcode() == Hexagon::J2_loop0i; } @@ -168,18 +168,18 @@ void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, // First, set the LC0 with the trip count. if (MII->getOperand(1).isReg()) { // Trip count is a register - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) + BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0) .addReg(MII->getOperand(1).getReg()); } else { // Trip count is an immediate. - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch) + BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrsi), Scratch) .addImm(MII->getOperand(1).getImm()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) + BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::LC0) .addReg(Scratch); } // Then, set the SA0 with the loop start address. BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch) .addMBB(MII->getOperand(0).getMBB()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0) + BuildMI(*MBB, MII, DL, TII->get(Hexagon::A2_tfrrcr), Hexagon::SA0) .addReg(Scratch); } diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 356f279..2b1992f 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -50,10 +50,8 @@ void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const { unsigned FrameSize = MFI->getStackSize(); // Get the alignments provided by the target. - unsigned TargetAlign = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); + unsigned TargetAlign = + MF.getSubtarget().getFrameLowering()->getStackAlignment(); // Get the maximum call frame size of all the calls. unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); @@ -80,8 +78,8 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); - const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); + const HexagonRegisterInfo *QRI = + MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); determineFrameLayout(MF); @@ -122,17 +120,17 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes >= ALLOCFRAME_MAX) { // Emit allocframe(#0). - BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0); + BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(0); // Subtract offset from frame pointer. BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real), HEXAGON_RESERVED_REG_1).addImm(NumBytes); - BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr), + BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::A2_sub), QRI->getStackRegister()). addReg(QRI->getStackRegister()). 
addReg(HEXAGON_RESERVED_REG_1); } else { - BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes); + BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::S2_allocframe)).addImm(NumBytes); } } } @@ -161,15 +159,14 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, // Handle EH_RETURN. if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) { assert(MBBI->getOperand(0).isReg() && "Offset should be in register!"); - BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)); + BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe)); BuildMI(MBB, MBBI, dl, TII.get(Hexagon::A2_add), Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28); return; } // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher // versions. - if (MF.getTarget().getSubtarget<HexagonSubtarget>().hasV4TOps() && - MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) { + if (MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) { // Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC // instruction if we encounter it. MachineBasicBlock::iterator BeforeJMPR = @@ -183,7 +180,7 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, // Add dealloc_return. MachineInstrBuilder MIB = - BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4)); + BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::L4_return)); // Transfer the function live-out registers. MIB->copyImplicitOps(*MBB.getParent(), &*MBBI); // Remove the JUMPR node. @@ -198,7 +195,7 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4) return; - BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)); + BuildMI(MBB, MBBI, dl, TII.get(Hexagon::L2_deallocframe)); } } } diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index e2062a3..1577c33 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -28,7 +28,7 @@ #include "llvm/ADT/SmallSet.h" #include "Hexagon.h" -#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -64,9 +64,7 @@ namespace { MachineLoopInfo *MLI; MachineRegisterInfo *MRI; MachineDominatorTree *MDT; - const HexagonTargetMachine *TM; const HexagonInstrInfo *TII; - const HexagonRegisterInfo *TRI; #ifndef NDEBUG static int Counter; #endif @@ -265,9 +263,7 @@ namespace { return Contents.ImmVal; } - void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const { - const TargetRegisterInfo *TRI = - TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } if (isImm()) { OS << Contents.ImmVal; } } @@ -285,8 +281,8 @@ INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", /// \brief Returns true if the instruction is a hardware loop instruction. 
static bool isHardwareLoop(const MachineInstr *MI) { - return MI->getOpcode() == Hexagon::LOOP0_r || - MI->getOpcode() == Hexagon::LOOP0_i; + return MI->getOpcode() == Hexagon::J2_loop0r || + MI->getOpcode() == Hexagon::J2_loop0i; } FunctionPass *llvm::createHexagonHardwareLoops() { @@ -302,11 +298,7 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { MLI = &getAnalysis<MachineLoopInfo>(); MRI = &MF.getRegInfo(); MDT = &getAnalysis<MachineDominatorTree>(); - TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget()); - TII = static_cast<const HexagonInstrInfo *>( - TM->getSubtargetImpl()->getInstrInfo()); - TRI = static_cast<const HexagonRegisterInfo *>( - TM->getSubtargetImpl()->getRegisterInfo()); + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) { @@ -357,7 +349,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, unsigned PhiOpReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiOpReg); unsigned UpdOpc = DI->getOpcode(); - bool isAdd = (UpdOpc == Hexagon::ADD_ri); + bool isAdd = (UpdOpc == Hexagon::A2_addi); if (isAdd) { // If the register operand to the add is the PHI we're @@ -540,21 +532,21 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, return nullptr; switch (CondOpc) { - case Hexagon::CMPEQri: - case Hexagon::CMPEQrr: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpeq: Cmp = !Negated ? Comparison::EQ : Comparison::NE; break; - case Hexagon::CMPGTUri: - case Hexagon::CMPGTUrr: + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgtu: Cmp = !Negated ? Comparison::GTu : Comparison::LEu; break; - case Hexagon::CMPGTri: - case Hexagon::CMPGTrr: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgt: Cmp = !Negated ? Comparison::GTs : Comparison::LEs; break; // Very limited support for byte/halfword compares. - case Hexagon::CMPbEQri_V4: - case Hexagon::CMPhEQri_V4: { + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpheqi: { if (IVBump != 1) return nullptr; @@ -574,7 +566,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, } if (InitV >= EndV) return nullptr; - if (CondOpc == Hexagon::CMPbEQri_V4) { + if (CondOpc == Hexagon::A4_cmpbeqi) { if (!isInt<8>(InitV) || !isInt<8>(EndV)) return nullptr; } else { // Hexagon::CMPhEQri_V4 @@ -626,12 +618,12 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // If so, use the immediate value rather than the register. if (Start->isReg()) { const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); - if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI) + if (StartValInstr && StartValInstr->getOpcode() == Hexagon::A2_tfrsi) Start = &StartValInstr->getOperand(1); } if (End->isReg()) { const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); - if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI) + if (EndValInstr && EndValInstr->getOpcode() == Hexagon::A2_tfrsi) End = &EndValInstr->getOperand(1); } @@ -781,9 +773,9 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, DistR = End->getReg(); DistSR = End->getSubReg(); } else { - const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) : - (RegToImm ? TII->get(Hexagon::SUB_ri) : - TII->get(Hexagon::ADD_ri)); + const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : + (RegToImm ? 
TII->get(Hexagon::A2_subri) : + TII->get(Hexagon::A2_addi)); unsigned SubR = MRI->createVirtualRegister(IntRC); MachineInstrBuilder SubIB = BuildMI(*PH, InsertPos, DL, SubD, SubR); @@ -811,7 +803,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, } else { // Generate CountR = ADD DistR, AdjVal unsigned AddR = MRI->createVirtualRegister(IntRC); - const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri); + MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); BuildMI(*PH, InsertPos, DL, AddD, AddR) .addReg(DistR, 0, DistSR) .addImm(AdjV); @@ -832,7 +824,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // Generate NormR = LSR DistR, Shift. unsigned LsrR = MRI->createVirtualRegister(IntRC); - const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri); + const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); BuildMI(*PH, InsertPos, DL, LsrD, LsrR) .addReg(AdjR, 0, AdjSR) .addImm(Shift); @@ -1086,7 +1078,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); // Add the Loop instruction to the beginning of the loop. - BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r)) .addMBB(LoopStart) .addReg(CountReg); } else { @@ -1095,14 +1087,14 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // if the immediate fits in the instructions. Otherwise, we need to // create a new virtual register. int64_t CountImm = TripCount->getImm(); - if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) { + if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) { unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); - BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg) + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) .addImm(CountImm); - BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r)) .addMBB(LoopStart).addReg(CountReg); } else - BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i)) + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i)) .addMBB(LoopStart).addImm(CountImm); } @@ -1122,8 +1114,8 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // The loop ends with either: // - a conditional branch followed by an unconditional branch, or // - a conditional branch to the loop start. - if (LastI->getOpcode() == Hexagon::JMP_t || - LastI->getOpcode() == Hexagon::JMP_f) { + if (LastI->getOpcode() == Hexagon::J2_jumpt || + LastI->getOpcode() == Hexagon::J2_jumpf) { // Delete one and change/add an uncond. branch to out of the loop. 
MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); LastI = LastMBB->erase(LastI); @@ -1194,8 +1186,8 @@ MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) { MachineInstr *DI = MRI->getVRegDef(R); unsigned DOpc = DI->getOpcode(); switch (DOpc) { - case Hexagon::TFRI: - case Hexagon::TFRI64: + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: case Hexagon::CONST32_Int_Real: case Hexagon::CONST64_Int_Real: return DI; @@ -1277,7 +1269,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { unsigned PhiReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiReg); unsigned UpdOpc = DI->getOpcode(); - bool isAdd = (UpdOpc == Hexagon::ADD_ri); + bool isAdd = (UpdOpc == Hexagon::A2_addi); if (isAdd) { // If the register operand to the add/sub is the PHI we are looking diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index dc58c42..fb056b5 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -47,7 +47,7 @@ namespace { class HexagonDAGToDAGISel : public SelectionDAGISel { /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can /// make the right decision when generating code for different targets. - const HexagonSubtarget &Subtarget; + const HexagonSubtarget *Subtarget; // Keep a reference to HexagonTargetMachine. const HexagonTargetMachine& TM; @@ -55,9 +55,7 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { public: explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(targetmachine, OptLevel), - Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()), - TM(targetmachine) { + : SelectionDAGISel(targetmachine, OptLevel), TM(targetmachine) { initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } bool hasNumUsesBelowThresGA(SDNode *N) const; @@ -79,10 +77,21 @@ public: bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2); bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2); + // Complex Pattern Selectors. + inline bool SelectAddrGA(SDValue &N, SDValue &R); + inline bool SelectAddrGP(SDValue &N, SDValue &R); + bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP); + bool SelectAddrFI(SDValue &N, SDValue &R); + const char *getPassName() const override { return "Hexagon DAG->DAG Pattern Instruction Selection"; } + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget<HexagonSubtarget>(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -138,9 +147,7 @@ SDValue XformMskToBitPosU3Imm(uint8_t Imm) { // Return true if there is exactly one bit set in V, i.e., if V is one of the // following integers: 2^0, 2^1, ..., 2^31. bool ImmIsSingleBit(uint32_t v) const { - uint32_t c = CountPopulation_64(v); - // Only return true if we counted 1 bit. - return c == 1; + return isPowerOf2_32(v); } // XformM5ToU5Imm - Return a target constant with the specified value, of type @@ -170,8 +177,21 @@ inline SDValue XformUToUM1Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm - 1, MVT::i32); } +// XformSToSM2Imm - Return a target constant decremented by 2. +inline SDValue XformSToSM2Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm - 2, MVT::i32); +} + +// XformSToSM3Imm - Return a target constant decremented by 3. 
+inline SDValue XformSToSM3Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm - 3, MVT::i32); +} + // Include the pieces autogenerated from the target description. #include "HexagonGenDAGISel.inc" + +private: + bool isValueExtension(SDValue const &Val, unsigned FromBits, SDValue &Src); }; } // end anonymous namespace @@ -312,56 +332,6 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID) } } - -// Intrinsics that have predicate operands. -static unsigned doesIntrinsicContainPredicate(unsigned ID) -{ - switch (ID) { - default: - return 0; - case Intrinsic::hexagon_C2_tfrpr: - return Hexagon::TFR_RsPd; - case Intrinsic::hexagon_C2_and: - return Hexagon::AND_pp; - case Intrinsic::hexagon_C2_xor: - return Hexagon::XOR_pp; - case Intrinsic::hexagon_C2_or: - return Hexagon::OR_pp; - case Intrinsic::hexagon_C2_not: - return Hexagon::NOT_p; - case Intrinsic::hexagon_C2_any8: - return Hexagon::ANY_pp; - case Intrinsic::hexagon_C2_all8: - return Hexagon::ALL_pp; - case Intrinsic::hexagon_C2_vitpack: - return Hexagon::VITPACK_pp; - case Intrinsic::hexagon_C2_mask: - return Hexagon::MASK_p; - case Intrinsic::hexagon_C2_mux: - return Hexagon::MUX_rr; - - // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but - // that's how it's mapped in q6protos.h. - case Intrinsic::hexagon_C2_muxir: - return Hexagon::MUX_ri; - - // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but - // that's how it's mapped in q6protos.h. - case Intrinsic::hexagon_C2_muxri: - return Hexagon::MUX_ir; - - case Intrinsic::hexagon_C2_muxii: - return Hexagon::MUX_ii; - case Intrinsic::hexagon_C2_vmux: - return Hexagon::VMUX_prr64; - case Intrinsic::hexagon_S2_valignrb: - return Hexagon::VALIGN_rrp; - case Intrinsic::hexagon_S2_vsplicerb: - return Hexagon::VSPLICE_rrp; - } -} - - static bool OffsetFitsS11(EVT MemType, int64_t Offset) { if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) { return true; @@ -404,10 +374,10 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) { dl, PointerTy, TargAddr); // Figure out base + offset opcode - if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed; - else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed; - else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed; - else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed; + if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io; + else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io; + else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io; + else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io; else llvm_unreachable("unknown memory type"); // Build indexed load. 
@@ -446,14 +416,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && N1.getNode()->getValueType(0) == MVT::i32) { - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); + const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); if (TII->isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, MVT::Other, Base, TargetConst, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64, + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, SDValue(Result_1, 0)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); @@ -474,9 +443,9 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, Base, TargetConst0, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, SDValue(Result_1, 0)); - SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, Base, TargetConstVal, SDValue(Result_1, 1)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); @@ -513,17 +482,16 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && N1.getNode()->getValueType(0) == MVT::i32) { - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); + const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); if (TII->isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, MVT::Other, Base, TargetConstVal, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, MVT::i64, MVT::Other, SDValue(Result_2,0), SDValue(Result_1,0)); @@ -548,14 +516,14 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, Base, TargetConst0, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, MVT::i64, MVT::Other, SDValue(Result_2,0), SDValue(Result_1,0)); // Add offset to base. - SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, Base, TargetConstVal, SDValue(Result_1, 1)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); @@ -591,28 +559,27 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD); // Figure out the opcode. 
- const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); + const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); if (LoadedVT == MVT::i64) { if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = Hexagon::POST_LDrid; + Opcode = Hexagon::L2_loadrd_pi; else - Opcode = Hexagon::LDrid; + Opcode = Hexagon::L2_loadrd_io; } else if (LoadedVT == MVT::i32) { if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = Hexagon::POST_LDriw; + Opcode = Hexagon::L2_loadri_pi; else - Opcode = Hexagon::LDriw; + Opcode = Hexagon::L2_loadri_io; } else if (LoadedVT == MVT::i16) { if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih; + Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; else - Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih; + Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; } else if (LoadedVT == MVT::i8) { if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib; + Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; else - Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib; + Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; } else llvm_unreachable("unknown memory type"); @@ -652,7 +619,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { LD->getValueType(0), MVT::Other, Base, TargetConst0, Chain); - SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, Base, TargetConstVal, SDValue(Result_1, 1)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); @@ -701,18 +668,17 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { // Offset value must be within representable range // and must have correct alignment properties. - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); + const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); if (TII->isValidAutoIncImm(StoredVT, Val)) { SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value, Chain}; unsigned Opcode = 0; // Figure out the post inc version of opcode. - if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri; - else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri; - else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri; - else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri; + if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_pi; + else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi; + else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi; + else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi; else llvm_unreachable("unknown memory type"); // Build post increment store. @@ -735,17 +701,17 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { unsigned Opcode = 0; // Figure out the opcode. 
- if (StoredVT == MVT::i64) Opcode = Hexagon::STrid; - else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed; - else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih; - else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib; + if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io; + else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io; + else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io; + else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io; else llvm_unreachable("unknown memory type"); // Build regular store. SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); // Build splitted incriment instruction. - SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, Base, TargetConstVal, SDValue(Result_1, 0)); @@ -788,10 +754,10 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, TargAddr); // Figure out base + offset opcode - if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed; - else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed; - else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed; - else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed; + if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io; + else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io; + else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io; + else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io; else llvm_unreachable("unknown memory type"); SDValue Ops[] = {SDValue(NewBase,0), @@ -865,7 +831,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, MVT::Other, LD->getBasePtr(), TargetConst0, Chain), 0); @@ -891,7 +857,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, MVT::Other, LD->getBasePtr(), TargetConst0, Chain), 0); @@ -900,7 +866,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { } // Generate a mpy instruction. 
- SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64, + SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, MVT::i64, OP0, OP1); ReplaceUses(N, Result); return Result; @@ -934,9 +900,9 @@ SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) { if (N000 == N2 && N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { - SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl, + SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl, MVT::i32, N000); - SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl, + SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl, MVT::i32, SDValue(SextNode, 0), N1); @@ -958,9 +924,9 @@ SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) { if (N000 == N2 && N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { - SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl, + SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl, MVT::i32, N000); - SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl, + SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl, MVT::i32, SDValue(SextNode, 0), N1); @@ -1045,7 +1011,7 @@ SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, MVT::Other, LD->getBasePtr(), TargetConst0, Chain), 0); @@ -1070,7 +1036,7 @@ SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, MVT::Other, LD->getBasePtr(), TargetConst0, Chain), 0); @@ -1079,7 +1045,7 @@ SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { } // Generate a mpy instruction. - SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32, + SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32, OP0, OP1); ReplaceUses(N, Result); return Result; @@ -1112,7 +1078,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode())) if (isInt<9>(CN->getSExtValue())) { SDNode* Result = - CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, + CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32, Mul_0, Val); ReplaceUses(N, Result); return Result; @@ -1140,7 +1106,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { dyn_cast<ConstantSDNode>(Val.getNode())) if (isInt<9>(CN->getSExtValue())) { SDNode* Result = - CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32, + CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32, Shl2_0, Val); ReplaceUses(N, Result); return Result; @@ -1177,13 +1143,13 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { if (N->getValueType(0) == MVT::i64) { // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs). 
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl, + SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, MVT::i32, SDValue(IsIntrinsic, 0)); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, MVT::i64, MVT::Other, SDValue(Result_2, 0), SDValue(Result_1, 0)); @@ -1192,7 +1158,7 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { } if (N->getValueType(0) == MVT::i32) { // Convert the zero_extend to Rs = Pd - SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl, + SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, MVT::i32, SDValue(IsIntrinsic, 0)); ReplaceUses(N, RsPd); @@ -1204,56 +1170,30 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { return SelectCode(N); } - // // Checking for intrinsics which have predicate registers as operand(s) // and lowering to the actual intrinsic. // SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { - SDLoc dl(N); - unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID); - - // We are concerned with only those intrinsics that have predicate registers - // as at least one of the operands. - if (IntrinsicWithPred) { - SmallVector<SDValue, 8> Ops; - const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); - const MCInstrDesc &MCID = TII->get(IntrinsicWithPred); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); - - // Iterate over all the operands of the intrinsics. - // For PredRegs, do the transfer. - // For Double/Int Regs, just preserve the value - // For immediates, lower it. - for (unsigned i = 1; i < N->getNumOperands(); ++i) { - SDNode *Arg = N->getOperand(i).getNode(); - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI, *MF); - - if (RC == &Hexagon::IntRegsRegClass || - RC == &Hexagon::DoubleRegsRegClass) { - Ops.push_back(SDValue(Arg, 0)); - } else if (RC == &Hexagon::PredRegsRegClass) { - // Do the transfer. - SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1, - SDValue(Arg, 0)); - Ops.push_back(SDValue(PdRs,0)); - } else if (!RC && (dyn_cast<ConstantSDNode>(Arg) != nullptr)) { - // This is immediate operand. Lower it here making sure that we DO have - // const SDNode for immediate value. 
- int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue(); - SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32); - Ops.push_back(SDVal); - } else { - llvm_unreachable("Unimplemented"); - } - } - EVT ReturnValueVT = N->getValueType(0); - SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl, - ReturnValueVT, Ops); - ReplaceUses(N, Result); - return Result; + unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned Bits; + switch (IID) { + case Intrinsic::hexagon_S2_vsplatrb: + Bits = 8; + break; + case Intrinsic::hexagon_S2_vsplatrh: + Bits = 16; + break; + default: + return SelectCode(N); + } + + SDValue const &V = N->getOperand(1); + SDValue U; + if (isValueExtension(V, Bits, U)) { + SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), U); + return SelectCode(R.getNode()); } return SelectCode(N); } @@ -1289,19 +1229,19 @@ SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) { if (Val == -1) { // Create the IntReg = 1 node. SDNode* IntRegTFR = - CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, CurDAG->getTargetConstant(0, MVT::i32)); // Pd = IntReg - SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1, + SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1, SDValue(IntRegTFR, 0)); // not(Pd) - SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_p, dl, MVT::i1, + SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1, SDValue(Pd, 0)); // xor(not(Pd)) - Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1, + Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1, SDValue(Pd, 0), SDValue(NotPd, 0)); // We have just built: @@ -1334,7 +1274,7 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { // Build Rd = Rd' + asr(Rs, Rt). 
The machine constraints will ensure that // Rd and Rd' are assigned to the same register - SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_ADD_rr, dl, MVT::i32, + SDNode* Result = CurDAG->getMachineNode(Hexagon::S2_asr_r_r_acc, dl, MVT::i32, N->getOperand(1), Src1->getOperand(0), Src1->getOperand(1)); @@ -1683,3 +1623,126 @@ bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R, } return false; } + +bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { + if (N.getOpcode() != ISD::FrameIndex) + return false; + FrameIndexSDNode *FX = cast<FrameIndexSDNode>(N); + R = CurDAG->getTargetFrameIndex(FX->getIndex(), MVT::i32); + return true; +} + +inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) { + return SelectGlobalAddress(N, R, false); +} + +inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) { + return SelectGlobalAddress(N, R, true); +} + +bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, + bool UseGP) { + switch (N.getOpcode()) { + case ISD::ADD: { + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + unsigned GAOpc = N0.getOpcode(); + if (UseGP && GAOpc != HexagonISD::CONST32_GP) + return false; + if (!UseGP && GAOpc != HexagonISD::CONST32) + return false; + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) { + SDValue Addr = N0.getOperand(0); + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) { + if (GA->getOpcode() == ISD::TargetGlobalAddress) { + uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue(); + R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const), + N.getValueType(), NewOff); + return true; + } + } + } + break; + } + case HexagonISD::CONST32: + // The operand(0) of CONST32 is TargetGlobalAddress, which is what we + // want in the instruction. + if (!UseGP) + R = N.getOperand(0); + return !UseGP; + case HexagonISD::CONST32_GP: + if (UseGP) + R = N.getOperand(0); + return UseGP; + default: + return false; + } + + return false; +} + +bool HexagonDAGToDAGISel::isValueExtension(SDValue const &Val, + unsigned FromBits, SDValue &Src) { + unsigned Opc = Val.getOpcode(); + switch (Opc) { + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: { + SDValue const &Op0 = Val.getOperand(0); + EVT T = Op0.getValueType(); + if (T.isInteger() && T.getSizeInBits() == FromBits) { + Src = Op0; + return true; + } + break; + } + case ISD::SIGN_EXTEND_INREG: + case ISD::AssertSext: + case ISD::AssertZext: + if (Val.getOperand(0).getValueType().isInteger()) { + VTSDNode *T = cast<VTSDNode>(Val.getOperand(1)); + if (T->getVT().getSizeInBits() == FromBits) { + Src = Val.getOperand(0); + return true; + } + } + break; + case ISD::AND: { + // Check if this is an AND with "FromBits" of lower bits set to 1. + uint64_t FromMask = (1 << FromBits) - 1; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { + if (C->getZExtValue() == FromMask) { + Src = Val.getOperand(1); + return true; + } + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { + if (C->getZExtValue() == FromMask) { + Src = Val.getOperand(0); + return true; + } + } + break; + } + case ISD::OR: + case ISD::XOR: { + // OR/XOR with the lower "FromBits" bits set to 0. 
+ uint64_t FromMask = (1 << FromBits) - 1; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) { + if ((C->getZExtValue() & FromMask) == 0) { + Src = Val.getOperand(1); + return true; + } + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) { + if ((C->getZExtValue() & FromMask) == 0) { + Src = Val.getOperand(0); + return true; + } + } + } + default: + break; + } + return false; +} diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 7646088..0072994 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -188,7 +188,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5 }; - if (unsigned Reg = State.AllocateReg(RegList, 6)) { + if (unsigned Reg = State.AllocateReg(RegList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -213,7 +213,7 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, static const MCPhysReg RegList2[] = { Hexagon::R1, Hexagon::R3 }; - if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) { + if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -404,6 +404,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; + bool doesNotReturn = CLI.DoesNotReturn; bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); @@ -462,8 +463,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; - const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( - DAG.getSubtarget().getRegisterInfo()); + const HexagonRegisterInfo *QRI = Subtarget->getRegisterInfo(); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy()); @@ -597,7 +597,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall) return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); - Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops); + int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3; + Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. @@ -720,9 +721,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // Check it to be lr - const HexagonRegisterInfo *QRI = - static_cast<const HexagonRegisterInfo *>( - DAG.getSubtarget().getRegisterInfo()); + const HexagonRegisterInfo *QRI = Subtarget->getRegisterInfo(); if (Reg == QRI->getRARegister()) { FuncInfo->setHasClobberLR(true); break; @@ -815,8 +814,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // The Sub result contains the new stack start address, so it // must be placed in the stack pointer register. 
- const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( - DAG.getSubtarget().getRegisterInfo()); + const HexagonRegisterInfo *QRI = Subtarget->getRegisterInfo(); SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub); SDValue Ops[2] = { ArgAdjust, CopyChain }; @@ -875,7 +873,7 @@ const { RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - } else if (RegVT == MVT::i64) { + } else if (RegVT == MVT::i64 || RegVT == MVT::f64) { unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); @@ -963,7 +961,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); @@ -989,8 +987,7 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - const HexagonRegisterInfo *TRI = static_cast<const HexagonRegisterInfo *>( - DAG.getSubtarget().getRegisterInfo()); + const HexagonRegisterInfo *TRI = Subtarget->getRegisterInfo(); MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); @@ -1021,9 +1018,10 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SDLoc dl(Op); Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); - const HexagonTargetObjectFile &TLOF = - static_cast<const HexagonTargetObjectFile &>(getObjFileLowering()); - if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { + const HexagonTargetObjectFile *TLOF = + static_cast<const HexagonTargetObjectFile *>( + getTargetMachine().getObjFileLowering()); + if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) { return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); } @@ -1042,24 +1040,22 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // TargetLowering Implementation //===----------------------------------------------------------------------===// -HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) - : TargetLowering(targetmachine), - TM(targetmachine) { - - const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>(); +HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &STI) + : TargetLowering(TM), Subtarget(&STI) { // Set up the register classes. 
addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); - if (Subtarget.hasV5TOps()) { + if (Subtarget->hasV5TOps()) { addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); } addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); - computeRegisterProperties(); + computeRegisterProperties(Subtarget->getRegisterInfo()); // Align loop entry setPrefLoopAlignment(4); @@ -1109,15 +1105,22 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); setOperationAction(ISD::FDIV, MVT::f64, Expand); + setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); + setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); + setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); + setOperationAction(ISD::FSQRT, MVT::f32, Expand); setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); - if (Subtarget.hasV5TOps()) { + if (Subtarget->hasV5TOps()) { // Hexagon V5 Support. setOperationAction(ISD::FADD, MVT::f32, Legal); - setOperationAction(ISD::FADD, MVT::f64, Legal); + setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f32, Legal); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal); setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal); @@ -1202,11 +1205,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); - setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); - setOperationAction(ISD::FADD, MVT::f64, Expand); setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); setOperationAction(ISD::FADD, MVT::f32, Expand); + setOperationAction(ISD::FADD, MVT::f64, Expand); + + setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); + setOperationAction(ISD::FSUB, MVT::f32, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); @@ -1247,7 +1253,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); - setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); setOperationAction(ISD::FMUL, MVT::f64, Expand); setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); @@ -1301,9 +1306,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); // Turn FP extload into load/fextend. - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); // Hexagon has a i1 sign extending load. - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); + for (MVT VT : MVT::integer_valuetypes()) + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); // Turn FP truncstore into trunc + store. 
setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -1333,7 +1340,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); - if (Subtarget.hasV5TOps()) { + if (Subtarget->hasV5TOps()) { // We need to make the operation type of SELECT node to be Custom, // such that we don't go into the infinite loop of @@ -1422,19 +1429,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - + + setOperationAction(ISD::MULHS, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); - if (Subtarget.isSubtargetV2()) { - setExceptionPointerRegister(Hexagon::R20); - setExceptionSelectorRegister(Hexagon::R21); - } else { - setExceptionPointerRegister(Hexagon::R0); - setExceptionSelectorRegister(Hexagon::R1); - } + setExceptionPointerRegister(Hexagon::R0); + setExceptionSelectorRegister(Hexagon::R1); // VASTART needs to be custom lowered to use the VarArgsFrameIndex. setOperationAction(ISD::VASTART, MVT::Other, Custom); @@ -1452,8 +1455,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) setMinFunctionAlignment(2); // Needed for DYNAMIC_STACKALLOC expansion. - const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( - TM.getSubtargetImpl()->getRegisterInfo()); + const HexagonRegisterInfo *QRI = Subtarget->getRegisterInfo(); setStackPointerRegisterToSaveRestore(QRI->getStackRegister()); setSchedulingPreference(Sched::VLIW); } @@ -1476,7 +1478,9 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::Lo: return "HexagonISD::Lo"; case HexagonISD::FTOI: return "HexagonISD::FTOI"; case HexagonISD::ITOF: return "HexagonISD::ITOF"; - case HexagonISD::CALL: return "HexagonISD::CALL"; + case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; + case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; + case HexagonISD::CALLR: return "HexagonISD::CALLR"; case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; @@ -1591,10 +1595,10 @@ const { // Inline Assembly Support //===----------------------------------------------------------------------===// -std::pair<unsigned, const TargetRegisterClass*> -HexagonTargetLowering::getRegForInlineAsmConstraint(const - std::string &Constraint, - MVT VT) const { +std::pair<unsigned, const TargetRegisterClass *> +HexagonTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, const std::string &Constraint, + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 @@ -1615,14 +1619,14 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const } } - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. 
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - return TM.getSubtarget<HexagonSubtarget>().hasV5TOps(); + return Subtarget->hasV5TOps(); } /// isLegalAddressingMode - Return true if the addressing mode represented by @@ -1705,3 +1709,17 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization( // information is not available. return true; } + +// Return true when the given node fits in a positive half word. +bool llvm::isPositiveHalfWord(SDNode *N) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (CN && CN->getSExtValue() > 0 && isInt<16>(CN->getSExtValue())) + return true; + + switch (N->getOpcode()) { + default: + return false; + case ISD::SIGN_EXTEND_INREG: + return true; + } +} diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 63e4392..151c28f 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -21,6 +21,10 @@ #include "llvm/Target/TargetLowering.h" namespace llvm { + +// Return true when the given node fits in a positive half word. +bool isPositiveHalfWord(SDNode *N); + namespace HexagonISD { enum { FIRST_NUMBER = ISD::BUILTIN_OP_END, @@ -45,10 +49,15 @@ namespace llvm { FTOI, // FP to Int within a FP register. ITOF, // Int to FP within a FP register. - CALL, // A call instruction. + CALLv3, // A V3+ call instruction. + CALLv3nr, // A V3+ call instruction that doesn't return. + CALLR, + RET_FLAG, // Return with a flag operand. BR_JT, // Jump table. - BARRIER, // Memory barrier. + BARRIER, // Memory barrier + POPCOUNT, + COMBINE, WrapperJT, WrapperCP, WrapperCombineII, @@ -63,10 +72,13 @@ namespace llvm { WrapperShuffOB, WrapperShuffOH, TC_RETURN, - EH_RETURN + EH_RETURN, + DCFETCH }; } + class HexagonSubtarget; + class HexagonTargetLowering : public TargetLowering { int VarArgsFrameOffset; // Frame offset to start of varargs area. @@ -74,8 +86,9 @@ namespace llvm { unsigned& RetSize) const; public: - const TargetMachine &TM; - explicit HexagonTargetLowering(const TargetMachine &targetmachine); + const HexagonSubtarget *Subtarget; + explicit HexagonTargetLowering(const TargetMachine &TM, + const HexagonSubtarget &Subtarget); /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call @@ -152,8 +165,9 @@ namespace llvm { ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; - std::pair<unsigned, const TargetRegisterClass*> - getRegForInlineAsmConstraint(const std::string &Constraint, + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const override; // Intrinsics diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index cc27c4c..3d04678 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -28,20 +28,12 @@ def TypeXTYPE : IType<8>; def TypeENDLOOP: IType<31>; // Maintain list of valid subtargets for each instruction. 
-class SubTarget<bits<4> value> { - bits<4> Value = value; +class SubTarget<bits<6> value> { + bits<6> Value = value; } -def HasV2SubT : SubTarget<0xf>; -def HasV2SubTOnly : SubTarget<0x1>; -def NoV2SubT : SubTarget<0x0>; -def HasV3SubT : SubTarget<0xe>; -def HasV3SubTOnly : SubTarget<0x2>; -def NoV3SubT : SubTarget<0x1>; -def HasV4SubT : SubTarget<0xc>; -def NoV4SubT : SubTarget<0x3>; -def HasV5SubT : SubTarget<0x8>; -def NoV5SubT : SubTarget<0x7>; +def HasAnySubT : SubTarget<0x3f>; // 111111 +def HasV5SubT : SubTarget<0x3e>; // 111110 // Addressing modes for load/store instructions class AddrModeType<bits<3> value> { @@ -56,8 +48,8 @@ def BaseLongOffset : AddrModeType<4>; // Indirect with long offset def BaseRegOffset : AddrModeType<5>; // Indirect with register offset def PostInc : AddrModeType<6>; // Post increment addressing mode -class MemAccessSize<bits<3> value> { - bits<3> Value = value; +class MemAccessSize<bits<4> value> { + bits<4> Value = value; } def NoMemAccess : MemAccessSize<0>;// Not a memory acces instruction. @@ -157,11 +149,11 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bits<2> opExtentAlign = 0; let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending. - // If an instruction is valid on a subtarget (v2-v5), set the corresponding - // bit from validSubTargets. v2 is the least significant bit. + // If an instruction is valid on a subtarget, set the corresponding + // bit from validSubTargets. // By default, instruction is valid on all subtargets. - SubTarget validSubTargets = HasV2SubT; - let TSFlags{37-34} = validSubTargets.Value; + SubTarget validSubTargets = HasAnySubT; + let TSFlags{39-34} = validSubTargets.Value; // Addressing mode for load/store instructions. AddrModeType addrMode = NoAddrMode; @@ -169,7 +161,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, // Memory access size for mem access instructions (load/store) MemAccessSize accessSize = NoMemAccess; - let TSFlags{45-43} = accessSize.Value; + let TSFlags{46-43} = accessSize.Value; bits<1> isTaken = 0; let TSFlags {47} = isTaken; // Branch prediction. @@ -186,13 +178,12 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, string InputType = ""; // Input is "imm" or "reg" type. string isMEMri = "false"; // Set to "true" for load/store with MEMri operand. string isFloat = "false"; // Set to "true" for the floating-point load/store. - string isBrTaken = ""; // Set to "true"/"false" for jump instructions + string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"), ""); let PNewValue = !if(isPredicatedNew, "new", ""); let NValueST = !if(isNVStore, "true", "false"); - let isCodeGenOnly = 1; // *** Must match MCTargetDesc/HexagonBaseInfo.h *** } @@ -203,6 +194,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. +let mayLoad = 1 in class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; @@ -365,7 +357,6 @@ class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>; - // // ALU64 patterns. 
// diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index d92f97b..5fec80b 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -19,6 +19,7 @@ def TypeMEMOP : IType<9>; def TypeNV : IType<10>; +def TypeCOMPOUND : IType<12>; def TypePREFIX : IType<30>; //----------------------------------------------------------------------------// @@ -65,3 +66,7 @@ let isCodeGenOnly = 1 in class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123, TypePREFIX>; + +class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 1688c4a..9bae12c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -78,11 +78,11 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, switch (MI->getOpcode()) { default: break; - case Hexagon::LDriw: - case Hexagon::LDrid: - case Hexagon::LDrih: - case Hexagon::LDrib: - case Hexagon::LDriub: + case Hexagon::L2_loadri_io: + case Hexagon::L2_loadrd_io: + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: if (MI->getOperand(2).isFI() && MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { FrameIndex = MI->getOperand(2).getIndex(); @@ -103,10 +103,10 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { default: break; - case Hexagon::STriw: - case Hexagon::STrid: - case Hexagon::STrih: - case Hexagon::STrib: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerd_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: if (MI->getOperand(2).isFI() && MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { FrameIndex = MI->getOperand(0).getIndex(); @@ -124,8 +124,8 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const{ - int BOpc = Hexagon::JMP; - int BccOpc = Hexagon::JMP_t; + int BOpc = Hexagon::J2_jump; + int BccOpc = Hexagon::J2_jumpt; assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -134,7 +134,7 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, // If we want to reverse the branch an odd number of times, we want // JMP_f. if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { - BccOpc = Hexagon::JMP_f; + BccOpc = Hexagon::J2_jumpf; regPos = 1; } @@ -213,7 +213,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // Delete the JMP if it's equivalent to a fall-through. - if (AllowModify && I->getOpcode() == Hexagon::JMP && + if (AllowModify && I->getOpcode() == Hexagon::J2_jump && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); I->eraseFromParent(); @@ -249,7 +249,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If there is only one terminator instruction, process it. 
if (LastInst && !SecondLastInst) { - if (LastOpcode == Hexagon::JMP) { + if (LastOpcode == Hexagon::J2_jump) { TBB = LastInst->getOperand(0).getMBB(); return false; } @@ -274,7 +274,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode); - if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::JMP)) { + if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { TBB = SecondLastInst->getOperand(1).getMBB(); if (SecLastOpcodeHasNot) Cond.push_back(MachineOperand::CreateImm(0)); @@ -285,7 +285,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If the block ends with two Hexagon:JMPs, handle it. The second one is not // executed, so remove it. - if (SecLastOpcode == Hexagon::JMP && LastOpcode == Hexagon::JMP) { + if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) @@ -295,7 +295,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If the block ends with an ENDLOOP, and JMP, handle it. if (SecLastOpcode == Hexagon::ENDLOOP0 && - LastOpcode == Hexagon::JMP) { + LastOpcode == Hexagon::J2_jump) { TBB = SecondLastInst->getOperand(0).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); @@ -308,9 +308,9 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - int BOpc = Hexagon::JMP; - int BccOpc = Hexagon::JMP_t; - int BccOpcNot = Hexagon::JMP_f; + int BOpc = Hexagon::J2_jump; + int BccOpc = Hexagon::J2_jumpt; + int BccOpcNot = Hexagon::J2_jumpf; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; @@ -346,33 +346,31 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, // Set mask and the first source register. switch (Opc) { - case Hexagon::CMPEHexagon4rr: - case Hexagon::CMPEQri: - case Hexagon::CMPEQrr: - case Hexagon::CMPGT64rr: - case Hexagon::CMPGTU64rr: - case Hexagon::CMPGTUri: - case Hexagon::CMPGTUrr: - case Hexagon::CMPGTri: - case Hexagon::CMPGTrr: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtup: + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgt: SrcReg = MI->getOperand(1).getReg(); Mask = ~0; break; - case Hexagon::CMPbEQri_V4: - case Hexagon::CMPbEQrr_sbsb_V4: - case Hexagon::CMPbEQrr_ubub_V4: - case Hexagon::CMPbGTUri_V4: - case Hexagon::CMPbGTUrr_V4: - case Hexagon::CMPbGTrr_V4: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbgt: SrcReg = MI->getOperand(1).getReg(); Mask = 0xFF; break; - case Hexagon::CMPhEQri_V4: - case Hexagon::CMPhEQrr_shl_V4: - case Hexagon::CMPhEQrr_xor_V4: - case Hexagon::CMPhGTUri_V4: - case Hexagon::CMPhGTUrr_V4: - case Hexagon::CMPhGTrr_shl_V4: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgtui: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmphgt: SrcReg = MI->getOperand(1).getReg(); Mask = 0xFFFF; break; @@ -380,30 +378,28 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, // Set the value/second source register. 
switch (Opc) { - case Hexagon::CMPEHexagon4rr: - case Hexagon::CMPEQrr: - case Hexagon::CMPGT64rr: - case Hexagon::CMPGTU64rr: - case Hexagon::CMPGTUrr: - case Hexagon::CMPGTrr: - case Hexagon::CMPbEQrr_sbsb_V4: - case Hexagon::CMPbEQrr_ubub_V4: - case Hexagon::CMPbGTUrr_V4: - case Hexagon::CMPbGTrr_V4: - case Hexagon::CMPhEQrr_shl_V4: - case Hexagon::CMPhEQrr_xor_V4: - case Hexagon::CMPhGTUrr_V4: - case Hexagon::CMPhGTrr_shl_V4: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtup: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgt: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmphgt: SrcReg2 = MI->getOperand(2).getReg(); return true; - case Hexagon::CMPEQri: - case Hexagon::CMPGTUri: - case Hexagon::CMPGTri: - case Hexagon::CMPbEQri_V4: - case Hexagon::CMPbGTUri_V4: - case Hexagon::CMPhEQri_V4: - case Hexagon::CMPhGTUri_V4: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgti: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgtui: SrcReg2 = 0; Value = MI->getOperand(2).getImm(); return true; @@ -418,16 +414,16 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg); return; } if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::TFR64), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg).addReg(SrcReg); return; } if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) { // Map Pd = Ps to Pd = or(Ps, Ps). - BuildMI(MBB, I, DL, get(Hexagon::OR_pp), + BuildMI(MBB, I, DL, get(Hexagon::C2_or), DestReg).addReg(SrcReg).addReg(SrcReg); return; } @@ -436,31 +432,31 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // We can have an overlap between single and double reg: r1:0 = r0. if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) { // r1:0 = r0 - BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg, + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg, Hexagon::subreg_hireg))).addImm(0); } else { // r1:0 = r1 or no overlap. - BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg, + BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), (RI.getSubReg(DestReg, Hexagon::subreg_loreg))).addReg(SrcReg); - BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg, + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg, Hexagon::subreg_hireg))).addImm(0); } return; } - if (Hexagon::CRRegsRegClass.contains(DestReg) && + if (Hexagon::CtrRegsRegClass.contains(DestReg) && Hexagon::IntRegsRegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg).addReg(SrcReg); return; } if (Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::IntRegsRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg). + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg). addReg(SrcReg, getKillRegState(KillSrc)); return; } if (Hexagon::IntRegsRegClass.contains(SrcReg) && Hexagon::PredRegsRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg). 
+ BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg). addReg(SrcReg, getKillRegState(KillSrc)); return; } @@ -488,11 +484,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Align); if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) { - BuildMI(MBB, I, DL, get(Hexagon::STriw)) + BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) { - BuildMI(MBB, I, DL, get(Hexagon::STrid)) + BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) { @@ -533,10 +529,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); if (RC == &Hexagon::IntRegsRegClass) { - BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg) + BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (RC == &Hexagon::DoubleRegsRegClass) { - BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg) + BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (RC == &Hexagon::PredRegsRegClass) { BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg) @@ -582,10 +578,6 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { } bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { - // Constant extenders are allowed only for V4 and above. - if (!Subtarget.hasV4TOps()) - return false; - const MCInstrDesc &MID = MI->getDesc(); const uint64_t F = MID.TSFlags; if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) @@ -648,78 +640,68 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { const int Opc = MI->getOpcode(); switch(Opc) { - case Hexagon::TFRI: + case Hexagon::A2_tfrsi: return isInt<12>(MI->getOperand(1).getImm()); - case Hexagon::STrid: - case Hexagon::STrid_indexed: + case Hexagon::S2_storerd_io: return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); - case Hexagon::STriw: - case Hexagon::STriw_indexed: - case Hexagon::STriw_nv_V4: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerinew_io: return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); - case Hexagon::STrih: - case Hexagon::STrih_indexed: - case Hexagon::STrih_nv_V4: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerhnew_io: return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); - case Hexagon::STrib: - case Hexagon::STrib_indexed: - case Hexagon::STrib_nv_V4: + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerbnew_io: return isUInt<6>(MI->getOperand(1).getImm()); - case Hexagon::LDrid: - case Hexagon::LDrid_indexed: + case Hexagon::L2_loadrd_io: return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); - case Hexagon::LDriw: - case Hexagon::LDriw_indexed: + case Hexagon::L2_loadri_io: return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); - case Hexagon::LDrih: - case Hexagon::LDriuh: - case Hexagon::LDrih_indexed: - case Hexagon::LDriuh_indexed: + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); - case Hexagon::LDrib: - case Hexagon::LDriub: - case Hexagon::LDrib_indexed: - case Hexagon::LDriub_indexed: + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: return isUInt<6>(MI->getOperand(2).getImm()); - case Hexagon::POST_LDrid: + case Hexagon::L2_loadrd_pi: return 
isShiftedInt<4,3>(MI->getOperand(3).getImm()); - case Hexagon::POST_LDriw: + case Hexagon::L2_loadri_pi: return isShiftedInt<4,2>(MI->getOperand(3).getImm()); - case Hexagon::POST_LDrih: - case Hexagon::POST_LDriuh: + case Hexagon::L2_loadrh_pi: + case Hexagon::L2_loadruh_pi: return isShiftedInt<4,1>(MI->getOperand(3).getImm()); - case Hexagon::POST_LDrib: - case Hexagon::POST_LDriub: + case Hexagon::L2_loadrb_pi: + case Hexagon::L2_loadrub_pi: return isInt<4>(MI->getOperand(3).getImm()); - case Hexagon::STrib_imm_V4: - case Hexagon::STrih_imm_V4: - case Hexagon::STriw_imm_V4: + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: return (isUInt<6>(MI->getOperand(1).getImm()) && isInt<6>(MI->getOperand(2).getImm())); - case Hexagon::ADD_ri: + case Hexagon::A2_addi: return isInt<8>(MI->getOperand(2).getImm()); - case Hexagon::ASLH: - case Hexagon::ASRH: - case Hexagon::SXTB: - case Hexagon::SXTH: - case Hexagon::ZXTB: - case Hexagon::ZXTH: - return Subtarget.hasV4TOps(); + case Hexagon::A2_aslh: + case Hexagon::A2_asrh: + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + return true; } return true; @@ -739,16 +721,16 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { switch(Opc) { default: llvm_unreachable("Unexpected predicated instruction"); - case Hexagon::COMBINE_rr_cPt: - return Hexagon::COMBINE_rr_cNotPt; - case Hexagon::COMBINE_rr_cNotPt: - return Hexagon::COMBINE_rr_cPt; + case Hexagon::C2_ccombinewt: + return Hexagon::C2_ccombinewf; + case Hexagon::C2_ccombinewf: + return Hexagon::C2_ccombinewt; // Dealloc_return. - case Hexagon::DEALLOC_RET_cPt_V4: - return Hexagon::DEALLOC_RET_cNotPt_V4; - case Hexagon::DEALLOC_RET_cNotPt_V4: - return Hexagon::DEALLOC_RET_cPt_V4; + case Hexagon::L4_return_t: + return Hexagon::L4_return_f; + case Hexagon::L4_return_f: + return Hexagon::L4_return_t; } } @@ -780,22 +762,14 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { case Hexagon::TFRI_f: return !invertPredicate ? Hexagon::TFRI_cPt_f : Hexagon::TFRI_cNotPt_f; - case Hexagon::COMBINE_rr: - return !invertPredicate ? Hexagon::COMBINE_rr_cPt : - Hexagon::COMBINE_rr_cNotPt; - - // Word. - case Hexagon::STriw_f: - return !invertPredicate ? Hexagon::STriw_cPt : - Hexagon::STriw_cNotPt; - case Hexagon::STriw_indexed_f: - return !invertPredicate ? Hexagon::STriw_indexed_cPt : - Hexagon::STriw_indexed_cNotPt; + case Hexagon::A2_combinew: + return !invertPredicate ? Hexagon::C2_ccombinewt : + Hexagon::C2_ccombinewf; // DEALLOC_RETURN. - case Hexagon::DEALLOC_RET_V4: - return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 : - Hexagon::DEALLOC_RET_cNotPt_V4; + case Hexagon::L4_return: + return !invertPredicate ? Hexagon::L4_return_t: + Hexagon::L4_return_f; } llvm_unreachable("Unexpected predicable instruction"); } @@ -901,7 +875,7 @@ PredicateInstruction(MachineInstr *MI, continue; } else { - assert(false && "Unexpected operand type"); + llvm_unreachable("Unexpected operand type"); } } } @@ -1024,12 +998,10 @@ bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { // Returns true, if a ST insn can be promoted to a new-value store. 
bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { - const HexagonRegisterInfo& QRI = getRegisterInfo(); const uint64_t F = MI->getDesc().TSFlags; return ((F >> HexagonII::mayNVStorePos) & - HexagonII::mayNVStoreMask & - QRI.Subtarget.hasV4TOps()); + HexagonII::mayNVStoreMask); } bool @@ -1082,13 +1054,13 @@ isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs, bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: return false; - case Hexagon::DEALLOC_RET_V4 : - case Hexagon::DEALLOC_RET_cPt_V4 : - case Hexagon::DEALLOC_RET_cNotPt_V4 : - case Hexagon::DEALLOC_RET_cdnPnt_V4 : - case Hexagon::DEALLOC_RET_cNotdnPnt_V4 : - case Hexagon::DEALLOC_RET_cdnPt_V4 : - case Hexagon::DEALLOC_RET_cNotdnPt_V4 : + case Hexagon::L4_return: + case Hexagon::L4_return_t: + case Hexagon::L4_return_f: + case Hexagon::L4_return_tnew_pnt: + case Hexagon::L4_return_fnew_pnt: + case Hexagon::L4_return_tnew_pt: + case Hexagon::L4_return_fnew_pt: return true; } } @@ -1107,63 +1079,55 @@ isValidOffset(const int Opcode, const int Offset) const { switch(Opcode) { - case Hexagon::LDriw: - case Hexagon::LDriw_indexed: - case Hexagon::LDriw_f: - case Hexagon::STriw_indexed: - case Hexagon::STriw: - case Hexagon::STriw_f: + case Hexagon::L2_loadri_io: + case Hexagon::S2_storeri_io: return (Offset >= Hexagon_MEMW_OFFSET_MIN) && (Offset <= Hexagon_MEMW_OFFSET_MAX); - case Hexagon::LDrid: - case Hexagon::LDrid_indexed: - case Hexagon::LDrid_f: - case Hexagon::STrid: - case Hexagon::STrid_indexed: - case Hexagon::STrid_f: + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: return (Offset >= Hexagon_MEMD_OFFSET_MIN) && (Offset <= Hexagon_MEMD_OFFSET_MAX); - case Hexagon::LDrih: - case Hexagon::LDriuh: - case Hexagon::STrih: + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + case Hexagon::S2_storerh_io: return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); - case Hexagon::LDrib: - case Hexagon::STrib: - case Hexagon::LDriub: + case Hexagon::L2_loadrb_io: + case Hexagon::S2_storerb_io: + case Hexagon::L2_loadrub_io: return (Offset >= Hexagon_MEMB_OFFSET_MIN) && (Offset <= Hexagon_MEMB_OFFSET_MAX); - case Hexagon::ADD_ri: + case Hexagon::A2_addi: case Hexagon::TFR_FI: return (Offset >= Hexagon_ADDI_OFFSET_MIN) && (Offset <= Hexagon_ADDI_OFFSET_MAX); - case Hexagon::MemOPw_ADDi_V4 : - case Hexagon::MemOPw_SUBi_V4 : - case Hexagon::MemOPw_ADDr_V4 : - case Hexagon::MemOPw_SUBr_V4 : - case Hexagon::MemOPw_ANDr_V4 : - case Hexagon::MemOPw_ORr_V4 : + case Hexagon::L4_iadd_memopw_io: + case Hexagon::L4_isub_memopw_io: + case Hexagon::L4_add_memopw_io: + case Hexagon::L4_sub_memopw_io: + case Hexagon::L4_and_memopw_io: + case Hexagon::L4_or_memopw_io: return (0 <= Offset && Offset <= 255); - case Hexagon::MemOPh_ADDi_V4 : - case Hexagon::MemOPh_SUBi_V4 : - case Hexagon::MemOPh_ADDr_V4 : - case Hexagon::MemOPh_SUBr_V4 : - case Hexagon::MemOPh_ANDr_V4 : - case Hexagon::MemOPh_ORr_V4 : + case Hexagon::L4_iadd_memoph_io: + case Hexagon::L4_isub_memoph_io: + case Hexagon::L4_add_memoph_io: + case Hexagon::L4_sub_memoph_io: + case Hexagon::L4_and_memoph_io: + case Hexagon::L4_or_memoph_io: return (0 <= Offset && Offset <= 127); - case Hexagon::MemOPb_ADDi_V4 : - case Hexagon::MemOPb_SUBi_V4 : - case Hexagon::MemOPb_ADDr_V4 : - case Hexagon::MemOPb_SUBr_V4 : - case Hexagon::MemOPb_ANDr_V4 : - case Hexagon::MemOPb_ORr_V4 : + case Hexagon::L4_iadd_memopb_io: + case Hexagon::L4_isub_memopb_io: + case 
Hexagon::L4_add_memopb_io: + case Hexagon::L4_sub_memopb_io: + case Hexagon::L4_and_memopb_io: + case Hexagon::L4_or_memopb_io: return (0 <= Offset && Offset <= 63); // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of @@ -1172,7 +1136,7 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::LDriw_pred: return true; - case Hexagon::LOOP0_i: + case Hexagon::J2_loop0i: return isUInt<10>(Offset); // INLINEASM is very special. @@ -1220,31 +1184,31 @@ isMemOp(const MachineInstr *MI) const { switch (MI->getOpcode()) { - default: return false; - case Hexagon::MemOPw_ADDi_V4 : - case Hexagon::MemOPw_SUBi_V4 : - case Hexagon::MemOPw_ADDr_V4 : - case Hexagon::MemOPw_SUBr_V4 : - case Hexagon::MemOPw_ANDr_V4 : - case Hexagon::MemOPw_ORr_V4 : - case Hexagon::MemOPh_ADDi_V4 : - case Hexagon::MemOPh_SUBi_V4 : - case Hexagon::MemOPh_ADDr_V4 : - case Hexagon::MemOPh_SUBr_V4 : - case Hexagon::MemOPh_ANDr_V4 : - case Hexagon::MemOPh_ORr_V4 : - case Hexagon::MemOPb_ADDi_V4 : - case Hexagon::MemOPb_SUBi_V4 : - case Hexagon::MemOPb_ADDr_V4 : - case Hexagon::MemOPb_SUBr_V4 : - case Hexagon::MemOPb_ANDr_V4 : - case Hexagon::MemOPb_ORr_V4 : - case Hexagon::MemOPb_SETBITi_V4: - case Hexagon::MemOPh_SETBITi_V4: - case Hexagon::MemOPw_SETBITi_V4: - case Hexagon::MemOPb_CLRBITi_V4: - case Hexagon::MemOPh_CLRBITi_V4: - case Hexagon::MemOPw_CLRBITi_V4: + default: return false; + case Hexagon::L4_iadd_memopw_io: + case Hexagon::L4_isub_memopw_io: + case Hexagon::L4_add_memopw_io: + case Hexagon::L4_sub_memopw_io: + case Hexagon::L4_and_memopw_io: + case Hexagon::L4_or_memopw_io: + case Hexagon::L4_iadd_memoph_io: + case Hexagon::L4_isub_memoph_io: + case Hexagon::L4_add_memoph_io: + case Hexagon::L4_sub_memoph_io: + case Hexagon::L4_and_memoph_io: + case Hexagon::L4_or_memoph_io: + case Hexagon::L4_iadd_memopb_io: + case Hexagon::L4_isub_memopb_io: + case Hexagon::L4_add_memopb_io: + case Hexagon::L4_sub_memopb_io: + case Hexagon::L4_and_memopb_io: + case Hexagon::L4_or_memopb_io: + case Hexagon::L4_ior_memopb_io: + case Hexagon::L4_ior_memoph_io: + case Hexagon::L4_ior_memopw_io: + case Hexagon::L4_iand_memopb_io: + case Hexagon::L4_iand_memoph_io: + case Hexagon::L4_iand_memopw_io: return true; } return false; @@ -1264,12 +1228,12 @@ isSpillPredRegOp(const MachineInstr *MI) const { bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: return false; - case Hexagon::CMPEQrr: - case Hexagon::CMPEQri: - case Hexagon::CMPGTrr: - case Hexagon::CMPGTri: - case Hexagon::CMPGTUrr: - case Hexagon::CMPGTUri: + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: return true; } } @@ -1278,20 +1242,19 @@ bool HexagonInstrInfo:: isConditionalTransfer (const MachineInstr *MI) const { switch (MI->getOpcode()) { default: return false; - case Hexagon::TFR_cPt: - case Hexagon::TFR_cNotPt: - case Hexagon::TFRI_cPt: - case Hexagon::TFRI_cNotPt: - case Hexagon::TFR_cdnPt: - case Hexagon::TFR_cdnNotPt: - case Hexagon::TFRI_cdnPt: - case Hexagon::TFRI_cdnNotPt: + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + case Hexagon::A2_tfrtnew: + case Hexagon::A2_tfrfnew: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmovenewif: return true; } } bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { - const HexagonRegisterInfo& QRI = getRegisterInfo(); switch 
(MI->getOpcode()) { default: return false; @@ -1303,94 +1266,92 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { case Hexagon::A2_pandfnew: case Hexagon::A2_pandt: case Hexagon::A2_pandtnew: + case Hexagon::A4_paslhf: + case Hexagon::A4_paslhfnew: + case Hexagon::A4_paslht: + case Hexagon::A4_paslhtnew: + case Hexagon::A4_pasrhf: + case Hexagon::A4_pasrhfnew: + case Hexagon::A4_pasrht: + case Hexagon::A4_pasrhtnew: case Hexagon::A2_porf: case Hexagon::A2_porfnew: case Hexagon::A2_port: case Hexagon::A2_portnew: + case Hexagon::A2_psubf: + case Hexagon::A2_psubfnew: + case Hexagon::A2_psubt: + case Hexagon::A2_psubtnew: case Hexagon::A2_pxorf: case Hexagon::A2_pxorfnew: case Hexagon::A2_pxort: case Hexagon::A2_pxortnew: - case Hexagon::ADD_ri_cPt: - case Hexagon::ADD_ri_cNotPt: - case Hexagon::SUB_rr_cPt: - case Hexagon::SUB_rr_cNotPt: - case Hexagon::COMBINE_rr_cPt: - case Hexagon::COMBINE_rr_cNotPt: + case Hexagon::A4_psxthf: + case Hexagon::A4_psxthfnew: + case Hexagon::A4_psxtht: + case Hexagon::A4_psxthtnew: + case Hexagon::A4_psxtbf: + case Hexagon::A4_psxtbfnew: + case Hexagon::A4_psxtbt: + case Hexagon::A4_psxtbtnew: + case Hexagon::A4_pzxtbf: + case Hexagon::A4_pzxtbfnew: + case Hexagon::A4_pzxtbt: + case Hexagon::A4_pzxtbtnew: + case Hexagon::A4_pzxthf: + case Hexagon::A4_pzxthfnew: + case Hexagon::A4_pzxtht: + case Hexagon::A4_pzxthtnew: + case Hexagon::A2_paddit: + case Hexagon::A2_paddif: + case Hexagon::C2_ccombinewt: + case Hexagon::C2_ccombinewf: return true; - case Hexagon::ASLH_cPt_V4: - case Hexagon::ASLH_cNotPt_V4: - case Hexagon::ASRH_cPt_V4: - case Hexagon::ASRH_cNotPt_V4: - case Hexagon::SXTB_cPt_V4: - case Hexagon::SXTB_cNotPt_V4: - case Hexagon::SXTH_cPt_V4: - case Hexagon::SXTH_cNotPt_V4: - case Hexagon::ZXTB_cPt_V4: - case Hexagon::ZXTB_cNotPt_V4: - case Hexagon::ZXTH_cPt_V4: - case Hexagon::ZXTH_cNotPt_V4: - return QRI.Subtarget.hasV4TOps(); } } bool HexagonInstrInfo:: isConditionalLoad (const MachineInstr* MI) const { - const HexagonRegisterInfo& QRI = getRegisterInfo(); switch (MI->getOpcode()) { default: return false; - case Hexagon::LDrid_cPt : - case Hexagon::LDrid_cNotPt : - case Hexagon::LDrid_indexed_cPt : - case Hexagon::LDrid_indexed_cNotPt : - case Hexagon::LDriw_cPt : - case Hexagon::LDriw_cNotPt : - case Hexagon::LDriw_indexed_cPt : - case Hexagon::LDriw_indexed_cNotPt : - case Hexagon::LDrih_cPt : - case Hexagon::LDrih_cNotPt : - case Hexagon::LDrih_indexed_cPt : - case Hexagon::LDrih_indexed_cNotPt : - case Hexagon::LDrib_cPt : - case Hexagon::LDrib_cNotPt : - case Hexagon::LDrib_indexed_cPt : - case Hexagon::LDrib_indexed_cNotPt : - case Hexagon::LDriuh_cPt : - case Hexagon::LDriuh_cNotPt : - case Hexagon::LDriuh_indexed_cPt : - case Hexagon::LDriuh_indexed_cNotPt : - case Hexagon::LDriub_cPt : - case Hexagon::LDriub_cNotPt : - case Hexagon::LDriub_indexed_cPt : - case Hexagon::LDriub_indexed_cNotPt : + case Hexagon::L2_ploadrdt_io : + case Hexagon::L2_ploadrdf_io: + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruhf_io: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubf_io: + case Hexagon::L2_ploadrdt_pi: + case Hexagon::L2_ploadrdf_pi: + case Hexagon::L2_ploadrit_pi: + case Hexagon::L2_ploadrif_pi: + case Hexagon::L2_ploadrht_pi: + case Hexagon::L2_ploadrhf_pi: + case Hexagon::L2_ploadrbt_pi: + case 
Hexagon::L2_ploadrbf_pi: + case Hexagon::L2_ploadruht_pi: + case Hexagon::L2_ploadruhf_pi: + case Hexagon::L2_ploadrubt_pi: + case Hexagon::L2_ploadrubf_pi: + case Hexagon::L4_ploadrdt_rr: + case Hexagon::L4_ploadrdf_rr: + case Hexagon::L4_ploadrbt_rr: + case Hexagon::L4_ploadrbf_rr: + case Hexagon::L4_ploadrubt_rr: + case Hexagon::L4_ploadrubf_rr: + case Hexagon::L4_ploadrht_rr: + case Hexagon::L4_ploadrhf_rr: + case Hexagon::L4_ploadruht_rr: + case Hexagon::L4_ploadruhf_rr: + case Hexagon::L4_ploadrit_rr: + case Hexagon::L4_ploadrif_rr: return true; - case Hexagon::POST_LDrid_cPt : - case Hexagon::POST_LDrid_cNotPt : - case Hexagon::POST_LDriw_cPt : - case Hexagon::POST_LDriw_cNotPt : - case Hexagon::POST_LDrih_cPt : - case Hexagon::POST_LDrih_cNotPt : - case Hexagon::POST_LDrib_cPt : - case Hexagon::POST_LDrib_cNotPt : - case Hexagon::POST_LDriuh_cPt : - case Hexagon::POST_LDriuh_cNotPt : - case Hexagon::POST_LDriub_cPt : - case Hexagon::POST_LDriub_cNotPt : - return QRI.Subtarget.hasV4TOps(); - case Hexagon::LDrid_indexed_shl_cPt_V4 : - case Hexagon::LDrid_indexed_shl_cNotPt_V4 : - case Hexagon::LDrib_indexed_shl_cPt_V4 : - case Hexagon::LDrib_indexed_shl_cNotPt_V4 : - case Hexagon::LDriub_indexed_shl_cPt_V4 : - case Hexagon::LDriub_indexed_shl_cNotPt_V4 : - case Hexagon::LDrih_indexed_shl_cPt_V4 : - case Hexagon::LDrih_indexed_shl_cNotPt_V4 : - case Hexagon::LDriuh_indexed_shl_cPt_V4 : - case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : - case Hexagon::LDriw_indexed_shl_cPt_V4 : - case Hexagon::LDriw_indexed_shl_cNotPt_V4 : - return QRI.Subtarget.hasV4TOps(); } } @@ -1430,55 +1391,50 @@ isConditionalLoad (const MachineInstr* MI) const { // is not valid for new-value stores. bool HexagonInstrInfo:: isConditionalStore (const MachineInstr* MI) const { - const HexagonRegisterInfo& QRI = getRegisterInfo(); switch (MI->getOpcode()) { default: return false; - case Hexagon::STrib_imm_cPt_V4 : - case Hexagon::STrib_imm_cNotPt_V4 : - case Hexagon::STrib_indexed_shl_cPt_V4 : - case Hexagon::STrib_indexed_shl_cNotPt_V4 : - case Hexagon::STrib_cPt : - case Hexagon::STrib_cNotPt : - case Hexagon::POST_STbri_cPt : - case Hexagon::POST_STbri_cNotPt : - case Hexagon::STrid_indexed_cPt : - case Hexagon::STrid_indexed_cNotPt : - case Hexagon::STrid_indexed_shl_cPt_V4 : - case Hexagon::POST_STdri_cPt : - case Hexagon::POST_STdri_cNotPt : - case Hexagon::STrih_cPt : - case Hexagon::STrih_cNotPt : - case Hexagon::STrih_indexed_cPt : - case Hexagon::STrih_indexed_cNotPt : - case Hexagon::STrih_imm_cPt_V4 : - case Hexagon::STrih_imm_cNotPt_V4 : - case Hexagon::STrih_indexed_shl_cPt_V4 : - case Hexagon::STrih_indexed_shl_cNotPt_V4 : - case Hexagon::POST_SThri_cPt : - case Hexagon::POST_SThri_cNotPt : - case Hexagon::STriw_cPt : - case Hexagon::STriw_cNotPt : - case Hexagon::STriw_indexed_cPt : - case Hexagon::STriw_indexed_cNotPt : - case Hexagon::STriw_imm_cPt_V4 : - case Hexagon::STriw_imm_cNotPt_V4 : - case Hexagon::STriw_indexed_shl_cPt_V4 : - case Hexagon::STriw_indexed_shl_cNotPt_V4 : - case Hexagon::POST_STwri_cPt : - case Hexagon::POST_STwri_cNotPt : - return QRI.Subtarget.hasV4TOps(); + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + case Hexagon::S4_pstorerbt_rr: + case Hexagon::S4_pstorerbf_rr: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S2_pstorerbt_pi: + case Hexagon::S2_pstorerbf_pi: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + case Hexagon::S4_pstorerdt_rr: + case Hexagon::S4_pstorerdf_rr: + case Hexagon::S2_pstorerdt_pi: 
+ case Hexagon::S2_pstorerdf_pi: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + case Hexagon::S4_pstorerht_rr: + case Hexagon::S4_pstorerhf_rr: + case Hexagon::S2_pstorerht_pi: + case Hexagon::S2_pstorerhf_pi: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + case Hexagon::S4_pstorerit_rr: + case Hexagon::S4_pstorerif_rr: + case Hexagon::S2_pstorerit_pi: + case Hexagon::S2_pstorerif_pi: // V4 global address store before promoting to dot new. - case Hexagon::STd_GP_cPt_V4 : - case Hexagon::STd_GP_cNotPt_V4 : - case Hexagon::STb_GP_cPt_V4 : - case Hexagon::STb_GP_cNotPt_V4 : - case Hexagon::STh_GP_cPt_V4 : - case Hexagon::STh_GP_cNotPt_V4 : - case Hexagon::STw_GP_cPt_V4 : - case Hexagon::STw_GP_cNotPt_V4 : - return QRI.Subtarget.hasV4TOps(); + case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerif_abs: + return true; // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded // from the "Conditional Store" list. Because a predicated new value store @@ -1566,20 +1522,14 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { switch (MI->getOpcode()) { default: llvm_unreachable("Unknown .new type"); // store new value byte - case Hexagon::STrib_shl_V4: - return Hexagon::STrib_shl_nv_V4; - - case Hexagon::STrih_shl_V4: - return Hexagon::STrih_shl_nv_V4; + case Hexagon::S4_storerb_ur: + return Hexagon::S4_storerbnew_ur; - case Hexagon::STriw_f: - return Hexagon::STriw_nv_V4; + case Hexagon::S4_storerh_ur: + return Hexagon::S4_storerhnew_ur; - case Hexagon::STriw_indexed_f: - return Hexagon::STriw_indexed_nv_V4; - - case Hexagon::STriw_shl_V4: - return Hexagon::STriw_shl_nv_V4; + case Hexagon::S4_storeri_ur: + return Hexagon::S4_storerinew_ur; } return 0; @@ -1597,28 +1547,28 @@ int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI, switch (MI->getOpcode()) { default: llvm_unreachable("Unknown .new type"); // Condtional Jumps - case Hexagon::JMP_t: - case Hexagon::JMP_f: + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: return getDotNewPredJumpOp(MI, MBPI); - case Hexagon::JMPR_t: - return Hexagon::JMPR_tnew_tV3; + case Hexagon::J2_jumprt: + return Hexagon::J2_jumptnewpt; - case Hexagon::JMPR_f: - return Hexagon::JMPR_fnew_tV3; + case Hexagon::J2_jumprf: + return Hexagon::J2_jumprfnewpt; - case Hexagon::JMPret_t: - return Hexagon::JMPret_tnew_tV3; + case Hexagon::JMPrett: + return Hexagon::J2_jumprtnewpt; - case Hexagon::JMPret_f: - return Hexagon::JMPret_fnew_tV3; + case Hexagon::JMPretf: + return Hexagon::J2_jumprfnewpt; // Conditional combine - case Hexagon::COMBINE_rr_cPt : - return Hexagon::COMBINE_rr_cdnPt; - case Hexagon::COMBINE_rr_cNotPt : - return Hexagon::COMBINE_rr_cdnNotPt; + case Hexagon::C2_ccombinewt: + return Hexagon::C2_ccombinewnewt; + case Hexagon::C2_ccombinewf: + return Hexagon::C2_ccombinewnewf; } } @@ -1670,11 +1620,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, } bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { - - // Constant extenders are allowed only for V4 and above. 
- if (!Subtarget.hasV4TOps()) - return false; - const uint64_t F = MI->getDesc().TSFlags; unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; if (isExtended) // Instruction must be extended. @@ -1735,10 +1680,10 @@ HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, taken = true; switch (MI->getOpcode()) { - case Hexagon::JMP_t: - return taken ? Hexagon::JMP_tnew_t : Hexagon::JMP_tnew_nt; - case Hexagon::JMP_f: - return taken ? Hexagon::JMP_fnew_t : Hexagon::JMP_fnew_nt; + case Hexagon::J2_jumpt: + return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew; + case Hexagon::J2_jumpf: + return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew; default: llvm_unreachable("Unexpected jump instruction."); @@ -1747,10 +1692,6 @@ HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, // Returns true if a particular operand is extendable for an instruction. bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, unsigned short OperandNum) const { - // Constant extenders are allowed only for V4 and above. - if (!Subtarget.hasV4TOps()) - return false; - const uint64_t F = MI->getDesc().TSFlags; return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) @@ -1850,16 +1791,16 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { } bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const { - return (Opcode == Hexagon::JMP_t) || - (Opcode == Hexagon::JMP_f) || - (Opcode == Hexagon::JMP_tnew_t) || - (Opcode == Hexagon::JMP_fnew_t) || - (Opcode == Hexagon::JMP_tnew_nt) || - (Opcode == Hexagon::JMP_fnew_nt); + return (Opcode == Hexagon::J2_jumpt) || + (Opcode == Hexagon::J2_jumpf) || + (Opcode == Hexagon::J2_jumptnewpt) || + (Opcode == Hexagon::J2_jumpfnewpt) || + (Opcode == Hexagon::J2_jumpt) || + (Opcode == Hexagon::J2_jumpf); } bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const { - return (Opcode == Hexagon::JMP_f) || - (Opcode == Hexagon::JMP_fnew_t) || - (Opcode == Hexagon::JMP_fnew_nt); + return (Opcode == Hexagon::J2_jumpf) || + (Opcode == Hexagon::J2_jumpfnewpt) || + (Opcode == Hexagon::J2_jumpfnew); } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 4090681..60635cf 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -14,83 +14,100 @@ include "HexagonInstrFormats.td" include "HexagonOperands.td" -//===----------------------------------------------------------------------===// +// Pattern fragment that combines the value type and the register class +// into a single parameter. +// The pat frags in the definitions below need to have a named register, +// otherwise i32 will be assumed regardless of the register class. The +// name of the register does not matter. +def I1 : PatLeaf<(i1 PredRegs:$R)>; +def I32 : PatLeaf<(i32 IntRegs:$R)>; +def I64 : PatLeaf<(i64 DoubleRegs:$R)>; +def F32 : PatLeaf<(f32 IntRegs:$R)>; +def F64 : PatLeaf<(f64 DoubleRegs:$R)>; + +// Pattern fragments to extract the low and high subregisters from a +// 64-bit value. +def LoReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>; +def HiReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>; -// Multi-class for logical operators. 
-multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> { - def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$b), - (i32 IntRegs:$c)))]>; - def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")), - [(set (i32 IntRegs:$dst), (OpNode s10Imm:$b, - (i32 IntRegs:$c)))]>; -} +// SDNode for converting immediate C to C-1. +def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM1Imm(imm); +}]>; -// Multi-class for compare ops. -let isCompare = 1 in { -multiclass CMP64_rr<string OpcStr, PatFrag OpNode> { - def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set (i1 PredRegs:$dst), - (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>; -} +// SDNode for converting immediate C to C-2. +def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-2 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM2Imm(imm); +}]>; + +// SDNode for converting immediate C to C-3. +def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-3 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM3Imm(imm); +}]>; -multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> { - let CextOpcode = CextOp in { - let InputType = "reg" in - def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set (i1 PredRegs:$dst), - (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; +// SDNode for converting immediate C to C-1. +def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. 
+ uint32_t imm = N->getZExtValue(); + return XformUToUM1Imm(imm); +}]>; - let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, - opExtentBits = 10, InputType = "imm" in - def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Ext:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set (i1 PredRegs:$dst), - (OpNode (i32 IntRegs:$b), s10ExtPred:$c))]>; +//===----------------------------------------------------------------------===// +// Compare +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isCompare = 1, InputType = "imm", isExtendable = 1, + opExtendable = 2 in +class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp> + : ALU32Inst <(outs PredRegs:$dst), + (ins IntRegs:$src1, ImmOp:$src2), + "$dst = "#!if(isNot, "!","")#mnemonic#"($src1, #$src2)", + [], "",ALU32_2op_tc_2early_SLOT0123 >, ImmRegRel { + bits<2> dst; + bits<5> src1; + bits<10> src2; + let CextOpcode = mnemonic; + let opExtentBits = !if(!eq(mnemonic, "cmp.gtu"), 9, 10); + let isExtentSigned = !if(!eq(mnemonic, "cmp.gtu"), 0, 1); + + let IClass = 0b0111; + + let Inst{27-24} = 0b0101; + let Inst{23-22} = MajOp; + let Inst{21} = !if(!eq(mnemonic, "cmp.gtu"), 0, src2{9}); + let Inst{20-16} = src1; + let Inst{13-5} = src2{8-0}; + let Inst{4} = isNot; + let Inst{3-2} = 0b00; + let Inst{1-0} = dst; } -} -multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> { - let CextOpcode = CextOp in { - let InputType = "reg" in - def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set (i1 PredRegs:$dst), - (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; +def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10Ext>; +def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10Ext>; +def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9Ext>; - let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, - opExtentBits = 9, InputType = "imm" in - def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Ext:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set (i1 PredRegs:$dst), - (OpNode (i32 IntRegs:$b), u9ExtPred:$c))]>; - } -} +class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred> + : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)), + (MI IntRegs:$src1, ImmPred:$src2)>; -multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> { -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in - def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b), - s8ExtPred:$c))]>; -} -} +def : T_CMP_pat <C2_cmpeqi, seteq, s10ImmPred>; +def : T_CMP_pat <C2_cmpgti, setgt, s10ImmPred>; +def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>; //===----------------------------------------------------------------------===// -// ALU32/ALU (Instructions with register-register form) +// ALU32/ALU + //===----------------------------------------------------------------------===// def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; -def HexagonWrapperCombineII : - SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>; - -def HexagonWrapperCombineRR : - SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>; +def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in class T_ALU32_3op<string mnemonic, 
bits<3> MajOp, bits<3> MinOp, bit OpsRev, @@ -145,6 +162,41 @@ class T_ALU32_3op_pred<string mnemonic, bits<3> MajOp, bits<3> MinOp, let Inst{4-0} = Rd; } +class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp, + bit OpsRev> + : T_ALU32_3op<"", MajOp, MinOp, OpsRev, 0> { + let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")"; +} + +def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>; +def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>; +def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>; +def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>; + +class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp, + bits<3> MinOp, bit OpsRev, bit IsComm> + : T_ALU32_3op<"", MajOp, MinOp, OpsRev, IsComm> { + let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix; +} + +def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>; +def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>; + +let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in { + def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>; + def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>; + def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>; + def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>; + def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>; + def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>; +} + +let Itinerary = ALU32_3op_tc_2_SLOT0123 in +def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>; + +def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>; +def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>; + multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev> { def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>; @@ -160,7 +212,6 @@ multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp, defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>; } -let isCodeGenOnly = 0 in defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>; defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>; defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>; @@ -178,282 +229,418 @@ def: BinOp32_pat<or, A2_or, i32>; def: BinOp32_pat<sub, A2_sub, i32>; def: BinOp32_pat<xor, A2_xor, i32>; -multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : ALU32_rr<(outs RC:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", - ") $dst = ")#mnemonic#"($src2, $src3)", - []>; +// A few special cases producing register pairs: +let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in { + def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>; + + let isPredicable = 1 in + def A2_combinew : T_ALU32_3op <"combine", 0b101, 0b000, 0, 0>; + + // Conditional combinew uses "newt/f" instead of "t/fnew". 
+ def C2_ccombinewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 0>; + def C2_ccombinewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 0>; + def C2_ccombinewnewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 1>; + def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>; } -multiclass ALU32_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 0>; - // Predicate new - defm _cdn#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 1>; - } +let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in +class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm> + : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#mnemonic#"($Rs, $Rt)", + [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel { + let CextOpcode = mnemonic; + let isCommutable = IsComm; + bits<5> Rs; + bits<5> Rt; + bits<2> Pd; + + let IClass = 0b1111; + let Inst{27-24} = 0b0010; + let Inst{22-21} = MinOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4} = IsNeg; + let Inst{3-2} = 0b00; + let Inst{1-0} = Pd; } -let InputType = "reg" in -multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> { - let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in { - let isPredicable = 1 in - def NAME : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = "#mnemonic#"($src1, $src2)", - [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - - let neverHasSideEffects = 1, isPredicated = 1 in { - defm Pt : ALU32_Pred<mnemonic, IntRegs, 0>; - defm NotPt : ALU32_Pred<mnemonic, IntRegs, 1>; - } - } +let Itinerary = ALU32_3op_tc_2early_SLOT0123 in { + def C2_cmpeq : T_ALU32_3op_cmp< "cmp.eq", 0b00, 0, 1>; + def C2_cmpgt : T_ALU32_3op_cmp< "cmp.gt", 0b10, 0, 0>; + def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>; } -defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel; +// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones +// that reverse the order of the operands. +class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>; -// Combines the two integer registers SRC1 and SRC2 into a double register. -let isPredicable = 1 in -class T_Combine : ALU32_rr<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = combine($src1, $src2)", - [(set (i64 DoubleRegs:$dst), - (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1), - (i32 IntRegs:$src2))))]>; - -multiclass Combine_base { - let BaseOpcode = "combine" in { - def NAME : T_Combine; - let neverHasSideEffects = 1, isPredicated = 1 in { - defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>; - defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>; - } - } -} +// Pats for compares. They use PatFrags as operands, not SDNodes, +// since seteq/setgt/etc. are defined as ParFrags. +class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT> + : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt))>; -defm COMBINE_rr : Combine_base, PredNewRel; +def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>; +def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>; +def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>; -// Combines the two immediates SRC1 and SRC2 into a double register. 
-class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> : - ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2), - "$dst = combine(#$src1, #$src2)", - [(set (i64 DoubleRegs:$dst), - (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>; +def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>; +def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>; -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in -def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>; +let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in +def C2_mux: ALU32_rr<(outs IntRegs:$Rd), + (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt), + "$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<5> Rt; + + let CextOpcode = "mux"; + let InputType = "reg"; + let hasSideEffects = 0; + let IClass = 0b1111; + + let Inst{27-24} = 0b0100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rd; +} + +def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>; + +// Combines the two immediates into a double register. +// Increase complexity to make it greater than any complexity of a combine +// that involves a register. + +let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1, + AddedComplexity = 75 in +def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), + "$Rdd = combine(#$s8, #$S8)", + [(set (i64 DoubleRegs:$Rdd), + (i64 (HexagonCOMBINE(i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> { + bits<5> Rdd; + bits<8> s8; + bits<8> S8; + + let IClass = 0b0111; + let Inst{27-23} = 0b11000; + let Inst{22-16} = S8{7-1}; + let Inst{13} = S8{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } //===----------------------------------------------------------------------===// -// ALU32/ALU (ADD with register-immediate form) +// Template class for predicated ADD of a reg and an Immediate value. //===----------------------------------------------------------------------===// -multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", - ") $dst = ")#mnemonic#"($src2, #$src3)", - []>; -} +let hasNewValue = 1, hasSideEffects = 0 in +class T_Addri_Pred <bit PredNot, bit PredNew> + : ALU32_ri <(outs IntRegs:$Rd), + (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), + !if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ", + ") $Rd = ")#"add($Rs, #$s8)"> { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<8> s8; + + let isPredicatedNew = PredNew; + let IClass = 0b0111; + + let Inst{27-24} = 0b0100; + let Inst{23} = PredNot; + let Inst{22-21} = Pu; + let Inst{20-16} = Rs; + let Inst{13} = PredNew; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; + } -multiclass ALU32ri_Pred<string mnemonic, bit PredNot> { +//===----------------------------------------------------------------------===// +// A2_addi: Add a signed immediate to a register. 
+//===----------------------------------------------------------------------===// +let hasNewValue = 1, hasSideEffects = 0 in +class T_Addri <Operand immOp> + : ALU32_ri <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, immOp:$s16), + "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> { + bits<5> Rd; + bits<5> Rs; + bits<16> s16; + + let IClass = 0b1011; + + let Inst{27-21} = s16{15-9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s16{8-0}; + let Inst{4-0} = Rd; + } + +//===----------------------------------------------------------------------===// +// Multiclass for ADD of a register and an immediate value. +//===----------------------------------------------------------------------===// +multiclass Addri_Pred<string mnemonic, bit PredNot> { let isPredicatedFalse = PredNot in { - defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>; + def NAME : T_Addri_Pred<PredNot, 0>; // Predicate new - defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>; + def NAME#new : T_Addri_Pred<PredNot, 1>; } } -let isExtendable = 1, InputType = "imm" in -multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> { - let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in { - let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16, - isPredicable = 1 in - def NAME : ALU32_ri<(outs IntRegs:$dst), - (ins IntRegs:$src1, s16Ext:$src2), - "$dst = "#mnemonic#"($src1, #$src2)", - [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), - (s16ExtPred:$src2)))]>; +let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in +multiclass Addri_base<string mnemonic, SDNode OpNode> { + let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in { + let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in + def A2_#NAME : T_Addri<s16Ext>; - let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, - neverHasSideEffects = 1, isPredicated = 1 in { - defm Pt : ALU32ri_Pred<mnemonic, 0>; - defm NotPt : ALU32ri_Pred<mnemonic, 1>; + let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in { + defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>; + defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>; } } } -defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel; +defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel; -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, -CextOpcode = "OR", InputType = "imm" in -def OR_ri : ALU32_ri<(outs IntRegs:$dst), - (ins IntRegs:$src1, s10Ext:$src2), - "$dst = or($src1, #$src2)", - [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), - s10ExtPred:$src2))]>, ImmRegRel; +def: Pat<(i32 (add I32:$Rs, s16ExtPred:$s16)), + (i32 (A2_addi I32:$Rs, imm:$s16))>; +//===----------------------------------------------------------------------===// +// Template class used for the following ALU32 instructions. 
+// Rd=and(Rs,#s10) +// Rd=or(Rs,#s10) +//===----------------------------------------------------------------------===// let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, -InputType = "imm", CextOpcode = "AND" in -def AND_ri : ALU32_ri<(outs IntRegs:$dst), - (ins IntRegs:$src1, s10Ext:$src2), - "$dst = and($src1, #$src2)", - [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), - s10ExtPred:$src2))]>, ImmRegRel; +InputType = "imm", hasNewValue = 1 in +class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp> + : ALU32_ri <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s10Ext:$s10), + "$Rd = "#mnemonic#"($Rs, #$s10)" , + [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> { + bits<5> Rd; + bits<5> Rs; + bits<10> s10; + let CextOpcode = mnemonic; + + let IClass = 0b0111; + + let Inst{27-24} = 0b0110; + let Inst{23-22} = MinOp; + let Inst{21} = s10{9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rd; + } -// Nop. -let neverHasSideEffects = 1, isCodeGenOnly = 0 in -def NOP : ALU32_rr<(outs), (ins), - "nop", - []>; +def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel; +def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel; +// Subtract register from immediate // Rd32=sub(#s10,Rs32) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10, -CextOpcode = "SUB", InputType = "imm" in -def SUB_ri : ALU32_ri<(outs IntRegs:$dst), - (ins s10Ext:$src1, IntRegs:$src2), - "$dst = sub(#$src1, $src2)", - [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>, - ImmRegRel; - -// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). -def : Pat<(not (i32 IntRegs:$src1)), - (SUB_ri -1, (i32 IntRegs:$src1))>; - -// Rd = neg(Rs) gets mapped to Rd=sub(#0, Rs). -// Pattern definition for 'neg' was not necessary. - -multiclass TFR_Pred<bit PredNot> { - let isPredicatedFalse = PredNot in { - def _c#NAME : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2", - []>; - // Predicate new - let isPredicatedNew = 1 in - def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2", - []>; +let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1, + opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in +def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs), + "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel { + bits<5> Rd; + bits<10> s10; + bits<5> Rs; + + let IClass = 0b0111; + + let Inst{27-22} = 0b011001; + let Inst{21} = s10{9}; + let Inst{20-16} = Rs; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rd; } + +// Nop. +let hasSideEffects = 0 in +def A2_nop: ALU32Inst <(outs), (ins), "nop" > { + let IClass = 0b0111; + let Inst{27-24} = 0b1111; } -let InputType = "reg", neverHasSideEffects = 1 in -multiclass TFR_base<string CextOp> { - let CextOpcode = CextOp, BaseOpcode = CextOp in { - let isPredicable = 1 in - def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = $src1", - []>; +def: Pat<(sub s10ExtPred:$s10, IntRegs:$Rs), + (A2_subri imm:$s10, IntRegs:$Rs)>; - let isPredicated = 1 in { - defm Pt : TFR_Pred<0>; - defm NotPt : TFR_Pred<1>; - } +// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). 
+def: Pat<(not (i32 IntRegs:$src1)), + (A2_subri -1, IntRegs:$src1)>; + +let hasSideEffects = 0, hasNewValue = 1 in +class T_tfr16<bit isHi> + : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16Imm:$u16), + "$Rx"#!if(isHi, ".h", ".l")#" = #$u16", + [], "$src1 = $Rx" > { + bits<5> Rx; + bits<16> u16; + + let IClass = 0b0111; + let Inst{27-26} = 0b00; + let Inst{25-24} = !if(isHi, 0b10, 0b01); + let Inst{23-22} = u16{15-14}; + let Inst{21} = 0b1; + let Inst{20-16} = Rx; + let Inst{13-0} = u16{13-0}; } -} -class T_TFR64_Pred<bit PredNot, bit isPredNew> - : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2), - !if(PredNot, "if (!$src1", "if ($src1")# - !if(isPredNew, ".new) ", ") ")#"$dst = $src2", []> -{ +def A2_tfril: T_tfr16<0>; +def A2_tfrih: T_tfr16<1>; + +// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)". +let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in +class T_tfr_pred<bit isPredNot, bit isPredNew> + : ALU32Inst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + "if ("#!if(isPredNot, "!", "")# + "$src1"#!if(isPredNew, ".new", "")# + ") $dst = $src2"> { bits<5> dst; bits<2> src1; bits<5> src2; - let IClass = 0b1111; - let Inst{27-24} = 0b1101; + let isPredicatedFalse = isPredNot; + let isPredicatedNew = isPredNew; + let IClass = 0b0111; + + let Inst{27-24} = 0b0100; + let Inst{23} = isPredNot; let Inst{13} = isPredNew; - let Inst{7} = PredNot; + let Inst{12-5} = 0; let Inst{4-0} = dst; - let Inst{6-5} = src1; - let Inst{20-17} = src2{4-1}; - let Inst{16} = 0b1; - let Inst{12-9} = src2{4-1}; - let Inst{8} = 0b0; -} + let Inst{22-21} = src1; + let Inst{20-16} = src2; + } -multiclass TFR64_Pred<bit PredNot> { - let isPredicatedFalse = PredNot in { - def _c#NAME : T_TFR64_Pred<PredNot, 0>; +let isPredicable = 1 in +class T_tfr : ALU32Inst<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = $src"> { + bits<5> dst; + bits<5> src; - let isPredicatedNew = 1 in - def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new + let IClass = 0b0111; + + let Inst{27-21} = 0b0000011; + let Inst{20-16} = src; + let Inst{13} = 0b0; + let Inst{4-0} = dst; + } + +let InputType = "reg", hasNewValue = 1, hasSideEffects = 0 in +multiclass tfr_base<string CextOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp in { + def NAME : T_tfr; + + // Predicate + def t : T_tfr_pred<0, 0>; + def f : T_tfr_pred<1, 0>; + // Predicate new + def tnew : T_tfr_pred<0, 1>; + def fnew : T_tfr_pred<1, 1>; } } -let neverHasSideEffects = 1 in +// Assembler mapped to C2_ccombinew[t|f|newt|newf]. +// Please don't add bits to this instruction as it'll be converted into +// 'combine' before object code emission. +let isPredicated = 1 in +class T_tfrp_pred<bit PredNot, bit PredNew> + : ALU32_rr <(outs DoubleRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2), + "if ("#!if(PredNot, "!", "")#"$src1" + #!if(PredNew, ".new", "")#") $dst = $src2" > { + let isPredicatedFalse = PredNot; + let isPredicatedNew = PredNew; + } + +// Assembler mapped to A2_combinew. +// Please don't add bits to this instruction as it'll be converted into +// 'combine' before object code emission. 
+class T_tfrp : ALU32Inst <(outs DoubleRegs:$dst), + (ins DoubleRegs:$src), + "$dst = $src">; + +let hasSideEffects = 0 in multiclass TFR64_base<string BaseName> { let BaseOpcode = BaseName in { let isPredicable = 1 in - def NAME : ALU32Inst <(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1), - "$dst = $src1" > { - bits<5> dst; - bits<5> src1; - - let IClass = 0b1111; - let Inst{27-23} = 0b01010; - let Inst{4-0} = dst; - let Inst{20-17} = src1{4-1}; - let Inst{16} = 0b1; - let Inst{12-9} = src1{4-1}; - let Inst{8} = 0b0; - } - - let isPredicated = 1 in { - defm Pt : TFR64_Pred<0>; - defm NotPt : TFR64_Pred<1>; - } + def NAME : T_tfrp; + // Predicate + def t : T_tfrp_pred <0, 0>; + def f : T_tfrp_pred <1, 0>; + // Predicate new + def tnew : T_tfrp_pred <0, 1>; + def fnew : T_tfrp_pred <1, 1>; } } -multiclass TFRI_Pred<bit PredNot> { - let isMoveImm = 1, isPredicatedFalse = PredNot in { - def _c#NAME : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Ext:$src2), - !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2", - []>; +let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12, + isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR", + hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in +class T_TFRI_Pred<bit PredNot, bit PredNew> + : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12Ext:$s12), + "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12", + [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel { + let isPredicatedFalse = PredNot; + let isPredicatedNew = PredNew; - // Predicate new - let isPredicatedNew = 1 in - def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Ext:$src2), - !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2", - []>; - } -} - -let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in -multiclass TFRI_base<string CextOp> { - let CextOpcode = CextOp, BaseOpcode = CextOp#I in { - let isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, - isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in - def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), - "$dst = #$src1", - [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>; - - let opExtendable = 2, opExtentBits = 12, neverHasSideEffects = 1, - isPredicated = 1 in { - defm Pt : TFRI_Pred<0>; - defm NotPt : TFRI_Pred<1>; - } - } + bits<5> Rd; + bits<2> Pu; + bits<12> s12; + + let IClass = 0b0111; + let Inst{27-24} = 0b1110; + let Inst{23} = PredNot; + let Inst{22-21} = Pu; + let Inst{20} = 0b0; + let Inst{19-16,12-5} = s12; + let Inst{13} = PredNew; + let Inst{4-0} = Rd; } -defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel; -defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel; -defm TFR64 : TFR64_base<"TFR64">, PredNewRel; +def C2_cmoveit : T_TFRI_Pred<0, 0>; +def C2_cmoveif : T_TFRI_Pred<1, 0>; +def C2_cmovenewit : T_TFRI_Pred<0, 1>; +def C2_cmovenewif : T_TFRI_Pred<1, 1>; + +let InputType = "imm", isExtendable = 1, isExtentSigned = 1, + CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0, + isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1, + isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in +def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16", + [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, + ImmRegRel, PredRel { + bits<5> Rd; + bits<16> s16; + + let IClass = 0b0111; + let Inst{27-24} = 0b1000; + let Inst{23-22,20-16,13-5} = s16; + let Inst{4-0} = Rd; +} + +defm A2_tfr : 
tfr_base<"TFR">, ImmRegRel, PredNewRel; +let isAsmParserOnly = 1 in +defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel; + +// Assembler mapped +let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isAsmParserOnly = 1 in +def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), + "$dst = #$src1", + [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>; + +// TODO: see if this instruction can be deleted.. +let isExtendable = 1, opExtendable = 1, opExtentBits = 6, + isAsmParserOnly = 1 in +def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1), + "$dst = #$src1">; -// Transfer control register. -let neverHasSideEffects = 1 in -def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), - "$dst = $src1", - []>; //===----------------------------------------------------------------------===// // ALU32/ALU - //===----------------------------------------------------------------------===// @@ -462,159 +649,344 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), //===----------------------------------------------------------------------===// // ALU32/PERM + //===----------------------------------------------------------------------===// +// Scalar mux register immediate. +let hasSideEffects = 0, isExtentSigned = 1, CextOpcode = "MUX", + InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 8 in +class T_MUX1 <bit MajOp, dag ins, string AsmStr> + : ALU32Inst <(outs IntRegs:$Rd), ins, AsmStr>, ImmRegRel { + bits<5> Rd; + bits<2> Pu; + bits<8> s8; + bits<5> Rs; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{23} = MajOp; + let Inst{22-21} = Pu; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; +} + +let opExtendable = 2 in +def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs), + "$Rd = mux($Pu, #$s8, $Rs)">; + +let opExtendable = 3 in +def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), + "$Rd = mux($Pu, $Rs, #$s8)">; + +def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)), + (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>; + +def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)), + (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>; + +// C2_muxii: Scalar mux immediates. 
+let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, + opExtentBits = 8, opExtendable = 2 in +def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), + (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8), + "$Rd = mux($Pu, #$s8, #$S8)" , + [(set (i32 IntRegs:$Rd), + (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > { + bits<5> Rd; + bits<2> Pu; + bits<8> s8; + bits<8> S8; + + let IClass = 0b0111; + + let Inst{27-25} = 0b101; + let Inst{24-23} = Pu; + let Inst{22-16} = S8{7-1}; + let Inst{13} = S8{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; + } + +//===----------------------------------------------------------------------===// +// template class for non-predicated alu32_2op instructions +// - aslh, asrh, sxtb, sxth, zxth +//===----------------------------------------------------------------------===// +let hasNewValue = 1, opNewValue = 0 in +class T_ALU32_2op <string mnemonic, bits<3> minOp> : + ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = "#mnemonic#"($Rs)", [] > { + bits<5> Rd; + bits<5> Rs; + + let IClass = 0b0111; + + let Inst{27-24} = 0b0000; + let Inst{23-21} = minOp; + let Inst{13} = 0b0; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; +} + +//===----------------------------------------------------------------------===// +// template class for predicated alu32_2op instructions +// - aslh, asrh, sxtb, sxth, zxtb, zxth +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot, + bit isPredNew > : + ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs), + !if(isPredNot, "if (!$Pu", "if ($Pu") + #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> { + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + + let IClass = 0b0111; -let neverHasSideEffects = 1 in -def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst), - (ins s8Imm:$src1, s8Imm:$src2), - "$dst = combine(#$src1, #$src2)", - []>; - -// Mux. 
-def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, - DoubleRegs:$src2, - DoubleRegs:$src3), - "$dst = vmux($src1, $src2, $src3)", - []>; - -let CextOpcode = "MUX", InputType = "reg" in -def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, - IntRegs:$src2, IntRegs:$src3), - "$dst = mux($src1, $src2, $src3)", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))]>, ImmRegRel; - -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, -CextOpcode = "MUX", InputType = "imm" in -def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2, - IntRegs:$src3), - "$dst = mux($src1, #$src2, $src3)", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), s8ExtPred:$src2, - (i32 IntRegs:$src3))))]>, ImmRegRel; - -let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, -CextOpcode = "MUX", InputType = "imm" in -def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, - s8Ext:$src3), - "$dst = mux($src1, $src2, #$src3)", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), - s8ExtPred:$src3)))]>, ImmRegRel; - -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in -def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2, - s8Imm:$src3), - "$dst = mux($src1, #$src2, #$src3)", - [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1), - s8ExtPred:$src2, - s8ImmPred:$src3)))]>; - -// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth -multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : ALU32Inst<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", - ") $dst = ")#mnemonic#"($src2)">, - Requires<[HasV4T]>; -} - -multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> { + let Inst{27-24} = 0b0000; + let Inst{23-21} = minOp; + let Inst{13} = 0b1; + let Inst{11} = isPredNot; + let Inst{10} = isPredNew; + let Inst{4-0} = Rd; + let Inst{9-8} = Pu; + let Inst{20-16} = Rs; +} + +multiclass ALU32_2op_Pred<string mnemonic, bits<3> minOp, bit PredNot> { let isPredicatedFalse = PredNot in { - defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>; + def NAME : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 0>; + // Predicate new - defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>; + let isPredicatedNew = 1 in + def NAME#new : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 1>; } } -multiclass ALU32_2op_base<string mnemonic> { +multiclass ALU32_2op_base<string mnemonic, bits<3> minOp> { let BaseOpcode = mnemonic in { - let isPredicable = 1, neverHasSideEffects = 1 in - def NAME : ALU32Inst<(outs IntRegs:$dst), - (ins IntRegs:$src1), - "$dst = "#mnemonic#"($src1)">; - - let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1, - neverHasSideEffects = 1 in { - defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>; - defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>; + let isPredicable = 1, hasSideEffects = 0 in + def A2_#NAME : T_ALU32_2op<mnemonic, minOp>; + + let isPredicated = 1, hasSideEffects = 0 in { + defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>; + defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>; } } } -defm ASLH : ALU32_2op_base<"aslh">, PredNewRel; -defm ASRH : ALU32_2op_base<"asrh">, PredNewRel; -defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel; -defm SXTH : ALU32_2op_base<"sxth">, PredNewRel; -defm ZXTB : 
ALU32_2op_base<"zxtb">, PredNewRel; -defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel; +defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel; +defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel; +defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel; +defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel; +defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel; + +// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255). +// Compiler would want to generate 'zxtb' instead of 'and' becuase 'zxtb' has +// predicated forms while 'and' doesn't. Since integrated assembler can't +// handle 'mapped' instructions, we need to encode 'zxtb' same as 'and' where +// immediate operand is set to '255'. + +let hasNewValue = 1, opNewValue = 0 in +class T_ZXTB: ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = zxtb($Rs)", [] > { // Rd = and(Rs,255) + bits<5> Rd; + bits<5> Rs; + bits<10> s10 = 255; + + let IClass = 0b0111; + + let Inst{27-22} = 0b011000; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; + let Inst{21} = s10{9}; + let Inst{13-5} = s10{8-0}; +} -def : Pat <(shl (i32 IntRegs:$src1), (i32 16)), - (ASLH IntRegs:$src1)>; +//Rd=zxtb(Rs): assembler mapped to "Rd=and(Rs,#255) +multiclass ZXTB_base <string mnemonic, bits<3> minOp> { + let BaseOpcode = mnemonic in { + let isPredicable = 1, hasSideEffects = 0 in + def A2_#NAME : T_ZXTB; -def : Pat <(sra (i32 IntRegs:$src1), (i32 16)), - (ASRH IntRegs:$src1)>; + let isPredicated = 1, hasSideEffects = 0 in { + defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>; + defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>; + } + } +} -def : Pat <(sext_inreg (i32 IntRegs:$src1), i8), - (SXTB IntRegs:$src1)>; +defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel; -def : Pat <(sext_inreg (i32 IntRegs:$src1), i16), - (SXTH IntRegs:$src1)>; +def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>; +def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>; +def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>; +def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>; //===----------------------------------------------------------------------===// -// ALU32/PERM - +// Template class for vector add and avg //===----------------------------------------------------------------------===// +class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp, + bit isSat, bit isRnd, bit isCrnd, bit SwapOps > + : ALU64_rr < (outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "") + #!if(isCrnd,":crnd","") + #!if(isSat, ":sat", ""), + [], "", ALU64_tc_2_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-24} = 0b0011; + let Inst{23-21} = majOp; + let Inst{20-16} = !if (SwapOps, Rtt, Rss); + let Inst{12-8} = !if (SwapOps, Rss, Rtt); + let Inst{7-5} = minOp; + let Inst{4-0} = Rdd; + } -//===----------------------------------------------------------------------===// -// ALU32/PRED + -//===----------------------------------------------------------------------===// +// ALU64 - Vector add +// Rdd=vadd[u][bhw](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>; + def A2_vaddh : T_VectALU_64 < "vaddh", 0b000, 0b010, 0, 0, 0, 0>; + def A2_vaddw : T_VectALU_64 < "vaddw", 0b000, 0b101, 0, 0, 0, 0>; +} -// Compare. 
-defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel; -defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel; -defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel; +// Rdd=vadd[u][bhw](Rss,Rtt):sat +let Defs = [USR_OVF] in { + def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>; + def A2_vaddhs : T_VectALU_64 < "vaddh", 0b000, 0b011, 1, 0, 0, 0>; + def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>; + def A2_vaddws : T_VectALU_64 < "vaddw", 0b000, 0b110, 1, 0, 0, 0>; +} -// SDNode for converting immediate C to C-1. -def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ - // Return the byte immediate const-1 as an SDNode. - int32_t imm = N->getSExtValue(); - return XformSToSM1Imm(imm); -}]>; +// ALU64 - Vector average +// Rdd=vavg[u][bhw](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>; + def A2_vavgh : T_VectALU_64 < "vavgh", 0b010, 0b010, 0, 0, 0, 0>; + def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>; + def A2_vavgw : T_VectALU_64 < "vavgw", 0b011, 0b000, 0, 0, 0, 0>; + def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>; +} -// SDNode for converting immediate C to C-1. -def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ - // Return the byte immediate const-1 as an SDNode. - uint32_t imm = N->getZExtValue(); - return XformUToUM1Imm(imm); -}]>; +// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd] +def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>; +def A2_vavghr : T_VectALU_64 < "vavgh", 0b010, 0b011, 0, 1, 0, 0>; +def A2_vavghcr : T_VectALU_64 < "vavgh", 0b010, 0b100, 0, 0, 1, 0>; +def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>; + +def A2_vavgwr : T_VectALU_64 < "vavgw", 0b011, 0b001, 0, 1, 0, 0>; +def A2_vavgwcr : T_VectALU_64 < "vavgw", 0b011, 0b010, 0, 0, 1, 0>; +def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>; + +// Rdd=vnavg[bh](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>; + def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>; +} + +// Rdd=vnavg[bh](Rss,Rtt)[:rnd|:crnd]:sat +let Defs = [USR_OVF] in { + def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>; + def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>; + def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>; + def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>; +} + +// Rdd=vsub[u][bh](Rss,Rtt) +let Itinerary = ALU64_tc_1_SLOT23 in { + def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>; + def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>; + def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>; +} + +// Rdd=vsub[u][bh](Rss,Rtt):sat +let Defs = [USR_OVF] in { + def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>; + def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>; + def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>; + def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>; +} -def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = cl0($src1)", - [(set (i32 IntRegs:$dst), (ctlz (i32 IntRegs:$src1)))]>; +// Rdd=vmax[u][bhw](Rss,Rtt) +def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>; +def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>; +def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>; +def A2_vmaxuh : 
T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>; +def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>; +def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>; + +// Rdd=vmin[u][bhw](Rss,Rtt) +def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>; +def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>; +def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>; +def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>; +def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>; +def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>; -def CTTZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = ct0($src1)", - [(set (i32 IntRegs:$dst), (cttz (i32 IntRegs:$src1)))]>; +//===----------------------------------------------------------------------===// +// Template class for vector compare +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_vcmp <string Str, bits<4> minOp> + : ALU64_rr <(outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = "#Str#"($Rss, $Rtt)", [], + "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = minOp{3}; + let Inst{7-5} = minOp{2-0}; + let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } -def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), - "$dst = cl0($src1)", - [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>; +class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T> + : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), + (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; + +// Vector compare bytes +def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>; +def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>; + +// Vector compare halfwords +def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>; +def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>; +def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>; + +// Vector compare words +def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>; +def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>; +def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>; + +def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>; +def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>; +def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>; +def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>; +def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>; +def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>; +def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>; +def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>; -def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), - "$dst = ct0($src1)", - [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>; +//===----------------------------------------------------------------------===// +// ALU32/PERM - +//===----------------------------------------------------------------------===// -def TSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = tstbit($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setne (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>; -def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = tstbit($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setne (and (shl 1, (u5ImmPred:$src2)), (i32 IntRegs:$src1)), 0))]>; +//===----------------------------------------------------------------------===// +// ALU32/PRED + +//===----------------------------------------------------------------------===// 
//===----------------------------------------------------------------------===// // ALU32/PRED - @@ -625,112 +997,280 @@ def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), // ALU64/ALU + //===----------------------------------------------------------------------===// // Add. -def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = add($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (add (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2)))]>; +//===----------------------------------------------------------------------===// +// Template Class +// Add/Subtract halfword +// Rd=add(Rt.L,Rs.[HL])[:sat] +// Rd=sub(Rt.L,Rs.[HL])[:sat] +// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16] +// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16] +//===----------------------------------------------------------------------===// -// Add halfword. +let hasNewValue = 1, opNewValue = 0 in +class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub> + : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs), + "$Rd = "#!if(isSub,"sub","add")#"($Rt." + #!if(hasShift, !if(LHbits{1},"h","l"),"l") #", $Rs." + #!if(hasShift, !if(LHbits{0},"h)","l)"), !if(LHbits{1},"h)","l)")) + #!if(isSat,":sat","") + #!if(hasShift,":<<16",""), [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + let IClass = 0b1101; + + let Inst{27-23} = 0b01010; + let Inst{22} = hasShift; + let Inst{21} = isSub; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rd; + let Inst{12-8} = Rt; + let Inst{20-16} = Rs; + } -// Compare. -defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>; -defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>; -defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>; - -// Logical operations. -def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = and($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2)))]>; - -def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = or($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (or (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2)))]>; - -def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = xor($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2)))]>; - -// Maximum. 
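Note: the T_XTYPE_ADD_SUB template above is instantiated just below, and the selection patterns for the low-halfword forms appear a little further down in this hunk. At the C level those patterns match expressions that add or subtract two registers and keep a sign-extended halfword of the result, roughly as follows (function names are illustrative, two's-complement wraparound is assumed):

#include <stdint.h>

/* ~ sext_inreg(add Rs, Rt), i16  ->  Rd = add(Rt.L, Rs.L) */
static int32_t addh_l16_ll(int32_t rs, int32_t rt)
{
    return (int16_t)(rs + rt);
}

/* ~ sext_inreg(sub Rs, Rt), i16  ->  Rd = sub(Rt.L, Rs.L) */
static int32_t subh_l16_ll(int32_t rs, int32_t rt)
{
    return (int16_t)(rs - rt);
}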
-def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = max($src2, $src1)", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 (setlt (i32 IntRegs:$src2), - (i32 IntRegs:$src1))), - (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; +//Rd=sub(Rt.L,Rs.[LH]) +def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>; +def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>; -def MAXUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = maxu($src2, $src1)", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 (setult (i32 IntRegs:$src2), - (i32 IntRegs:$src1))), - (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; - -def MAXd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = max($src2, $src1)", - [(set (i64 DoubleRegs:$dst), - (i64 (select (i1 (setlt (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src1))), - (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2))))]>; - -def MAXUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = maxu($src2, $src1)", - [(set (i64 DoubleRegs:$dst), - (i64 (select (i1 (setult (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src1))), - (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2))))]>; - -// Minimum. -def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = min($src2, $src1)", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 (setgt (i32 IntRegs:$src2), - (i32 IntRegs:$src1))), - (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; +//Rd=add(Rt.L,Rs.[LH]) +def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>; +def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>; -def MINUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = minu($src2, $src1)", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 (setugt (i32 IntRegs:$src2), - (i32 IntRegs:$src1))), - (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; - -def MINd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = min($src2, $src1)", - [(set (i64 DoubleRegs:$dst), - (i64 (select (i1 (setgt (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src1))), - (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2))))]>; - -def MINUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = minu($src2, $src1)", - [(set (i64 DoubleRegs:$dst), - (i64 (select (i1 (setugt (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src1))), - (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2))))]>; - -// Subtract. 
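Note: the MAX*/MIN* instructions removed here come back further down as A2_max/A2_maxu/A2_min/A2_minu, selected through the generic select-plus-compare patterns in the MinMax_pats block. In C these are the familiar ternary shapes (hypothetical helpers, assuming the usual setcc+select lowering):

/* (a > b) ? a : b picks the larger value, so max is selected. */
static int max32(int a, int b) { return a > b ? a : b; }

/* Same compare, but the smaller value is picked, so the swapped
 * instruction (min) is selected instead. */
static int min32_via_gt(int a, int b) { return a > b ? b : a; }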
-def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = sub($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (sub (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2)))]>; +let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in { + //Rd=sub(Rt.L,Rs.[LH]):sat + def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>; + def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>; + + //Rd=add(Rt.L,Rs.[LH]):sat + def A2_addh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 0>; + def A2_addh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 0>; +} + +//Rd=sub(Rt.[LH],Rs.[LH]):<<16 +def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>; +def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>; +def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>; +def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>; + +//Rd=add(Rt.[LH],Rs.[LH]):<<16 +def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>; +def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>; +def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>; +def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>; + +let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in { + //Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16 + def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>; + def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>; + def A2_subh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 1>; + def A2_subh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 1>; + + //Rd=add(Rt.[LH],Rs.[LH]):sat:<<16 + def A2_addh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 0>; + def A2_addh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 0>; + def A2_addh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 0>; + def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>; +} + +// Add halfword. +def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16), + (A2_addh_l16_ll I32:$src1, I32:$src2)>; + +def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)), + (A2_addh_l16_hl I32:$src1, I32:$src2)>; + +def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)), + (A2_addh_h16_ll I32:$src1, I32:$src2)>; // Subtract halfword. +def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16), + (A2_subh_l16_ll I32:$src1, I32:$src2)>; + +def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)), + (A2_subh_h16_ll I32:$src1, I32:$src2)>; + +let hasSideEffects = 0, hasNewValue = 1 in +def S2_parityp: ALU64Inst<(outs IntRegs:$Rd), + (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0000; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, opNewValue = 0, hasSideEffects = 0 in +class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned > + : ALU64Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs), + "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","") + #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + + let IClass = 0b1101; + + let Inst{27-23} = 0b01011; + let Inst{22-21} = !if(isMax, 0b10, 0b01); + let Inst{7} = isUnsigned; + let Inst{4-0} = Rd; + let Inst{12-8} = !if(isMax, Rs, Rt); + let Inst{20-16} = !if(isMax, Rt, Rs); + } + +def A2_min : T_XTYPE_MIN_MAX < 0, 0 >; +def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >; +def A2_max : T_XTYPE_MIN_MAX < 1, 0 >; +def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >; + +// Here, depending on the operand being selected, we'll either generate a +// min or max instruction. +// Ex: +// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected +// is the larger of two. 
So, the corresponding HexagonInst is passed in 'Inst'. +// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value +// is selected and the corresponding HexagonInst is passed in 'SwapInst'. + +multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT, + InstHexagon Inst, InstHexagon SwapInst> { + def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), + (VT RC:$src1), (VT RC:$src2)), + (Inst RC:$src1, RC:$src2)>; + def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), + (VT RC:$src2), (VT RC:$src1)), + (SwapInst RC:$src1, RC:$src2)>; +} + + +multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { + defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>; + + def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), + (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), i16), + (Inst IntRegs:$src1, IntRegs:$src2)>; + + def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), + (i32 PositiveHalfWord:$src2), + (i32 PositiveHalfWord:$src1))), i16), + (SwapInst IntRegs:$src1, IntRegs:$src2)>; +} + +let AddedComplexity = 200 in { + defm: MinMax_pats<setge, A2_max, A2_min>; + defm: MinMax_pats<setgt, A2_max, A2_min>; + defm: MinMax_pats<setle, A2_min, A2_max>; + defm: MinMax_pats<setlt, A2_min, A2_max>; + defm: MinMax_pats<setuge, A2_maxu, A2_minu>; + defm: MinMax_pats<setugt, A2_maxu, A2_minu>; + defm: MinMax_pats<setule, A2_minu, A2_maxu>; + defm: MinMax_pats<setult, A2_minu, A2_maxu>; +} + +class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm> + : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> { + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0010100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{1-0} = Pd; +} + +def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>; +def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>; +def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>; + +class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp> + : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))), + (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>; + +def: T_cmp64_rr_pat<C2_cmpeqp, seteq>; +def: T_cmp64_rr_pat<C2_cmpgtp, setgt>; +def: T_cmp64_rr_pat<C2_cmpgtup, setugt>; +def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>; +def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>; + +def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd), + (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { + let hasSideEffects = 0; + + bits<5> Rd; + bits<2> Pu; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0001; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rd; +} + +class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType, + bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm, + string Op2Pfx> + : ALU64_rr<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + "$Rd = " #mnemonic# "($Rs, " #Op2Pfx# "$Rt)" #suffix, [], + "", ALU64_tc_1_SLOT23> { + let hasSideEffects = 0; + let isCommutable = IsComm; + + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + + let IClass = 0b1101; + let Inst{27-24} = RegType; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if (OpsRev,Rt,Rs); + let Inst{12-8} = !if (OpsRev,Rs,Rt); + let 
Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} + +class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat, + bit OpsRev, bit IsComm> + : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev, + IsComm, "">; + +def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>; +def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>; + +def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>; + +class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm, + bit IsNeg> + : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm, + !if(IsNeg,"~","")>; + +def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>; +def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>; +def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>; + +def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>; //===----------------------------------------------------------------------===// // ALU64/ALU - @@ -762,82 +1302,119 @@ def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, // Pipelined looping instructions. // Logical operations on predicates. -def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), - "$dst = and($src1, $src2)", - [(set (i1 PredRegs:$dst), (and (i1 PredRegs:$src1), - (i1 PredRegs:$src2)))]>; - -let neverHasSideEffects = 1 in -def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, - PredRegs:$src2), - "$dst = and($src1, !$src2)", - []>; - -def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), - "$dst = any8($src1)", - []>; - -def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), - "$dst = all8($src1)", - []>; - -def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1, - PredRegs:$src2), - "$dst = vitpack($src1, $src2)", - []>; +let hasSideEffects = 0 in +class T_LOGICAL_1OP<string MnOp, bits<2> OpBits> + : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps), + "$Pd = " # MnOp # "($Ps)", [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + + let IClass = 0b0110; + let Inst{27-23} = 0b10111; + let Inst{22-21} = OpBits; + let Inst{20} = 0b0; + let Inst{17-16} = Ps; + let Inst{13} = 0b0; + let Inst{1-0} = Pd; +} -def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2, - PredRegs:$src3), - "$dst = valignb($src1, $src2, $src3)", - []>; +def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>; +def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>; +def C2_not : T_LOGICAL_1OP<"not", 0b10>; + +def: Pat<(i1 (not (i1 PredRegs:$Ps))), + (C2_not PredRegs:$Ps)>; + +let hasSideEffects = 0 in +class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev> + : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt), + "$Pd = " # MnOp # "($Ps, " # !if (IsNeg,"!","") # "$Pt)", + [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + bits<2> Pt; + + let IClass = 0b0110; + let Inst{27-24} = 0b1011; + let Inst{23-21} = OpBits; + let Inst{20} = 0b0; + let Inst{17-16} = !if(Rev,Pt,Ps); // Rs and Rt are reversed for some + let Inst{13} = 0b0; // instructions. 
+ let Inst{9-8} = !if(Rev,Ps,Pt); + let Inst{1-0} = Pd; +} -def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2, - PredRegs:$src3), - "$dst = vspliceb($src1, $src2, $src3)", - []>; +def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>; +def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>; +def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>; +def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>; +def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>; -def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1), - "$dst = mask($src1)", - []>; +def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>; -def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), - "$dst = not($src1)", - [(set (i1 PredRegs:$dst), (not (i1 PredRegs:$src1)))]>; - -def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), - "$dst = or($src1, $src2)", - [(set (i1 PredRegs:$dst), (or (i1 PredRegs:$src1), - (i1 PredRegs:$src2)))]>; +let hasSideEffects = 0, hasNewValue = 1 in +def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt), + "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Ps; + bits<2> Pt; + + let IClass = 0b1000; + let Inst{27-24} = 0b1001; + let Inst{22-21} = 0b00; + let Inst{17-16} = Ps; + let Inst{9-8} = Pt; + let Inst{4-0} = Rd; +} -def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), - "$dst = xor($src1, $src2)", - [(set (i1 PredRegs:$dst), (xor (i1 PredRegs:$src1), - (i1 PredRegs:$src2)))]>; +let hasSideEffects = 0 in +def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt), + "$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Pt; + let IClass = 0b1000; + let Inst{27-24} = 0b0110; + let Inst{9-8} = Pt; + let Inst{4-0} = Rd; +} // User control register transfer. 
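Note: the C2_andn/C2_orn patterns above fold the negation of the second predicate into the operation itself, so no separate C2_not is needed. At the source level that is simply the following (hypothetical helpers):

#include <stdbool.h>

/* p & !q maps to and(Ps, !Pt), i.e. C2_andn, per the Pat above. */
static bool andn(bool p, bool q) { return p & !q; }

/* p | !q maps to or(Ps, !Pt), i.e. C2_orn. */
static bool orn(bool p, bool q) { return p | !q; }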
//===----------------------------------------------------------------------===// // CR - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// + def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, - [SDNPHasChain]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; -let InputType = "imm", isBarrier = 1, isPredicable = 1, -Defs = [PC], isExtendable = 1, opExtendable = 0, isExtentSigned = 1, -opExtentBits = 24, isCodeGenOnly = 0 in -class T_JMP <dag InsDag, list<dag> JumpList = []> - : JInst<(outs), InsDag, - "jump $dst" , JumpList> { - bits<24> dst; +class CondStr<string CReg, bit True, bit New> { + string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; +} +class JumpOpcStr<string Mnemonic, bit New, bit Taken> { + string S = Mnemonic # !if(Taken, ":t", !if(New, ":nt", "")); +} +let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0, + isPredicable = 1, + isExtendable = 1, opExtendable = 0, isExtentSigned = 1, + opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in +class T_JMP<string ExtStr> + : JInst<(outs), (ins brtarget:$dst), + "jump " # ExtStr # "$dst", + [], "", J_tc_2early_SLOT23> { + bits<24> dst; let IClass = 0b0101; let Inst{27-25} = 0b100; @@ -845,16 +1422,16 @@ class T_JMP <dag InsDag, list<dag> JumpList = []> let Inst{13-1} = dst{14-2}; } -let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1, -Defs = [PC], isPredicated = 1, opExtentBits = 17 in -class T_JMP_c <bit PredNot, bit isPredNew, bit isTak>: - JInst<(outs ), (ins PredRegs:$src, brtarget:$dst), - !if(PredNot, "if (!$src", "if ($src")# - !if(isPredNew, ".new) ", ") ")#"jump"# - !if(isPredNew, !if(isTak, ":t ", ":nt "), " ")#"$dst"> { - +let isBranch = 1, Defs = [PC], hasSideEffects = 0, isPredicated = 1, + isExtendable = 1, opExtendable = 1, isExtentSigned = 1, + opExtentBits = 17, opExtentAlign = 2, InputType = "imm" in +class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr> + : JInst<(outs), (ins PredRegs:$src, brtarget:$dst), + CondStr<"$src", !if(PredNot,0,1), isPredNew>.S # + JumpOpcStr<"jump", isPredNew, isTak>.S # " " # + ExtStr # "$dst", + [], "", J_tc_2early_SLOT23>, ImmRegRel { let isTaken = isTak; - let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); let isPredicatedFalse = PredNot; let isPredicatedNew = isPredNew; bits<2> src; @@ -864,7 +1441,7 @@ class T_JMP_c <bit PredNot, bit isPredNew, bit isTak>: let Inst{27-24} = 0b1100; let Inst{21} = PredNot; - let Inst{12} = !if(isPredNew, isTak, zero); + let Inst{12} = isTak; let Inst{11} = isPredNew; let Inst{9-8} = src; let Inst{23-22} = dst{16-15}; @@ -873,11 +1450,28 @@ class T_JMP_c <bit PredNot, bit isPredNew, bit isTak>: let Inst{7-1} = dst{8-2}; } -let isBarrier = 1, Defs = [PC], isPredicable = 1, InputType = "reg" in -class T_JMPr<dag InsDag = (ins IntRegs:$dst)> - : JRInst<(outs ), InsDag, - "jumpr $dst" , - []> { +multiclass JMP_Pred<bit PredNot, string ExtStr> { + def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>; // not taken + // Predicate new + def NAME#newpt : 
T_JMP_c<PredNot, 1, 1, ExtStr>; // taken + def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken +} + +multiclass JMP_base<string BaseOp, string ExtStr> { + let BaseOpcode = BaseOp in { + def NAME : T_JMP<ExtStr>; + defm t : JMP_Pred<0, ExtStr>; + defm f : JMP_Pred<1, ExtStr>; + } +} + +// Jumps to address stored in a register, JUMPR_MISC +// if ([[!]P[.new]]) jumpr[:t/nt] Rs +let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC], + isPredicable = 1, hasSideEffects = 0, InputType = "reg" in +class T_JMPr + : JRInst<(outs), (ins IntRegs:$dst), + "jumpr $dst", [], "", J_tc_2early_SLOT2> { bits<5> dst; let IClass = 0b0101; @@ -885,15 +1479,15 @@ class T_JMPr<dag InsDag = (ins IntRegs:$dst)> let Inst{20-16} = dst; } -let Defs = [PC], isPredicated = 1, InputType = "reg" in -class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>: - JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst), - !if(PredNot, "if (!$src", "if ($src")# - !if(isPredNew, ".new) ", ") ")#"jumpr"# - !if(isPredNew, !if(isTak, ":t ", ":nt "), " ")#"$dst"> { +let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1, + hasSideEffects = 0, InputType = "reg" in +class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak> + : JRInst <(outs), (ins PredRegs:$src, IntRegs:$dst), + CondStr<"$src", !if(PredNot,0,1), isPredNew>.S # + JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst", [], + "", J_tc_2early_SLOT2> { let isTaken = isTak; - let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); let isPredicatedFalse = PredNot; let isPredicatedNew = isPredNew; bits<2> src; @@ -904,73 +1498,88 @@ class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>: let Inst{27-22} = 0b001101; let Inst{21} = PredNot; let Inst{20-16} = dst; - let Inst{12} = !if(isPredNew, isTak, zero); + let Inst{12} = isTak; let Inst{11} = isPredNew; let Inst{9-8} = src; - let Predicates = !if(isPredNew, [HasV3T], [HasV2T]); - let validSubTargets = !if(isPredNew, HasV3SubT, HasV2SubT); -} - -multiclass JMP_Pred<bit PredNot> { - def _#NAME : T_JMP_c<PredNot, 0, 0>; - // Predicate new - def _#NAME#new_t : T_JMP_c<PredNot, 1, 1>; // taken - def _#NAME#new_nt : T_JMP_c<PredNot, 1, 0>; // not taken -} - -multiclass JMP_base<string BaseOp> { - let BaseOpcode = BaseOp in { - def NAME : T_JMP<(ins brtarget:$dst), [(br bb:$dst)]>; - defm t : JMP_Pred<0>; - defm f : JMP_Pred<1>; - } } multiclass JMPR_Pred<bit PredNot> { - def NAME: T_JMPr_c<PredNot, 0, 0>; + def NAME : T_JMPr_c<PredNot, 0, 0>; // not taken // Predicate new - def NAME#new_tV3 : T_JMPr_c<PredNot, 1, 1>; // taken - def NAME#new_ntV3 : T_JMPr_c<PredNot, 1, 0>; // not taken + def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken + def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken } multiclass JMPR_base<string BaseOp> { let BaseOpcode = BaseOp in { def NAME : T_JMPr; - defm _t : JMPR_Pred<0>; - defm _f : JMPR_Pred<1>; + defm t : JMPR_Pred<0>; + defm f : JMPR_Pred<1>; } } -let isTerminator = 1, neverHasSideEffects = 1 in { -let isBranch = 1 in -defm JMP : JMP_base<"JMP">, PredNewRel; +let isCall = 1, hasSideEffects = 1 in +class JUMPR_MISC_CALLR<bit isPred, bit isPredNot, + dag InputDag = (ins IntRegs:$Rs)> + : JRInst<(outs), InputDag, + !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs", + "if ($Pu) callr $Rs"), + "callr $Rs"), + [], "", J_tc_2early_SLOT2> { + bits<5> Rs; + bits<2> Pu; + let isPredicated = isPred; + let isPredicatedFalse = isPredNot; -let isBranch = 1, isIndirectBranch = 1 in -defm JMPR : JMPR_base<"JMPr">, PredNewRel; + let IClass = 0b0101; + let Inst{27-25} = 0b000; + let 
Inst{24-23} = !if (isPred, 0b10, 0b01); + let Inst{22} = 0; + let Inst{21} = isPredNot; + let Inst{9-8} = !if (isPred, Pu, 0b00); + let Inst{20-16} = Rs; -let isReturn = 1, isCodeGenOnly = 1 in -defm JMPret : JMPR_base<"JMPret">, PredNewRel; + } + +let Defs = VolatileV3.Regs in { + def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>; + def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>; } -def : Pat<(retflag), - (JMPret (i32 R31))>; +let isTerminator = 1, hasSideEffects = 0 in { + defm J2_jump : JMP_base<"JMP", "">, PredNewRel; -def : Pat <(brcond (i1 PredRegs:$src1), bb:$offset), - (JMP_t (i1 PredRegs:$src1), bb:$offset)>; + // Deal with explicit assembly + // - never extened a jump #, always extend a jump ## + let isAsmParserOnly = 1 in { + defm J2_jump_ext : JMP_base<"JMP", "##">; + defm J2_jump_noext : JMP_base<"JMP", "#">; + } -// A return through builtin_eh_return. -let isReturn = 1, isTerminator = 1, isBarrier = 1, neverHasSideEffects = 1, -isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in -def EH_RETURN_JMPR : T_JMPr; + defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel; -def : Pat<(eh_return), - (EH_RETURN_JMPR (i32 R31))>; + let isReturn = 1, isCodeGenOnly = 1 in + defm JMPret : JMPR_base<"JMPret">, PredNewRel; +} -def : Pat<(HexagonBR_JT (i32 IntRegs:$dst)), - (JMPR (i32 IntRegs:$dst))>; +def: Pat<(br bb:$dst), + (J2_jump brtarget:$dst)>; +def: Pat<(retflag), + (JMPret (i32 R31))>; +def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset), + (J2_jumpt PredRegs:$src1, bb:$offset)>; -def : Pat<(brind (i32 IntRegs:$dst)), - (JMPR (i32 IntRegs:$dst))>; +// A return through builtin_eh_return. +let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0, + isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in +def EH_RETURN_JMPR : T_JMPr; + +def: Pat<(eh_return), + (EH_RETURN_JMPR (i32 R31))>; +def: Pat<(HexagonBR_JT (i32 IntRegs:$dst)), + (J2_jumpr IntRegs:$dst)>; +def: Pat<(brind (i32 IntRegs:$dst)), + (J2_jumpr IntRegs:$dst)>; //===----------------------------------------------------------------------===// // JR - @@ -979,265 +1588,688 @@ def : Pat<(brind (i32 IntRegs:$dst)), //===----------------------------------------------------------------------===// // LD + //===----------------------------------------------------------------------===// -/// -// Load -- MEMri operand -multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC, - bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : LDInst2<(outs RC:$dst), - (ins PredRegs:$src1, MEMri:$addr), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#"$dst = "#mnemonic#"($addr)", - []>; -} - -multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>; - // Predicate new - defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>; + +// Load - Base with Immediate offset addressing mode +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in +class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp, + Operand ImmOp> + : LDInst<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset), + "$dst = "#mnemonic#"($src1 + #$offset)", []>, AddrModeRel { + bits<4> name; + bits<5> dst; + bits<5> src1; + bits<14> offset; + bits<11> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), offset{13-3}, + !if (!eq(ImmOpStr, "s11_2Ext"), 
offset{12-2}, + !if (!eq(ImmOpStr, "s11_1Ext"), offset{11-1}, + /* s11_0Ext */ offset{10-0}))); + let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14, + !if (!eq(ImmOpStr, "s11_2Ext"), 13, + !if (!eq(ImmOpStr, "s11_1Ext"), 12, + /* s11_0Ext */ 11))); + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; } -} -let isExtendable = 1, neverHasSideEffects = 1 in -multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC, - bits<5> ImmBits, bits<5> PredImmBits> { +let opExtendable = 3, isExtentSigned = 0, isPredicated = 1 in +class T_pload_io <string mnemonic, RegisterClass RC, bits<4>MajOp, + Operand ImmOp, bit isNot, bit isPredNew> + : LDInst<(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "if ("#!if(isNot, "!$src1", "$src1") + #!if(isPredNew, ".new", "") + #") $dst = "#mnemonic#"($src2 + #$offset)", + [],"", V2LDST_tc_ld_SLOT01> , AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<9> offset; + bits<6> offsetBits; + string ImmOpStr = !cast<string>(ImmOp); + + let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), offset{8-3}, + !if (!eq(ImmOpStr, "u6_2Ext"), offset{7-2}, + !if (!eq(ImmOpStr, "u6_1Ext"), offset{6-1}, + /* u6_0Ext */ offset{5-0}))); + let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9, + !if (!eq(ImmOpStr, "u6_2Ext"), 8, + !if (!eq(ImmOpStr, "u6_1Ext"), 7, + /* u6_0Ext */ 6))); + let hasNewValue = !if (!eq(ImmOpStr, "u6_3Ext"), 0, 1); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isNot; + + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{27} = 0b0; + let Inst{26} = isNot; + let Inst{25} = isPredNew; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b0; + let Inst{12-11} = src1; + let Inst{10-5} = offsetBits; + let Inst{4-0} = dst; + } - let CextOpcode = CextOp, BaseOpcode = CextOp in { - let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits, - isPredicable = 1 in - def NAME : LDInst2<(outs RC:$dst), (ins MEMri:$addr), - "$dst = "#mnemonic#"($addr)", - []>; - - let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits, - isPredicated = 1 in { - defm Pt : LD_MEMri_Pred<mnemonic, RC, 0 >; - defm NotPt : LD_MEMri_Pred<mnemonic, RC, 1 >; - } +let isExtendable = 1, hasSideEffects = 0, addrMode = BaseImmOffset in +multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<4>MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + let isPredicable = 1 in + def L2_#NAME#_io : T_load_io <mnemonic, RC, MajOp, ImmOp>; + + // Predicated + def L2_p#NAME#t_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 0>; + def L2_p#NAME#f_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 0>; + + // Predicated new + def L2_p#NAME#tnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 1>; + def L2_p#NAME#fnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 1>; } } -let addrMode = BaseImmOffset, isMEMri = "true" in { - let accessSize = ByteAccess in { - defm LDrib: LD_MEMri < "memb", "LDrib", IntRegs, 11, 6>, AddrModeRel; - defm LDriub: LD_MEMri < "memub" , "LDriub", IntRegs, 11, 6>, AddrModeRel; - } +let accessSize = ByteAccess in { + defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>; + defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>; +} - let accessSize = 
HalfWordAccess in { - defm LDrih: LD_MEMri < "memh", "LDrih", IntRegs, 12, 7>, AddrModeRel; - defm LDriuh: LD_MEMri < "memuh", "LDriuh", IntRegs, 12, 7>, AddrModeRel; - } +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>; + defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>; +} - let accessSize = WordAccess in - defm LDriw: LD_MEMri < "memw", "LDriw", IntRegs, 13, 8>, AddrModeRel; +let accessSize = WordAccess, opExtentAlign = 2 in +defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>; - let accessSize = DoubleWordAccess in - defm LDrid: LD_MEMri < "memd", "LDrid", DoubleRegs, 14, 9>, AddrModeRel; +let accessSize = DoubleWordAccess, opExtentAlign = 3 in +defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>; + +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + def L2_loadbsw2_io: T_load_io<"membh", IntRegs, 0b0001, s11_1Ext>; + def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>; } -def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)), - (LDrib ADDRriS11_0:$addr) >; +let accessSize = WordAccess, opExtentAlign = 2 in { + def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>; + def L2_loadbsw4_io: T_load_io<"membh", DoubleRegs, 0b0111, s11_2Ext>; +} -def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)), - (LDriub ADDRriS11_0:$addr) >; +let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0, + opExtendable = 3, isExtentSigned = 1 in +class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp> + : LDInst<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "$dst = "#str#"($src2 + #$offset)", [], + "$src1 = $dst">, AddrModeRel { + bits<4> name; + bits<5> dst; + bits<5> src2; + bits<12> offset; + bits<11> offsetBits; + + let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1}, + /* s11_0Ext */ offset{10-0}); + let IClass = 0b1001; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; + } -def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)), - (LDrih ADDRriS11_1:$addr) >; +let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in +def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>; -def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)), - (LDriuh ADDRriS11_1:$addr) >; +let accessSize = ByteAccess, opExtentBits = 11 in +def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>; -def : Pat < (i32 (load ADDRriS11_2:$addr)), - (LDriw ADDRriS11_2:$addr) >; +// Patterns to select load-indexed (i.e. load from base+offset). 
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, + InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; + def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; +} -def : Pat < (i64 (load ADDRriS11_3:$addr)), - (LDrid ADDRriS11_3:$addr) >; +let AddedComplexity = 20 in { + defm: Loadx_pat<load, i32, s11_2ExtPred, L2_loadri_io>; + defm: Loadx_pat<load, i64, s11_3ExtPred, L2_loadrd_io>; + defm: Loadx_pat<atomic_load_8 , i32, s11_0ExtPred, L2_loadrub_io>; + defm: Loadx_pat<atomic_load_16, i32, s11_1ExtPred, L2_loadruh_io>; + defm: Loadx_pat<atomic_load_32, i32, s11_2ExtPred, L2_loadri_io>; + defm: Loadx_pat<atomic_load_64, i64, s11_3ExtPred, L2_loadrd_io>; + + defm: Loadx_pat<extloadi1, i32, s11_0ExtPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi8, i32, s11_0ExtPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi16, i32, s11_1ExtPred, L2_loadruh_io>; + defm: Loadx_pat<sextloadi8, i32, s11_0ExtPred, L2_loadrb_io>; + defm: Loadx_pat<sextloadi16, i32, s11_1ExtPred, L2_loadrh_io>; + defm: Loadx_pat<zextloadi1, i32, s11_0ExtPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi8, i32, s11_0ExtPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi16, i32, s11_1ExtPred, L2_loadruh_io>; + // No sextloadi1. +} +// Sign-extending loads of i1 need to replicate the lowest bit throughout +// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should +// do the trick. +let AddedComplexity = 20 in +def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))), + (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; -// Load - Base with Immediate offset addressing mode -multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, - bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : LDInst2<(outs RC:$dst), - (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#"$dst = "#mnemonic#"($src2+#$src3)", - []>; -} - -multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, - bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; - // Predicate new - defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>; +//===----------------------------------------------------------------------===// +// Post increment load +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Template class for non-predicated post increment loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_load_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp > + : LDInstPI <(outs RC:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, ImmOp:$offset), + "$dst = "#mnemonic#"($src1++#$offset)" , + [], + "$src1 = $dst2" > , + PredNewRel { + bits<5> dst; + bits<5> src1; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13-12} = 0b00; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; } -} -let isExtendable = 1, neverHasSideEffects = 1 in -multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC, - Operand ImmOp, Operand predImmOp, bits<5> ImmBits, - bits<5> PredImmBits> { +//===----------------------------------------------------------------------===// +// Template class for predicated post increment loads with immediate offset. +//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in +class T_pload_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isPredNot, bit isPredNew > + : LDInst <(outs RC:$dst, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($src2++#$offset)", + [] , + "$src2 = $dst2" > , + PredNewRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<7> offset; + bits<4> offsetBits; - let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { - let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits, - isPredicable = 1, AddedComplexity = 20 in - def NAME : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset), - "$dst = "#mnemonic#"($src1+#$offset)", - []>; - - let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits, - isPredicated = 1 in { - defm Pt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 0 >; - defm NotPt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 1 >; - } + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12} = isPredNew; + let Inst{11} = isPredNot; + let Inst{10-9} = src1; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; } -} -let addrMode = BaseImmOffset in { - let accessSize = ByteAccess in { - defm LDrib_indexed: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, - 11, 6>, AddrModeRel; - defm LDriub_indexed: LD_Idxd <"memub" , "LDriub", IntRegs, s11_0Ext, u6_0Ext, - 11, 6>, AddrModeRel; - } - let accessSize = HalfWordAccess in { - defm LDrih_indexed: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, - 12, 7>, AddrModeRel; - defm LDriuh_indexed: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, - 12, 7>, 
AddrModeRel; +//===----------------------------------------------------------------------===// +// Multiclass for post increment loads with immediate offset. +//===----------------------------------------------------------------------===// + +multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp, bits<4> MajOp> { + let BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def L2_#NAME#_pi : T_load_pi < mnemonic, RC, ImmOp, MajOp>; + + // Predicated + def L2_p#NAME#t_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 0>; + def L2_p#NAME#f_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 0>; + + // Predicated new + def L2_p#NAME#tnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 1>; + def L2_p#NAME#fnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 1>; } - let accessSize = WordAccess in - defm LDriw_indexed: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, - 13, 8>, AddrModeRel; +} - let accessSize = DoubleWordAccess in - defm LDrid_indexed: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, - 14, 9>, AddrModeRel; +// post increment byte loads with immediate offset +let accessSize = ByteAccess in { + defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>; + defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>; } -let AddedComplexity = 20 in { -def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))), - (LDrib_indexed IntRegs:$src1, s11_0ExtPred:$offset) >; +// post increment halfword loads with immediate offset +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>; + defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>; +} -def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))), - (LDriub_indexed IntRegs:$src1, s11_0ExtPred:$offset) >; +// post increment word loads with immediate offset +let accessSize = WordAccess, opExtentAlign = 2 in +defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>; -def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))), - (LDrih_indexed IntRegs:$src1, s11_1ExtPred:$offset) >; +// post increment doubleword loads with immediate offset +let accessSize = DoubleWordAccess, opExtentAlign = 3 in +defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>; + +// Rd=memb[u]h(Rx++#s4:1) +// Rdd=memb[u]h(Rx++#s4:2) +let accessSize = HalfWordAccess, opExtentAlign = 1 in { + def L2_loadbsw2_pi : T_load_pi <"membh", IntRegs, s4_1Imm, 0b0001>; + def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>; +} +let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in { + def L2_loadbsw4_pi : T_load_pi <"membh", DoubleRegs, s4_2Imm, 0b0111>; + def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>; +} -def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))), - (LDriuh_indexed IntRegs:$src1, s11_1ExtPred:$offset) >; +//===----------------------------------------------------------------------===// +// Template class for post increment fifo loads with immediate offset. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp > + : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2), + (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset), + "$dst = "#mnemonic#"($src2++#$offset)" , + [], "$src2 = $dst2, $src1 = $dst" > , + PredNewRel { + bits<5> dst; + bits<5> src2; + bits<5> offset; + bits<4> offsetBits; + + let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}); + let IClass = 0b1001; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13-12} = 0b00; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } -def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))), - (LDriw_indexed IntRegs:$src1, s11_2ExtPred:$offset) >; +// Ryy=memh_fifo(Rx++#s4:1) +// Ryy=memb_fifo(Rx++#s4:0) +let accessSize = ByteAccess in +def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>; -def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))), - (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >; -} +let accessSize = HalfWordAccess, opExtentAlign = 1 in +def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>; //===----------------------------------------------------------------------===// -// Post increment load +// Template class for post increment loads with register offset. //===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = PostInc in +class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp, + MemAccessSize AccessSz> + : LDInstPI <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2), + "$dst = "#mnemonic#"($src1++$src2)" , + [], "$src1 = $_dst_" > { + bits<5> dst; + bits<5> src1; + bits<1> src2; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +let hasNewValue = 1 in { + def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>; + def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>; + def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>; + def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>; + def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>; -multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, - bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), - (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#"$dst = "#mnemonic#"($src2++#$offset)", - [], - "$src2 = $dst2">; + def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>; } -multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC, - Operand ImmOp, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; - // Predicate new - let Predicates = [HasV4T], validSubTargets = HasV4SubT in - defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>; - } +def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>; +def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>; + +// Load predicate. 
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +def LDriw_pred : LDInst<(outs PredRegs:$dst), + (ins IntRegs:$addr, s11_2Ext:$off), + ".error \"should not emit\"", []>; + +let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in + def L2_deallocframe : LDInst<(outs), (ins), + "deallocframe", + []> { + let IClass = 0b1001; + + let Inst{27-16} = 0b000000011110; + let Inst{13} = 0b0; + let Inst{4-0} = 0b11110; } -multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC, - Operand ImmOp> { +// Load / Post increment circular addressing mode. +let Uses = [CS], hasSideEffects = 0 in +class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp> + : LDInst <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [], + "$Rz = $_dst_" > { + bits<5> dst; + bits<5> Rz; + bit Mu; + + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + let IClass = 0b1001; - let BaseOpcode = "POST_"#BaseOp in { - let isPredicable = 1 in - def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), - (ins IntRegs:$src1, ImmOp:$offset), - "$dst = "#mnemonic#"($src1++#$offset)", - [], - "$src1 = $dst2">; - - let isPredicated = 1 in { - defm Pt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 0 >; - defm NotPt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 1 >; - } + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b1; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +let accessSize = ByteAccess in { + def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>; + def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>; +} + +let accessSize = HalfWordAccess in { + def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>; + def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>; + def L2_loadbsw2_pcr : T_load_pcr <"membh", IntRegs, 0b0001>; + def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>; +} + +let accessSize = WordAccess in { + def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>; + let hasNewValue = 0 in { + def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>; + def L2_loadbsw4_pcr : T_load_pcr <"membh", DoubleRegs, 0b0111>; } } -let hasCtrlDep = 1, neverHasSideEffects = 1, addrMode = PostInc in { - defm POST_LDrib : LD_PostInc<"memb", "LDrib", IntRegs, s4_0Imm>, - PredNewRel; - defm POST_LDriub : LD_PostInc<"memub", "LDriub", IntRegs, s4_0Imm>, - PredNewRel; - defm POST_LDrih : LD_PostInc<"memh", "LDrih", IntRegs, s4_1Imm>, - PredNewRel; - defm POST_LDriuh : LD_PostInc<"memuh", "LDriuh", IntRegs, s4_1Imm>, - PredNewRel; - defm POST_LDriw : LD_PostInc<"memw", "LDriw", IntRegs, s4_2Imm>, - PredNewRel; - defm POST_LDrid : LD_PostInc<"memd", "LDrid", DoubleRegs, s4_3Imm>, - PredNewRel; +let accessSize = DoubleWordAccess in +def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>; + +// Load / Post increment circular addressing mode. 
+let Uses = [CS], hasSideEffects = 0 in +class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz > + : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_), + (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [], + "$Rz = $_dst_, $dst = $_src_" > { + bits<5> dst; + bits<5> Rz; + bit Mu; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b1; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } + +def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>; +def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>; + +//===----------------------------------------------------------------------===// +// Circular loads with immediate offset. +//===----------------------------------------------------------------------===// +let Uses = [CS], mayLoad = 1, hasSideEffects = 0 in +class T_load_pci <string mnemonic, RegisterClass RC, + Operand ImmOp, bits<4> MajOp> + : LDInstPI<(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ImmOp:$offset, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ #$offset:circ($Mu))", [], + "$Rz = $_dst_"> { + bits<5> dst; + bits<5> Rz; + bits<1> Mu; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let IClass = 0b1001; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{9} = 0b0; + let Inst{8-5} = offsetBits; + let Inst{4-0} = dst; + } + +// Byte variants of circ load +let accessSize = ByteAccess in { + def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>; + def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>; } -def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)), - (i32 (LDrib ADDRriS11_0:$addr)) >; +// Half word variants of circ load +let accessSize = HalfWordAccess in { + def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>; + def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>; + def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>; + def L2_loadbsw2_pci : T_load_pci <"membh", IntRegs, s4_1Imm, 0b0001>; +} -// Load byte any-extend. -def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)), - (i32 (LDrib ADDRriS11_0:$addr)) >; +// Word variants of circ load +let accessSize = WordAccess in +def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>; -// Indexed load byte any-extend. 
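The post-increment and circular load classes above all encode their immediate the same way: the assembly-level byte offset must be a multiple of the access size, and only the scaled value (offset{6-3} for s4_3Imm, offset{5-2} for s4_2Imm, and so on) is placed in Inst{8-5}. A minimal stand-alone sketch of that scaled s4 encoding, assuming nothing beyond what the offsetBits computation above shows (encodeS4Offset/decodeS4Offset are made-up illustration helpers, not backend code):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Encode a byte offset as the 4-bit scaled field used by the s4_0/1/2/3
// post-increment immediates: field = offset / accessSize, stored as a
// 4-bit two's-complement value.
static uint8_t encodeS4Offset(int32_t ByteOff, int32_t AccessSize) {
  assert(ByteOff % AccessSize == 0 && "offset must be a multiple of the size");
  int32_t Scaled = ByteOff / AccessSize;
  assert(Scaled >= -8 && Scaled <= 7 && "out of s4 range");
  return (uint8_t)(Scaled & 0xF);
}

// Recover the byte offset from the 4-bit field.
static int32_t decodeS4Offset(uint8_t Field, int32_t AccessSize) {
  int32_t Scaled = (Field & 0x8) ? (int32_t)Field - 16 : (int32_t)Field;
  return Scaled * AccessSize;
}

int main() {
  // memd-style access (8 bytes): representable byte offsets are -64..56 in steps of 8.
  uint8_t Field = encodeS4Offset(-16, 8);
  printf("field=0x%x  decoded=%d\n", Field, decodeS4Offset(Field, 8));
  return 0;
}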
-let AddedComplexity = 20 in -def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))), - (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >; +let accessSize = WordAccess, hasNewValue = 0 in { + def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>; + def L2_loadbsw4_pci : T_load_pci <"membh", DoubleRegs, s4_2Imm, 0b0111>; +} -def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)), - (i32 (LDrih ADDRriS11_1:$addr))>; +let accessSize = DoubleWordAccess, hasNewValue = 0 in +def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>; -let AddedComplexity = 20 in -def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))), - (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >; +//===----------------------------------------------------------------------===// +// Circular loads - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match with the real instructions. Pseudo instructions operand +// order should mimics the ordering in the intrinsics. Also, 'src2' doesn't +// appear in the AsmString because it's same as 'dst'. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in +class T_load_pci_pseudo <string opc, RegisterClass RC> + : LDInstPI<(outs IntRegs:$_dst_, RC:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4), + ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"", + [], "$src1 = $_dst_">; + +def L2_loadrb_pci_pseudo : T_load_pci_pseudo <"memb", IntRegs>; +def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>; +def L2_loadrh_pci_pseudo : T_load_pci_pseudo <"memh", IntRegs>; +def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>; +def L2_loadri_pci_pseudo : T_load_pci_pseudo <"memw", IntRegs>; +def L2_loadrd_pci_pseudo : T_load_pci_pseudo <"memd", DoubleRegs>; + + +// TODO: memb_fifo and memh_fifo must take destination register as input. +// One-off circ loads - not enough in common to break into a class. +let accessSize = ByteAccess in +def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>; + +let accessSize = HalfWordAccess, opExtentAlign = 1 in +def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>; + +// L[24]_load[wd]_locked: Load word/double with lock. +let isSoloAX = 1 in +class T_load_locked <string mnemonic, RegisterClass RC> + : LD0Inst <(outs RC:$dst), + (ins IntRegs:$src), + "$dst = "#mnemonic#"($src)"> { + bits<5> dst; + bits<5> src; + let IClass = 0b1001; + let Inst{27-21} = 0b0010000; + let Inst{20-16} = src; + let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00); + let Inst{5} = 0; + let Inst{4-0} = dst; +} +let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in + def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>; +let accessSize = DoubleWordAccess in + def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>; + +// S[24]_store[wd]_locked: Store word/double conditionally. 
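L2_loadw_locked/L4_loadd_locked above and the conditional-store class defined just below form the load-locked/store-conditional pair; the predicate produced by the store reports whether the update took effect. As a rough illustration of the retry loop such primitives exist to support (modeled here with std::atomic rather than the real instructions; atomicAdd is an invented example, not backend or library code):

#include <atomic>
#include <cstdio>

// Emulates the shape of an LL/SC retry loop: read the old value (the role of
// the locked load), try to publish old+inc (the role of the conditional
// store), and retry while the conditional update "fails".
static int atomicAdd(std::atomic<int> &Mem, int Inc) {
  int Old = Mem.load(std::memory_order_relaxed);
  while (!Mem.compare_exchange_weak(Old, Old + Inc))
    ;  // compare_exchange_weak reloads Old on failure, so just retry
  return Old;
}

int main() {
  std::atomic<int> Counter{41};
  int Old = atomicAdd(Counter, 1);
  printf("old=%d new=%d\n", Old, Counter.load());
  return 0;
}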
+let isSoloAX = 1, isPredicateLate = 1 in +class T_store_locked <string mnemonic, RegisterClass RC> + : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt), + mnemonic#"($Rs, $Pd) = $Rt"> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1010; + let Inst{27-23} = 0b00001; + let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1); + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} -let AddedComplexity = 10 in -def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)), - (i32 (LDriub ADDRriS11_0:$addr))>; +let accessSize = WordAccess in +def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>; -let AddedComplexity = 20 in -def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))), - (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>; +let accessSize = DoubleWordAccess in +def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>; -// Load predicate. -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, -isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in -def LDriw_pred : LDInst2<(outs PredRegs:$dst), - (ins MEMri:$addr), - "Error; should not emit", - []>; +//===----------------------------------------------------------------------===// +// Bit-reversed loads with auto-increment register +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_load_pbr<string mnemonic, RegisterClass RC, + MemAccessSize addrSize, bits<4> majOp> + : LDInst + <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu), + "$dst = "#mnemonic#"($Rz ++ $Mu:brev)" , + [] , "$Rz = $_dst_" > { + + let accessSize = addrSize; + + bits<5> dst; + bits<5> Rz; + bits<1> Mu; + + let IClass = 0b1001; + + let Inst{27-25} = 0b111; + let Inst{24-21} = majOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; + } -// Deallocate stack frame. -let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { - def DEALLOCFRAME : LDInst2<(outs), (ins), - "deallocframe", - []>; +let hasNewValue =1, opNewValue = 0 in { + def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>; + def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>; + def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>; + def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>; + def L2_loadbsw2_pbr : T_load_pbr <"membh", IntRegs, HalfWordAccess, 0b0001>; + def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>; + def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>; } -// Load and unpack bytes to halfwords. +def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>; +def L2_loadbsw4_pbr : T_load_pbr <"membh", DoubleRegs, WordAccess, 0b0111>; +def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>; + +def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>; +def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs, + HalfWordAccess, 0b0010>; + +//===----------------------------------------------------------------------===// +// Bit-reversed loads - Pseudo +// +// Please note that 'src2' doesn't appear in the AsmString because +// it's same as 'dst'. 
+//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in +class T_load_pbr_pseudo <string opc, RegisterClass RC> + : LDInstPI<(outs IntRegs:$_dst_, RC:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + ".error \"$dst = "#opc#"($src1++$src3:brev)\"", + [], "$src1 = $_dst_">; + +def L2_loadrb_pbr_pseudo : T_load_pbr_pseudo <"memb", IntRegs>; +def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>; +def L2_loadrh_pbr_pseudo : T_load_pbr_pseudo <"memh", IntRegs>; +def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>; +def L2_loadri_pbr_pseudo : T_load_pbr_pseudo <"memw", IntRegs>; +def L2_loadrd_pbr_pseudo : T_load_pbr_pseudo <"memd", DoubleRegs>; + //===----------------------------------------------------------------------===// // LD - //===----------------------------------------------------------------------===// @@ -1259,180 +2291,934 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { //===----------------------------------------------------------------------===// // MTYPE/MPYH + //===----------------------------------------------------------------------===// -// Multiply and use lower result. -// Rd=+mpyi(Rs,#u8) -let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in -def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2), - "$dst =+ mpyi($src1, #$src2)", - [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - u8ExtPred:$src2))]>; -// Rd=-mpyi(Rs,#u8) -def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), - "$dst =- mpyi($src1, #$src2)", - [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1), - u8ImmPred:$src2)))]>; +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multipy signed/unsigned halfwords +//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +//===----------------------------------------------------------------------===// + +let hasNewValue = 1, opNewValue = 0 in +class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd, + bit hasShift, bit isUnsigned> + : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isRnd,":rnd","") + #!if(isSat,":sat",""), + [], "", M_tc_3x_SLOT23 > { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1100; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isRnd; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>; +def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>; +def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>; +def M2_mpy_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 0>; +def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>; +def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>; +def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>; +def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>; + +//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>; +def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>; +def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>; +def M2_mpyu_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 1>; +def M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>; +def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>; +def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>; +def 
M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>; + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd +def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>; +def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>; +def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>; +def M2_mpy_rnd_lh_s0: T_M2_mpy <0b01, 0, 1, 0, 0>; +def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>; +def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>; +def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>; +def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>; + +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +let Defs = [USR_OVF] in { + def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>; + def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>; + def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>; + def M2_mpy_sat_lh_s0: T_M2_mpy <0b01, 1, 0, 0, 0>; + def M2_mpy_sat_hl_s1: T_M2_mpy <0b10, 1, 0, 1, 0>; + def M2_mpy_sat_hl_s0: T_M2_mpy <0b10, 1, 0, 0, 0>; + def M2_mpy_sat_hh_s1: T_M2_mpy <0b11, 1, 0, 1, 0>; + def M2_mpy_sat_hh_s0: T_M2_mpy <0b11, 1, 0, 0, 0>; + + def M2_mpy_sat_rnd_ll_s1: T_M2_mpy <0b00, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_ll_s0: T_M2_mpy <0b00, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_lh_s1: T_M2_mpy <0b01, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_lh_s0: T_M2_mpy <0b01, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_hl_s1: T_M2_mpy <0b10, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_hl_s0: T_M2_mpy <0b10, 1, 1, 0, 0>; + def M2_mpy_sat_rnd_hh_s1: T_M2_mpy <0b11, 1, 1, 1, 0>; + def M2_mpy_sat_rnd_hh_s0: T_M2_mpy <0b11, 1, 1, 0, 0>; +} + +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multipy signed/unsigned halfwords and add/subtract the +// result from the accumulator. +//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +let hasNewValue = 1, opNewValue = 0 in +class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac, + bit hasShift, bit isUnsigned > + : MInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy") + #"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + [], "$dst2 = $Rx", M_tc_3x_SLOT23 > { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + let Inst{27-24} = 0b1110; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isNac; + let Inst{7} = isSat; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>; +def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>; +def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>; +def M2_mpy_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 0>; +def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>; +def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>; +def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>; +def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>; + +//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>; +def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>; +def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>; +def M2_mpyu_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 1>; +def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>; +def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>; +def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>; +def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 
1>; + +//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>; +def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>; +def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>; +def M2_mpy_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 0>; +def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>; +def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>; +def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>; +def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>; + +//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1] +def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>; +def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>; +def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>; +def M2_mpyu_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 1>; +def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>; +def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>; +def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>; +def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>; + +//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat +def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>; +def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>; +def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>; +def M2_mpy_acc_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 0, 0, 0>; +def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>; +def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>; +def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>; +def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>; + +//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat +def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>; +def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>; +def M2_mpy_nac_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 1, 1, 0>; +def M2_mpy_nac_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 1, 0, 0>; +def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>; +def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>; +def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>; +def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>; + +//===----------------------------------------------------------------------===// +// Template Class +// MPYS / Multipy signed/unsigned halfwords and add/subtract the +// result from the 64-bit destination register. 
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned> + : MInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rxx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy") + #"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1",""), + [], "$dst2 = $Rxx", M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0110; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isNac; + let Inst{7} = 0; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } + +def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>; +def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>; +def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>; +def M2_mpyd_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 0>; + +def M2_mpyd_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 0>; +def M2_mpyd_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 0>; +def M2_mpyd_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 0>; +def M2_mpyd_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 0>; + +def M2_mpyd_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 0>; +def M2_mpyd_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 0>; +def M2_mpyd_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 0>; +def M2_mpyd_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 0>; + +def M2_mpyd_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 0>; +def M2_mpyd_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 0>; +def M2_mpyd_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 0>; +def M2_mpyd_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 0>; + +def M2_mpyud_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 1>; +def M2_mpyud_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 1>; +def M2_mpyud_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 1>; +def M2_mpyud_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 1>; + +def M2_mpyud_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 1>; +def M2_mpyud_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 1>; +def M2_mpyud_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 1>; +def M2_mpyud_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 1>; + +def M2_mpyud_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 1>; +def M2_mpyud_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 1>; +def M2_mpyud_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 1>; +def M2_mpyud_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 1>; + +def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>; +def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>; +def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>; +def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>; + +//===----------------------------------------------------------------------===// +// Template Class -- Vector Multipy +// Used for complex multiply real or imaginary, dual multiply and even halfwords +//===----------------------------------------------------------------------===// +class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift, + bit isRnd, bit isSat > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd","") + #!if(isSat,":sat",""), + [] > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat +let Defs = [USR_OVF] in { +def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 
1, 0, 1>; +def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>; + +// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat +def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>; +def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>; + +// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat +def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>; +def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>; + +// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat +def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>; +def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>; + +//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>; +def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>; +def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>; +def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>; + +//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>; +def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>; +def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>; +def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>; + +//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>; +def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>; +def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>; +def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>; + +//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>; +def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>; +def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>; +def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>; +} + +let hasNewValue = 1, opNewValue = 0 in +class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC, + bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0, + string op2Suffix = "", bit isRaw = 0, bit isHi = 0 > + : MInst <(outs IntRegs:$dst), (ins RC:$src1, RC:$src2), + "$dst = "#mnemonic + #"($src1, $src2"#op2Suffix#")" + #!if(MajOp{2}, ":<<1", "") + #!if(isRnd, ":rnd", "") + #!if(isSat, ":sat", "") + #!if(isRaw, !if(isHi, ":raw:hi", ":raw:lo"), ""), [] > { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1110; + + let Inst{27-24} = RegTyBits; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-8} = src2; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi> + : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>; + +class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0 > + : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>; + +class T_MType_rr1 <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0 > + : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd>; + +class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0, string op2str = "" > + : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>; + +def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>; +def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>; +def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>; + +let CextOpcode = 
"mpyi", InputType = "reg" in +def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel; + +def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>; +def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>; + +def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>; + +def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>; +def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>; + +def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">; +def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">; + +def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>; +def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>; +def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">; +def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">; + +// V4 Instructions +def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>; +def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>; +def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>; +def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>; + +def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">; +def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">; + +def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; + +let hasNewValue = 1, opNewValue = 0 in +class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern> + : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8), + "$Rd ="#!if(isNeg, "- ", "+ ")#"mpyi($Rs, #$u8)" , + pattern, "", M_tc_3x_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<8> u8; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0000; + let Inst{23} = isNeg; + let Inst{13} = 0b0; + let Inst{4-0} = Rd; + let Inst{20-16} = Rs; + let Inst{12-5} = u8; + } + +let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in +def M2_mpysip : T_MType_mpy_ri <0, u8Ext, + [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>; + +def M2_mpysin : T_MType_mpy_ri <1, u8Imm, + [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs, + u8ImmPred:$u8)))]>; + +// Assember mapped to M2_mpyi +let isAsmParserOnly = 1 in +def M2_mpyui : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpyui($src1, $src2)">; // Rd=mpyi(Rs,#m9) // s9 is NOT the same as m9 - but it works.. so far. -// Assembler maps to either Rd=+mpyi(Rs,#u8 or Rd=-mpyi(Rs,#u8) +// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8) // depending on the value of m9. See Arch Spec. 
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, -CextOpcode = "MPYI", InputType = "imm" in -def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), - "$dst = mpyi($src1, #$src2)", - [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - s9ExtPred:$src2))]>, ImmRegRel; - -// Rd=mpyi(Rs,Rt) -let CextOpcode = "MPYI", InputType = "reg" in -def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = mpyi($src1, $src2)", - [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>, ImmRegRel; - -// Rx+=mpyi(Rs,#u8) -let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8, -CextOpcode = "MPYI_acc", InputType = "imm" in -def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3), - "$dst += mpyi($src2, #$src3)", - [(set (i32 IntRegs:$dst), - (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3), - (i32 IntRegs:$src1)))], - "$src1 = $dst">, ImmRegRel; + CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1, + isAsmParserOnly = 1 in +def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), + "$dst = mpyi($src1, #$src2)", + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + s9ExtPred:$src2))]>, ImmRegRel; + +let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3, + InputType = "imm" in +class T_MType_acc_ri <string mnemonic, bits<3> MajOp, Operand ImmOp, + list<dag> pattern = []> + : MInst < (outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, ImmOp:$src3), + "$dst "#mnemonic#"($src2, #$src3)", + pattern, "$src1 = $dst", M_tc_2_SLOT23> { + bits<5> dst; + bits<5> src2; + bits<8> src3; + + let IClass = 0b1110; + + let Inst{27-26} = 0b00; + let Inst{25-23} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b0; + let Inst{12-5} = src3; + let Inst{4-0} = dst; + } -// Rx+=mpyi(Rs,Rt) -let CextOpcode = "MPYI_acc", InputType = "reg" in -def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst), +let InputType = "reg", hasNewValue = 1 in +class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSwap = 0, list<dag> pattern = [], bit hasNot = 0, + bit isSat = 0, bit isShift = 0> + : MInst < (outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst += mpyi($src2, $src3)", - [(set (i32 IntRegs:$dst), - (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - (i32 IntRegs:$src1)))], - "$src1 = $dst">, ImmRegRel; - -// Rx-=mpyi(Rs,#u8) -let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in -def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3), - "$dst -= mpyi($src2, #$src3)", - [(set (i32 IntRegs:$dst), - (sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - u8ExtPred:$src3)))], - "$src1 = $dst">; - -// Multiply and use upper result. -// Rd=mpy(Rs,Rt.H):<<1:rnd:sat -// Rd=mpy(Rs,Rt.L):<<1:rnd:sat -// Rd=mpy(Rs,Rt) -def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = mpy($src1, $src2)", - [(set (i32 IntRegs:$dst), (mulhs (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - -// Rd=mpy(Rs,Rt):rnd -// Rd=mpyu(Rs,Rt) -def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = mpyu($src1, $src2)", - [(set (i32 IntRegs:$dst), (mulhu (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - -// Multiply and use full result. 
-// Rdd=mpyu(Rs,Rt) -def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = mpyu($src1, $src2)", - [(set (i64 DoubleRegs:$dst), - (mul (i64 (anyext (i32 IntRegs:$src1))), - (i64 (anyext (i32 IntRegs:$src2)))))]>; - -// Rdd=mpy(Rs,Rt) -def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = mpy($src1, $src2)", - [(set (i64 DoubleRegs:$dst), - (mul (i64 (sext (i32 IntRegs:$src1))), - (i64 (sext (i32 IntRegs:$src2)))))]>; + "$dst "#mnemonic#"($src2, "#!if(hasNot, "~$src3)","$src3)") + #!if(isShift, ":<<1", "") + #!if(isSat, ":sat", ""), + pattern, "$src1 = $dst", M_tc_2_SLOT23 > { + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if(isSwap, src3, src2); + let Inst{13} = 0b0; + let Inst{12-8} = !if(isSwap, src2, src3); + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in { + def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext, + [(set (i32 IntRegs:$dst), + (add (mul IntRegs:$src2, u8ExtPred:$src3), + IntRegs:$src1))]>, ImmRegRel; + + def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, + [(set (i32 IntRegs:$dst), + (add (mul IntRegs:$src2, IntRegs:$src3), + IntRegs:$src1))]>, ImmRegRel; +} + +let CextOpcode = "ADD_acc" in { + let isExtentSigned = 1 in + def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, + [(set (i32 IntRegs:$dst), + (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3), + (i32 IntRegs:$src1)))]>, ImmRegRel; + + def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, + [(set (i32 IntRegs:$dst), + (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)), + (i32 IntRegs:$src1)))]>, ImmRegRel; +} + +let CextOpcode = "SUB_acc" in { + let isExtentSigned = 1 in + def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel; + + def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel; +} + +let Itinerary = M_tc_3x_SLOT23 in +def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>; + +def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>; +def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>; + +class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp, + PatLeaf ImmPred> + : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), + (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; + +class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), + (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; +def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>; + +def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>; +def : T_MType_acc_pat2 <M2_nacci, add, sub>; + +//===----------------------------------------------------------------------===// +// Template Class -- XType Vector Instructions +//===----------------------------------------------------------------------===// +class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"), + [] > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> 
MinOp, bit isConj > + : MInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"), + [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_XTYPE_Vect_diff < bits<3> MajOp, string opc > + : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss), + "$Rdd = "#opc#"($Rtt, $Rss)", + [], "",M_tc_2_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector reduce add unsigned bytes: Rdd32=vrmpybu(Rss32,Rtt32) +def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>; +def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>; + +// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt) +def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>; +def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>; + +// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss) +def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">; + +// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss) +def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">; + +// Vector reduce complex multiply real or imaginary: +// Rdd[+]=vrcmpy[ir](Rss,Rtt[*]) +def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>; +def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>; +def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>; +def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>; + +def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 0b001, 0>; +def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>; +def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>; +def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>; + +// Vector reduce halfwords: +// Rdd[+]=vrmpyh(Rss,Rtt) +def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>; +def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>; + +//===----------------------------------------------------------------------===// +// Template Class -- Vector Multipy with accumulation. 
+// Used for complex multiply real or imaginary, dual multiply and even halfwords +//===----------------------------------------------------------------------===// +let Defs = [USR_OVF] in +class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp, + bit hasShift, bit isRnd > + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd","")#":sat", + [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp, + bit hasShift, bit isRnd > + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","") + #!if(isRnd,":rnd",""), + [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } + +// Vector multiply word by signed half with accumulation +// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>; +def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>; +def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>; +def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>; + +def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>; +def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>; +def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>; +def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>; + +// Vector multiply word by unsigned half with accumulation +// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat +def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>; +def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>; +def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>; +def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>; + +def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>; +def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>; +def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>; +def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>; + +// Vector multiply even halfwords with accumulation +// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat] +def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>; +def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>; +def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>; + +// Vector dual multiply with accumulation +// Rxx+=vdmpy(Rss,Rtt)[:sat] +def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>; +def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>; + +// Vector complex multiply real or imaginary with accumulation +// Rxx+=vcmpy[ir](Rss,Rtt):sat +def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>; +def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>; + +//===----------------------------------------------------------------------===// +// Template Class -- Multiply signed/unsigned halfwords with and without +// 
saturation and rounding +//===----------------------------------------------------------------------===// +class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned > + : MInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rdd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l") + #", $Rt."#!if(LHbits{0},"h)","l)") + #!if(hasShift,":<<1","") + #!if(isRnd,":rnd",""), + [] > { + bits<5> Rdd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0100; + let Inst{23} = hasShift; + let Inst{22} = isUnsigned; + let Inst{21} = isRnd; + let Inst{6-5} = LHbits; + let Inst{4-0} = Rdd; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; +} + +def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>; +def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>; +def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>; +def M2_mpyd_ll_s0: T_M2_mpyd<0b00, 0, 0, 0>; + +def M2_mpyd_hh_s1: T_M2_mpyd<0b11, 0, 1, 0>; +def M2_mpyd_hl_s1: T_M2_mpyd<0b10, 0, 1, 0>; +def M2_mpyd_lh_s1: T_M2_mpyd<0b01, 0, 1, 0>; +def M2_mpyd_ll_s1: T_M2_mpyd<0b00, 0, 1, 0>; + +def M2_mpyd_rnd_hh_s0: T_M2_mpyd<0b11, 1, 0, 0>; +def M2_mpyd_rnd_hl_s0: T_M2_mpyd<0b10, 1, 0, 0>; +def M2_mpyd_rnd_lh_s0: T_M2_mpyd<0b01, 1, 0, 0>; +def M2_mpyd_rnd_ll_s0: T_M2_mpyd<0b00, 1, 0, 0>; + +def M2_mpyd_rnd_hh_s1: T_M2_mpyd<0b11, 1, 1, 0>; +def M2_mpyd_rnd_hl_s1: T_M2_mpyd<0b10, 1, 1, 0>; +def M2_mpyd_rnd_lh_s1: T_M2_mpyd<0b01, 1, 1, 0>; +def M2_mpyd_rnd_ll_s1: T_M2_mpyd<0b00, 1, 1, 0>; + +//Rdd=mpyu(Rs.[HL],Rt.[HL])[:<<1] +def M2_mpyud_hh_s0: T_M2_mpyd<0b11, 0, 0, 1>; +def M2_mpyud_hl_s0: T_M2_mpyd<0b10, 0, 0, 1>; +def M2_mpyud_lh_s0: T_M2_mpyd<0b01, 0, 0, 1>; +def M2_mpyud_ll_s0: T_M2_mpyd<0b00, 0, 0, 1>; + +def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>; +def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>; +def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>; +def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>; + +//===----------------------------------------------------------------------===// +// Template Class for xtype mpy: +// Vector multiply +// Complex multiply +// multiply 32X32 and use full result +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_XTYPE_mpy64 <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit hasShift, bit isConj> + : MInst <(outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rdd = "#mnemonic#"($Rs, $Rt"#!if(isConj,"*)",")") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + [] > { + bits<5> Rdd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0101; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +//===----------------------------------------------------------------------===// +// Template Class for xtype mpy with accumulation into 64-bit: +// Vector multiply +// Complex multiply +// multiply 32X32 and use full result +//===----------------------------------------------------------------------===// +class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit hasShift, bit isConj> + : MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rxx "#op2#"= "#op1#"($Rs, $Rt"#!if(isConj,"*)",")") + #!if(hasShift,":<<1","") + #!if(isSat,":sat",""), + + [] , "$dst2 = $Rxx" > { + bits<5> Rxx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b0111; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = 
MinOp; + let Inst{4-0} = Rxx; + } + +// MPY - Multiply and use full result +// Rdd = mpy[u](Rs,Rt) +def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>; +def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>; + +// Rxx[+-]= mpy[u](Rs,Rt) +def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>; +def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>; +def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>; +def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>; + +// Complex multiply real or imaginary +// Rxx=cmpy[ir](Rs,Rt) +def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>; +def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>; + +// Rxx+=cmpy[ir](Rs,Rt) +def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>; +def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>; + +// Complex multiply +// Rdd=cmpy(Rs,Rt)[:<<]:sat +def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>; +def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>; + +// Rdd=cmpy(Rs,Rt*)[:<<]:sat +def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>; +def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>; + +// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat +def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>; +def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>; +def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>; +def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>; + +// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat +def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>; +def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>; +def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>; +def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>; + +// Vector multiply halfwords +// Rdd=vmpyh(Rs,Rt)[:<<]:sat +//let Defs = [USR_OVF] in { + def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>; + def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>; +//} + +// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat] +def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>; +def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>; +def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>; + +def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))), + (i64 (anyext (i32 IntRegs:$src2))))), + (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; + +def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))), + (i64 (sext (i32 IntRegs:$src2))))), + (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; + +def: Pat<(i64 (mul (is_sext_i32:$src1), + (is_sext_i32:$src2))), + (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; // Multiply and accumulate, use full result. 
// Rxx[+-]=mpy(Rs,Rt) -// Rxx+=mpy(Rs,Rt) -def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst += mpy($src2, $src3)", - [(set (i64 DoubleRegs:$dst), - (add (mul (i64 (sext (i32 IntRegs:$src2))), - (i64 (sext (i32 IntRegs:$src3)))), - (i64 DoubleRegs:$src1)))], - "$src1 = $dst">; - -// Rxx-=mpy(Rs,Rt) -def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst -= mpy($src2, $src3)", - [(set (i64 DoubleRegs:$dst), - (sub (i64 DoubleRegs:$src1), - (mul (i64 (sext (i32 IntRegs:$src2))), - (i64 (sext (i32 IntRegs:$src3))))))], - "$src1 = $dst">; - -// Rxx[+-]=mpyu(Rs,Rt) -// Rxx+=mpyu(Rs,Rt) -def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - IntRegs:$src2, IntRegs:$src3), - "$dst += mpyu($src2, $src3)", - [(set (i64 DoubleRegs:$dst), - (add (mul (i64 (anyext (i32 IntRegs:$src2))), - (i64 (anyext (i32 IntRegs:$src3)))), - (i64 DoubleRegs:$src1)))], "$src1 = $dst">; - -// Rxx-=mpyu(Rs,Rt) -def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst -= mpyu($src2, $src3)", - [(set (i64 DoubleRegs:$dst), - (sub (i64 DoubleRegs:$src1), - (mul (i64 (anyext (i32 IntRegs:$src2))), - (i64 (anyext (i32 IntRegs:$src3))))))], - "$src1 = $dst">; - - -let InputType = "reg", CextOpcode = "ADD_acc" in -def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, - IntRegs:$src2, IntRegs:$src3), - "$dst += add($src2, $src3)", - [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2), - (i32 IntRegs:$src3)), - (i32 IntRegs:$src1)))], - "$src1 = $dst">, ImmRegRel; - -let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, -InputType = "imm", CextOpcode = "ADD_acc" in -def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, - IntRegs:$src2, s8Ext:$src3), - "$dst += add($src2, #$src3)", - [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2), - s8_16ExtPred:$src3), - (i32 IntRegs:$src1)))], - "$src1 = $dst">, ImmRegRel; - -let CextOpcode = "SUB_acc", InputType = "reg" in -def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, - IntRegs:$src2, IntRegs:$src3), - "$dst -= add($src2, $src3)", - [(set (i32 IntRegs:$dst), - (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, ImmRegRel; - -let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, -CextOpcode = "SUB_acc", InputType = "imm" in -def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, - IntRegs:$src2, s8Ext:$src3), - "$dst -= add($src2, #$src3)", - [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), - (add (i32 IntRegs:$src2), - s8_16ExtPred:$src3)))], - "$src1 = $dst">, ImmRegRel; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))))), + (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))))), + (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (zext (i32 IntRegs:$src2))), + (i64 (zext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, 
IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (zext (i32 IntRegs:$src2))), + (i64 (zext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; //===----------------------------------------------------------------------===// // MTYPE/MPYH - @@ -1464,321 +3250,1134 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, //===----------------------------------------------------------------------===// /// // Store doubleword. - //===----------------------------------------------------------------------===// -// Post increment store +// Template class for non-predicated post increment stores with immediate offset //===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc in +class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isHalf > + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), + mnemonic#"($src1++#$offset) = $src2"#!if(isHalf, ".h", ""), + [], "$src1 = $_dst_" >, + AddrModeRel { + bits<5> src1; + bits<5> src2; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + + let IClass = 0b1010; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-8} = src2; + let Inst{7} = 0b0; + let Inst{6-3} = offsetBits; + let Inst{1} = 0b0; + } -multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, - bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : STInst2PI<(outs IntRegs:$dst), +//===----------------------------------------------------------------------===// +// Template class for predicated post increment stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in +class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew > + : STInst <(outs IntRegs:$_dst_), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($src2++#$offset) = $src3", - [], - "$src2 = $dst">; -} + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3"#!if(isHalf, ".h", ""), + [], "$src2 = $_dst_" >, + AddrModeRel { + bits<2> src1; + bits<5> src2; + bits<7> offset; + bits<5> src3; + bits<4> offsetBits; -multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC, - Operand ImmOp, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; - // Predicate new - let Predicates = [HasV4T], validSubTargets = HasV4SubT in - defm _cdn#NAME#_V4 : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>; + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3}, + !if (!eq(ImmOpStr, 
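The Storepi_pat definitions above map the post-incremented store nodes onto S2_storerb/h/i/d_pi, whose constraint "$src1 = $_dst_" hands back the updated base register. A short stand-alone sketch of the memory behaviour a word-sized form such as S2_storeri_pi provides, i.e. store through the pointer and then advance it by an access-size multiple (storeWordPostInc is an invented illustration, not compiler output):

#include <cstdint>
#include <cstdio>

// memw(Rx++#s4:2) = Rt : write through the current pointer, then bump the
// pointer by the (word-aligned) immediate and return the new pointer.
static int32_t *storeWordPostInc(int32_t *Ptr, int32_t Val, int32_t ByteOff) {
  *Ptr = Val;
  return (int32_t *)((char *)Ptr + ByteOff);
}

int main() {
  int32_t Buf[4] = {0, 0, 0, 0};
  int32_t *P = Buf;
  for (int i = 0; i < 4; ++i)
    P = storeWordPostInc(P, i + 1, 4);  // fills Buf with 1, 2, 3, 4
  printf("%d %d %d %d\n", Buf[0], Buf[1], Buf[2], Buf[3]);
  return 0;
}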
"s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0}))); + + let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + let IClass = 0b1010; + + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12-8} = src3; + let Inst{7} = isPredNew; + let Inst{6-3} = offsetBits; + let Inst{2} = isPredNot; + let Inst{1-0} = src1; } -} -let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC, - Operand ImmOp> { + Operand ImmOp, bits<4> MajOp, bit isHalf = 0 > { - let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in { - let isPredicable = 1 in - def NAME : STInst2PI<(outs IntRegs:$dst), - (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), - mnemonic#"($src1++#$offset) = $src2", - [], - "$src1 = $dst">; - - let isPredicated = 1 in { - defm Pt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 0 >; - defm NotPt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 1 >; - } + let BaseOpcode = "POST_"#BaseOp in { + def S2_#NAME#_pi : T_store_pi <mnemonic, RC, ImmOp, MajOp, isHalf>; + + // Predicated + def S2_p#NAME#t_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 0, 0>; + def S2_p#NAME#f_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 1, 0>; + + // Predicated new + def S2_p#NAME#tnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, + isHalf, 0, 1>; + def S2_p#NAME#fnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, + isHalf, 1, 1>; } } -defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel; -defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel; -defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; +let accessSize = ByteAccess in +defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>; -let isNVStorable = 0 in -defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel; +let accessSize = HalfWordAccess in +defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>; -def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2, - s4_3ImmPred:$offset), - (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>; +let accessSize = WordAccess in +defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>; -def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2, - s4_3ImmPred:$offset), - (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>; +let accessSize = DoubleWordAccess in +defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>; -def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset), - (POST_STwri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>; +let accessSize = HalfWordAccess, isNVStorable = 0 in +defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>; -def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2, - s4_3ImmPred:$offset), - (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>; +class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, + InstHexagon MI> + : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), + (MI I32:$src2, imm:$offset, Value:$src1)>; + +def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>; +def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>; +def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>; +def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>; 
//===----------------------------------------------------------------------===// -// multiclass for the store instructions with MEMri operand. +// Template class for post increment stores with register offset. //===----------------------------------------------------------------------===// -multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : STInst2<(outs), - (ins PredRegs:$src1, MEMri:$addr, RC: $src2), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($addr) = $src2", - []>; -} +let isNVStorable = 1 in +class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp, + MemAccessSize AccessSz, bit isHalf = 0> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2, RC:$src3), + mnemonic#"($src1++$src2) = $src3"#!if(isHalf, ".h", ""), + [], "$src1 = $_dst_" > { + bits<5> src1; + bits<1> src2; + bits<5> src3; + let accessSize = AccessSz; + + let IClass = 0b1010; -multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>; + let Inst{27-24} = 0b1101; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12-8} = src3; + let Inst{7} = 0b0; + } - // Predicate new - let validSubTargets = HasV4SubT, Predicates = [HasV4T] in - defm _cdn#NAME#_V4 : ST_MEMri_Pbase<mnemonic, RC, PredNot, 1>; +def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>; +def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>; +def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>; +def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>; + +def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>; + +let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in +class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3>MajOp, bit isH = 0> + : STInst <(outs), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>, + AddrModeRel, ImmRegRel { + bits<5> src1; + bits<14> src2; // Actual address offset + bits<5> src3; + bits<11> offsetBits; // Represents offset encoding + + string ImmOpStr = !cast<string>(ImmOp); + + let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14, + !if (!eq(ImmOpStr, "s11_2Ext"), 13, + !if (!eq(ImmOpStr, "s11_1Ext"), 12, + /* s11_0Ext */ 11))); + let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), src2{13-3}, + !if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2}, + !if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1}, + /* s11_0Ext */ src2{10-0}))); + let IClass = 0b1010; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24} = 0b1; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = offsetBits{8}; + let Inst{12-8} = src3; + let Inst{7-0} = offsetBits{7-0}; } -} -let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in -multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC, - bits<5> ImmBits, bits<5> PredImmBits> { +let opExtendable = 2, isPredicated = 1 in +class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3>MajOp, bit PredNot, bit isPredNew, bit isH = 0> + : STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+#$src3) = $src4"#!if(isH,".h",""), + [],"",V2LDST_tc_st_SLOT01 >, + AddrModeRel, ImmRegRel 
{ + bits<2> src1; + bits<5> src2; + bits<9> src3; // Actual address offset + bits<5> src4; + bits<6> offsetBits; // Represents offset encoding + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = PredNot; - let CextOpcode = CextOp, BaseOpcode = CextOp in { - let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, - isPredicable = 1 in - def NAME : STInst2<(outs), - (ins MEMri:$addr, RC:$src), - mnemonic#"($addr) = $src", - []>; - - let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits, - isPredicated = 1 in { - defm Pt : ST_MEMri_Pred<mnemonic, RC, 0>; - defm NotPt : ST_MEMri_Pred<mnemonic, RC, 1>; - } + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9, + !if (!eq(ImmOpStr, "u6_2Ext"), 8, + !if (!eq(ImmOpStr, "u6_1Ext"), 7, + /* u6_0Ext */ 6))); + let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), src3{8-3}, + !if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2}, + !if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1}, + /* u6_0Ext */ src3{5-0}))); + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{26} = PredNot; + let Inst{25} = isPredNew; + let Inst{24} = 0b0; + let Inst{23-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = offsetBits{5}; + let Inst{12-8} = src4; + let Inst{7-3} = offsetBits{4-0}; + let Inst{1-0} = src1; + } + +let isExtendable = 1, isNVStorable = 1, hasSideEffects = 0 in +multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { + def S2_#NAME#_io : T_store_io <mnemonic, RC, ImmOp, MajOp, isH>; + + // Predicated + def S2_p#NAME#t_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 0, 0, isH>; + def S2_p#NAME#f_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 1, 0, isH>; + + // Predicated new + def S4_p#NAME#tnew_io : T_pstore_io <mnemonic, RC, predImmOp, + MajOp, 0, 1, isH>; + def S4_p#NAME#fnew_io : T_pstore_io <mnemonic, RC, predImmOp, + MajOp, 1, 1, isH>; } } -let addrMode = BaseImmOffset, isMEMri = "true" in { +let addrMode = BaseImmOffset, InputType = "imm" in { let accessSize = ByteAccess in - defm STrib: ST_MEMri < "memb", "STrib", IntRegs, 11, 6>, AddrModeRel; + defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>; + + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerh: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, u6_1Ext, 0b010>; + + let accessSize = WordAccess, opExtentAlign = 2 in + defm storeri: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, u6_2Ext, 0b100>; - let accessSize = HalfWordAccess in - defm STrih: ST_MEMri < "memh", "STrih", IntRegs, 12, 7>, AddrModeRel; + let accessSize = DoubleWordAccess, isNVStorable = 0, opExtentAlign = 3 in + defm storerd: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext, + u6_3Ext, 0b110>; - let accessSize = WordAccess in - defm STriw: ST_MEMri < "memw", "STriw", IntRegs, 13, 8>, AddrModeRel; + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerf: ST_Idxd < "memh", "STrif", IntRegs, s11_1Ext, + u6_1Ext, 0b011, 1>; +} - let accessSize = DoubleWordAccess, isNVStorable = 0 in - defm STrid: ST_MEMri < "memd", "STrid", DoubleRegs, 14, 9>, AddrModeRel; +// Patterns for generating stores, where the address takes different forms: +// - frameindex,, +// - base + offset, +// - simple (base address without offset). 
+// These would usually be used together (via Storex_pat defined below), but +// in some cases one may want to apply different properties (such as +// AddedComplexity) to the individual patterns. +class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; +class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, Value:$Rt)>; + +// Patterns for generating stores, where the address takes different forms, +// and where the value being stored is transformed through the value modifier +// ValueMod. The address forms are same as above. +class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$Rs, AddrFI:$fi), + (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; +class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; +class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; + +multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, + InstHexagon MI> { + def: Storex_fi_pat <Store, Value, MI>; + def: Storex_add_pat <Store, Value, ImmPred, MI>; } -def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr), - (STrib ADDRriS11_0:$addr, (i32 IntRegs:$src1))>; +multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, + PatFrag ValueMod, InstHexagon MI> { + def: Storexm_fi_pat <Store, Value, ValueMod, MI>; + def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; +} -def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr), - (STrih ADDRriS11_1:$addr, (i32 IntRegs:$src1))>; +// Regular stores in the DAG have two operands: value and address. +// Atomic stores also have two, but they are reversed: address, value. +// To use atomic stores with the patterns, they need to have their operands +// swapped. This relies on the knowledge that the F.Fragment uses names +// "ptr" and "val". +class SwapSt<PatFrag F> + : PatFrag<(ops node:$val, node:$ptr), F.Fragment>; -def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr), - (STriw ADDRriS11_2:$addr, (i32 IntRegs:$src1))>; +let AddedComplexity = 20 in { + defm: Storex_pat<truncstorei8, I32, s11_0ExtPred, S2_storerb_io>; + defm: Storex_pat<truncstorei16, I32, s11_1ExtPred, S2_storerh_io>; + defm: Storex_pat<store, I32, s11_2ExtPred, S2_storeri_io>; + defm: Storex_pat<store, I64, s11_3ExtPred, S2_storerd_io>; + + defm: Storex_pat<SwapSt<atomic_store_8>, I32, s11_0ExtPred, S2_storerb_io>; + defm: Storex_pat<SwapSt<atomic_store_16>, I32, s11_1ExtPred, S2_storerh_io>; + defm: Storex_pat<SwapSt<atomic_store_32>, I32, s11_2ExtPred, S2_storeri_io>; + defm: Storex_pat<SwapSt<atomic_store_64>, I64, s11_3ExtPred, S2_storerd_io>; +} -def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr), - (STrid ADDRriS11_3:$addr, (i64 DoubleRegs:$src1))>; +// Simple patterns should be tried with the least priority. 
+def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>; +def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>; +def: Storex_simple_pat<store, I32, S2_storeri_io>; +def: Storex_simple_pat<store, I64, S2_storerd_io>; +def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>; +def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>; +def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>; +def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>; + +let AddedComplexity = 20 in { + defm: Storexm_pat<truncstorei8, I64, s11_0ExtPred, LoReg, S2_storerb_io>; + defm: Storexm_pat<truncstorei16, I64, s11_1ExtPred, LoReg, S2_storerh_io>; + defm: Storexm_pat<truncstorei32, I64, s11_2ExtPred, LoReg, S2_storeri_io>; +} + +def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; +def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>; +def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>; + +// Store predicate. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +def STriw_pred : STInst<(outs), + (ins IntRegs:$addr, s11_2Ext:$off, PredRegs:$src1), + ".error \"should not emit\"", []>; + +// S2_allocframe: Allocate stack frame. +let Defs = [R29, R30], Uses = [R29, R31, R30], + hasSideEffects = 0, accessSize = DoubleWordAccess in +def S2_allocframe: ST0Inst < + (outs), (ins u11_3Imm:$u11_3), + "allocframe(#$u11_3)" > { + bits<14> u11_3; + + let IClass = 0b1010; + let Inst{27-16} = 0b000010011101; + let Inst{13-11} = 0b000; + let Inst{10-0} = u11_3{13-3}; + } + +// S2_storer[bhwdf]_pci: Store byte/half/word/double. +// S2_storer[bhwdf]_pci -> S2_storerbnew_pci +let Uses = [CS], isNVStorable = 1 in +class T_store_pci <string mnemonic, RegisterClass RC, + Operand Imm, bits<4>MajOp, + MemAccessSize AlignSize, string RegSrc = "Rt"> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, RC:$Rt), + #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $"#RegSrc#"", + [] , + "$Rz = $_dst_" > { + bits<5> Rz; + bits<7> offset; + bits<1> Mu; + bits<5> Rt; + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{6-3} = + !if (!eq(!cast<string>(AlignSize), "DoubleWordAccess"), offset{6-3}, + !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2}, + !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1}, + /* ByteAccess */ offset{3-0}))); + let Inst{1} = 0b0; + } + +def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000, + ByteAccess>; +def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010, + HalfWordAccess>; +def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011, + HalfWordAccess, "Rt.h">; +def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100, + WordAccess>; +def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110, + DoubleWordAccess>; + +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in +class T_storenew_pci <string mnemonic, Operand Imm, + bits<2>MajOp, MemAccessSize AlignSize> + : NVInst < (outs IntRegs:$_dst_), + (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new", + [], + "$Rz = $_dst_"> { + bits<5> Rz; + bits<6> offset; + bits<1> Mu; + bits<3> Nt; + + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-21} 
= 0b1001101; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-11} = MajOp; + let Inst{10-8} = Nt; + let Inst{7} = 0b0; + let Inst{6-3} = + !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2}, + !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1}, + /* ByteAccess */ offset{3-0})); + let Inst{1} = 0b0; + } + +def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>; +def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>; +def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Circular stores - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match with the real instructions. Pseudo instructions operand +// order should mimics the ordering in the intrinsics. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in +class T_store_pci_pseudo <string opc, RegisterClass RC> + : STInstPI<(outs IntRegs:$_dst_), + (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4), + ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"", + [], "$_dst_ = $src1">; + +def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>; +def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>; +def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>; +def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>; +def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>; //===----------------------------------------------------------------------===// -// multiclass for the store instructions with base+immediate offset -// addressing mode +// Circular stores with auto-increment register //===----------------------------------------------------------------------===// -multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, - bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : STInst2<(outs), - (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($src2+#$src3) = $src4", - []>; +let Uses = [CS], isNVStorable = 1 in +class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp, + MemAccessSize AlignSize, string RegSrc = "Rt"> + : STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, RC:$Rt), + #mnemonic#"($Rz ++ I:circ($Mu)) = $"#RegSrc#"", + [], + "$Rz = $_dst_" > { + bits<5> Rz; + bits<1> Mu; + bits<5> Rt; + + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-25} = 0b100; + let Inst{24-21} = MajOp; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{1} = 0b1; + } + +def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>; +def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>; +def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>; +def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>; +def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011, + HalfWordAccess, "Rt.h">; + +//===----------------------------------------------------------------------===// +// Circular .new stores with auto-increment register +//===----------------------------------------------------------------------===// +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in 
+class T_storenew_pcr <string mnemonic, bits<2>MajOp, + MemAccessSize AlignSize> + : NVInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new" , + [] , + "$Rz = $_dst_"> { + bits<5> Rz; + bits<1> Mu; + bits<3> Nt; + + let accessSize = AlignSize; + + let IClass = 0b1010; + let Inst{27-21} = 0b1001101; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-11} = MajOp; + let Inst{10-8} = Nt; + let Inst{7} = 0b0; + let Inst{1} = 0b1; + } + +def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>; +def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>; +def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Bit-reversed stores with auto-increment register +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_store_pbr<string mnemonic, RegisterClass RC, + MemAccessSize addrSize, bits<3> majOp, + bit isHalf = 0> + : STInst + <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, RC:$src), + #mnemonic#"($Rz ++ $Mu:brev) = $src"#!if (!eq(isHalf, 1), ".h", ""), + [], "$Rz = $_dst_" > { + + let accessSize = addrSize; + + bits<5> Rz; + bits<1> Mu; + bits<5> src; + + let IClass = 0b1010; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = majOp; + let Inst{7} = 0b0; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{12-8} = src; + } + +let isNVStorable = 1 in { + let BaseOpcode = "S2_storerb_pbr" in + def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess, + 0b000>, NewValueRel; + let BaseOpcode = "S2_storerh_pbr" in + def S2_storerh_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, + 0b010>, NewValueRel; + let BaseOpcode = "S2_storeri_pbr" in + def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess, + 0b100>, NewValueRel; } -multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, - bit PredNot> { - let isPredicatedFalse = PredNot, isPredicated = 1 in { - defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; +def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>; +def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>; - // Predicate new - let validSubTargets = HasV4SubT, Predicates = [HasV4T] in - defm _cdn#NAME#_V4 : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>; +//===----------------------------------------------------------------------===// +// Bit-reversed .new stores with auto-increment register +//===----------------------------------------------------------------------===// +let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3, + hasSideEffects = 0 in +class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp> + : NVInst <(outs IntRegs:$_dst_), + (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt), + #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [], + "$Rz = $_dst_">, NewValueRel { + let accessSize = addrSize; + bits<5> Rz; + bits<1> Mu; + bits<3> Nt; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1111101; + let Inst{12-11} = majOp; + let Inst{7} = 0b0; + let Inst{20-16} = Rz; + let Inst{13} = Mu; + let Inst{10-8} = Nt; } + +let BaseOpcode = "S2_storerb_pbr" in +def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>; + +let BaseOpcode = "S2_storerh_pbr" in +def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>; + +let BaseOpcode = "S2_storeri_pbr" in +def S2_storerinew_pbr : 
T_storenew_pbr<"memw", WordAccess, 0b10>; + +//===----------------------------------------------------------------------===// +// Bit-reversed stores - Pseudo +// +// Please note that the input operand order in the pseudo instructions +// doesn't match with the real instructions. Pseudo instructions operand +// order should mimics the ordering in the intrinsics. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in +class T_store_pbr_pseudo <string opc, RegisterClass RC> + : STInstPI<(outs IntRegs:$_dst_), + (ins IntRegs:$src1, RC:$src2, IntRegs:$src3), + ".error \""#opc#"($src1++$src3:brev) = $src2\"", + [], "$_dst_ = $src1">; + +def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>; +def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>; +def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>; +def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>; +def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>; + +//===----------------------------------------------------------------------===// +// ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Template class for S_2op instructions. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut, + RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat> + : SInst <(outs RCOut:$dst), (ins RCIn:$src), + "$dst = "#mnemonic#"($src)"#!if(isSat, ":sat", ""), + [], "", S_2op_tc_1_SLOT23 > { + bits<5> dst; + bits<5> src; + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23-22} = MajOp; + let Inst{21} = 0b0; + let Inst{20-16} = src; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } + +class T_S2op_1_di <string mnemonic, bits<2> MajOp, bits<3> MinOp> + : T_S2op_1 <mnemonic, 0b0100, DoubleRegs, IntRegs, MajOp, MinOp, 0>; + +let hasNewValue = 1 in +class T_S2op_1_id <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0> + : T_S2op_1 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, isSat>; + +let hasNewValue = 1 in +class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0> + : T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>; + +// Vector sign/zero extend +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>; + def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>; + def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>; + def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>; } -let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in -multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC, - Operand ImmOp, Operand predImmOp, bits<5> ImmBits, - bits<5> PredImmBits> { +// Vector splat bytes/halfwords +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>; + def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>; +} - let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { - let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, - isPredicable = 1 in - def NAME : STInst2<(outs), - (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), - mnemonic#"($src1+#$src2) = $src3", - []>; +// Sign extend word to doubleword +def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 
0b000>; - let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits in { - defm Pt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 0>; - defm NotPt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 1>; - } +def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; + +// Vector saturate and pack +let Defs = [USR_OVF] in { + def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>; + def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>; + def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>; + def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>; + def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>; + def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>; +} + +// Vector truncate +def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>; +def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>; + +// Swizzle the bytes of a word +def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>; + +// Saturate +let Defs = [USR_OVF] in { + def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>; + def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>; + def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>; + def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>; + def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>; + def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>; +} + +let Itinerary = S_2op_tc_2_SLOT23 in { + // Vector round and pack + def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>; + + let Defs = [USR_OVF] in + def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>; + + // Bit reverse + def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>; + + // Absolute value word + def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>; + + let Defs = [USR_OVF] in + def A2_abssat : T_S2op_1_ii <"abs", 0b10, 0b101, 1>; + + // Negate with saturation + let Defs = [USR_OVF] in + def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>; +} + +def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)), + (i32 (sub 0, (i32 IntRegs:$src))), + (i32 IntRegs:$src))), + (A2_abs IntRegs:$src)>; + +let AddedComplexity = 50 in +def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)), + (i32 IntRegs:$src)), + (sra (i32 IntRegs:$src), (i32 31)))), + (A2_abs IntRegs:$src)>; + +class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut, + RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp, + bit isSat, bit isRnd, list<dag> pattern = []> + : SInst <(outs RCOut:$dst), + (ins RCIn:$src, u5Imm:$u5), + "$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "") + #!if(isRnd, ":rnd", ""), + pattern, "", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src; + bits<5> u5; + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23-21} = MajOp; + let Inst{20-16} = src; + let Inst{13} = 0b0; + let Inst{12-8} = u5; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; } + +class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : T_S2op_2 <mnemonic, 0b1000, DoubleRegs, IntRegs, MajOp, MinOp, 0, 0>; + +let hasNewValue = 1 in +class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>; + +let hasNewValue = 1 in +class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit isSat = 0, bit isRnd = 0, list<dag> pattern = []> + : T_S2op_2 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, + isSat, isRnd, pattern>; + +class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd> + : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, + [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src), + (u5ImmPred:$u5)))]>; + +// Vector arithmetic shift right by immediate with truncate 
and pack +def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>; + +// Arithmetic/logical shift right/left by immediate +let Itinerary = S_2op_tc_1_SLOT23 in { + def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>; + def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>; + def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>; } -let addrMode = BaseImmOffset, InputType = "reg" in { - let accessSize = ByteAccess in - defm STrib_indexed: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, - u6_0Ext, 11, 6>, AddrModeRel, ImmRegRel; +// Shift left by immediate with saturation +let Defs = [USR_OVF] in +def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>; + +// Shift right with round +def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>; + +let isAsmParserOnly = 1 in +def S2_asr_i_r_rnd_goodsyntax + : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5), + "$dst = asrrnd($src, #$u5)", + [], "", S_2op_tc_1_SLOT23>; + +let isAsmParserOnly = 1 in +def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src), + "$dst = not($src)">; + +def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)), + (i32 1))), + (i32 1))), + (S2_asr_i_r_rnd IntRegs:$src1, u5ImmPred:$src2)>; + +class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0> + : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), + "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> { + bits<5> Rss; + bits<5> Rdd; + let IClass = 0b1000; + let Inst{27-24} = 0; + let Inst{23-22} = MajOp; + let Inst{20-16} = Rss; + let Inst{7-5} = minOp; + let Inst{4-0} = Rdd; +} + +def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>; +def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>; +def A2_notp : T_S2op_3 <"not", 0b10, 0b100>; + +// Innterleave/deinterleave +def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>; +def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>; + +// Vector Complex conjugate +def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>; + +// Vector saturate without pack +def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>; +def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>; +def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>; +def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>; + +// Vector absolute value halfwords with and without saturation +// Rdd64=vabsh(Rss64)[:sat] +def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>; +def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>; + +// Vector absolute value words with and without saturation +def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>; +def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>; + +def : Pat<(not (i64 DoubleRegs:$src1)), + (A2_notp DoubleRegs:$src1)>; + +//===----------------------------------------------------------------------===// +// STYPE/BIT + +//===----------------------------------------------------------------------===// +// Bit count + +let hasSideEffects = 0, hasNewValue = 1 in +class T_COUNT_LEADING<string MnOp, bits<3> MajOp, bits<3> MinOp, bit Is32, + dag Out, dag Inp> + : SInst<Out, Inp, "$Rd = "#MnOp#"($Rs)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rs; + bits<5> Rd; + let IClass = 0b1000; + let Inst{27} = 0b1; + let Inst{26} = Is32; + let Inst{25-24} = 0b00; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} - let accessSize = HalfWordAccess in - defm STrih_indexed: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, - u6_1Ext, 12, 7>, AddrModeRel, ImmRegRel; +class T_COUNT_LEADING_32<string MnOp, bits<3> MajOp, bits<3> MinOp> + : T_COUNT_LEADING<MnOp, MajOp, 
MinOp, 0b1, + (outs IntRegs:$Rd), (ins IntRegs:$Rs)>; + +class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp> + : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0, + (outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>; + +def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>; +def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>; +def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>; +def S2_ct1 : T_COUNT_LEADING_32<"ct1", 0b010, 0b101>; +def S2_cl0p : T_COUNT_LEADING_64<"cl0", 0b010, 0b010>; +def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>; +def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>; +def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>; +def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>; + +def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; +def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; +def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; + +// Bit set/clear/toggle - let accessSize = WordAccess in - defm STriw_indexed: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, - u6_2Ext, 13, 8>, AddrModeRel, ImmRegRel; +let hasSideEffects = 0, hasNewValue = 1 in +class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp> + : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5Imm:$u5), + "$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> u5; + let IClass = 0b1000; + let Inst{27-21} = 0b1100110; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = u5; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; +} - let accessSize = DoubleWordAccess, isNVStorable = 0 in - defm STrid_indexed: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext, - u6_3Ext, 14, 9>, AddrModeRel; +let hasSideEffects = 0, hasNewValue = 1 in +class T_SCT_BIT_REG<string MnOp, bits<2> MinOp> + : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#MnOp#"($Rs, $Rt)", [], "", S_3op_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-22} = 0b011010; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-6} = MinOp; + let Inst{4-0} = Rd; } -let AddedComplexity = 10 in { -def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2, - s11_0ExtPred:$offset)), - (STrib_indexed IntRegs:$src2, s11_0ImmPred:$offset, - (i32 IntRegs:$src1))>; +def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>; +def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>; +def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>; +def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>; +def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>; +def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>; + +def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))), + (S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))), + (S2_setbit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))), + (S2_togglebit_i IntRegs:$Rs, u5ImmPred:$u5)>; +def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))), + (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), + (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), + (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; + +// Bit test + +let hasSideEffects = 0 in +class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp> + : SInst<(outs 
PredRegs:$Pd), (ins IntRegs:$Rs, u5Imm:$u5), + "$Pd = "#MnOp#"($Rs, #$u5)", + [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> u5; + let IClass = 0b1000; + let Inst{27-24} = 0b0101; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0; + let Inst{12-8} = u5; + let Inst{1-0} = Pd; +} -def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2, - s11_1ExtPred:$offset)), - (STrih_indexed IntRegs:$src2, s11_1ImmPred:$offset, - (i32 IntRegs:$src1))>; +let hasSideEffects = 0 in +class T_TEST_BIT_REG<string MnOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#MnOp#"($Rs, $Rt)", + [], "", S_3op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-22} = 0b011100; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} -def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2, - s11_2ExtPred:$offset)), - (STriw_indexed IntRegs:$src2, s11_2ImmPred:$offset, - (i32 IntRegs:$src1))>; +def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>; +def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>; + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), + (S2_tstbit_i IntRegs:$Rs, u5ImmPred:$u5)>; + def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), + (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (trunc (i32 IntRegs:$Rs))), + (S2_tstbit_i IntRegs:$Rs, 0)>; + def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))), + (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; +} -def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2, - s11_3ExtPred:$offset)), - (STrid_indexed IntRegs:$src2, s11_3ImmPred:$offset, - (i64 DoubleRegs:$src1))>; +let hasSideEffects = 0 in +class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6), + "$Pd = "#MnOp#"($Rs, #$u6)", + [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<6> u6; + let IClass = 0b1000; + let Inst{27-24} = 0b0101; + let Inst{23-22} = MajOp; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{13-8} = u6; + let Inst{1-0} = Pd; } -// memh(Rx++#s4:1)=Rt.H +let hasSideEffects = 0 in +class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#MnOp#"($Rs, $Rt)", + [], "", S_3op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + let IClass = 0b1100; + let Inst{27-24} = 0b0111; + let Inst{23-22} = MajOp; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{1-0} = Pd; +} -// Store word. -// Store predicate. -let Defs = [R10,R11,D5], neverHasSideEffects = 1 in -def STriw_pred : STInst2<(outs), - (ins MEMri:$addr, PredRegs:$src1), - "Error; should not emit", - []>; +def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>; +def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>; +def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>; -// Allocate stack frame. -let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in { - def ALLOCFRAME : STInst2<(outs), - (ins i32imm:$amt), - "allocframe(#$amt)", - []>; +let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. 
+ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)), + (C2_bitsclri IntRegs:$Rs, u6ImmPred:$u6)>; + def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)), + (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; } + +let AddedComplexity = 10 in // Complexity greater than compare reg-reg. +def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), + (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; + //===----------------------------------------------------------------------===// -// ST - +// STYPE/BIT - //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// STYPE/ALU + +// STYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/COMPLEX - //===----------------------------------------------------------------------===// -// Logical NOT. -def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), - "$dst = not($src1)", - [(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>; +//===----------------------------------------------------------------------===// +// XTYPE/PERM + +//===----------------------------------------------------------------------===// -// Sign extend word to doubleword. -def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - "$dst = sxtw($src1)", - [(set (i64 DoubleRegs:$dst), (sext (i32 IntRegs:$src1)))]>; //===----------------------------------------------------------------------===// -// STYPE/ALU - +// XTYPE/PERM - //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// STYPE/BIT + +// STYPE/PRED + //===----------------------------------------------------------------------===// -// clrbit. -def CLRBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = clrbit($src1, #$src2)", - [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), - (not - (shl 1, u5ImmPred:$src2))))]>; - -def CLRBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = clrbit($src1, #$src2)", - []>; - -// Map from r0 = and(r1, 2147483647) to r0 = clrbit(r1, #31). -def : Pat <(and (i32 IntRegs:$src1), 2147483647), - (CLRBIT_31 (i32 IntRegs:$src1), 31)>; - -// setbit. -def SETBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = setbit($src1, #$src2)", - [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), - (shl 1, u5ImmPred:$src2)))]>; - -// Map from r0 = or(r1, -2147483648) to r0 = setbit(r1, #31). -def SETBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = setbit($src1, #$src2)", - []>; - -def : Pat <(or (i32 IntRegs:$src1), -2147483648), - (SETBIT_31 (i32 IntRegs:$src1), 31)>; - -// togglebit. -def TOGBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = setbit($src1, #$src2)", - [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1), - (shl 1, u5ImmPred:$src2)))]>; - -// Map from r0 = xor(r1, -2147483648) to r0 = togglebit(r1, #31). -def TOGBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = togglebit($src1, #$src2)", - []>; - -def : Pat <(xor (i32 IntRegs:$src1), -2147483648), - (TOGBIT_31 (i32 IntRegs:$src1), 31)>; // Predicate transfer. 
-let neverHasSideEffects = 1 in -def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1), - "$dst = $src1 /* Should almost never emit this. */", - []>; +let hasSideEffects = 0, hasNewValue = 1 in +def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps), + "$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> { + bits<5> Rd; + bits<2> Ps; + + let IClass = 0b1000; + let Inst{27-24} = 0b1001; + let Inst{22} = 0b1; + let Inst{17-16} = Ps; + let Inst{4-0} = Rd; +} + +// Transfer general register to predicate. +let hasSideEffects = 0 in +def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs), + "$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + + let IClass = 0b1000; + let Inst{27-21} = 0b0101010; + let Inst{20-16} = Rs; + let Inst{1-0} = Pd; +} + +let hasSideEffects = 0, isCodeGenOnly = 1 in +def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src), + "$dst = $src">; + + +// Patterns for loads of i1: +def: Pat<(i1 (load AddrFI:$fi)), + (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; +def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s11_0ExtPred:$Off))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; +def: Pat<(i1 (load (i32 IntRegs:$Rs))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; + +def I1toI32: OutPatFrag<(ops node:$Rs), + (C2_muxii (i1 $Rs), 1, 0)>; + +def I32toI1: OutPatFrag<(ops node:$Rs), + (i1 (C2_tfrrp (i32 $Rs)))>; + +defm: Storexm_pat<store, I1, s11_0ExtPred, I1toI32, S2_storerb_io>; +def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; -def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1), - "$dst = $src1 /* Should almost never emit this. */", - [(set (i1 PredRegs:$dst), (trunc (i32 IntRegs:$src1)))]>; //===----------------------------------------------------------------------===// // STYPE/PRED - //===----------------------------------------------------------------------===// @@ -1786,88 +4385,56 @@ def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1), //===----------------------------------------------------------------------===// // STYPE/SHIFT + //===----------------------------------------------------------------------===// +class S_2OpInstImm<string Mnemonic, bits<3>MajOp, bits<3>MinOp, + Operand Imm, list<dag> pattern = [], bit isRnd = 0> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, Imm:$src2), + "$dst = "#Mnemonic#"($src1, #$src2)"#!if(isRnd, ":rnd", ""), + pattern> { + bits<5> src1; + bits<5> dst; + let IClass = 0b1000; + let Inst{27-24} = 0; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; +} + +class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp> + : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6Imm, + [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1), + u6ImmPred:$src2))]> { + bits<6> src2; + let Inst{13-8} = src2; +} + // Shift by immediate. 
-def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = asr($src1, #$src2)", - [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1), - u5ImmPred:$src2))]>; - -def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), - "$dst = asr($src1, #$src2)", - [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1), - u6ImmPred:$src2))]>; - -def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = asl($src1, #$src2)", - [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1), - u5ImmPred:$src2))]>; - -def ASLd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), - "$dst = asl($src1, #$src2)", - [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1), - u6ImmPred:$src2))]>; - -def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = lsr($src1, #$src2)", - [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1), - u5ImmPred:$src2))]>; - -def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), - "$dst = lsr($src1, #$src2)", - [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1), - u6ImmPred:$src2))]>; - -// Shift by immediate and add. -let AddedComplexity = 100 in -def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - u3Imm:$src3), - "$dst = addasl($src1, $src2, #$src3)", - [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), - (shl (i32 IntRegs:$src2), - u3ImmPred:$src3)))]>; - -// Shift by register. -def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = asl($src1, $src2)", - [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - -def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = asr($src1, $src2)", - [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - -def LSL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = lsl($src1, $src2)", - [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - -def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = lsr($src1, $src2)", - [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - -def ASLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - "$dst = asl($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1), - (i32 IntRegs:$src2)))]>; - -def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - "$dst = lsl($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1), - (i32 IntRegs:$src2)))]>; - -def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - IntRegs:$src2), - "$dst = asr($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1), - (i32 IntRegs:$src2)))]>; - -def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - IntRegs:$src2), - "$dst = lsr($src1, $src2)", - [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1), - (i32 IntRegs:$src2)))]>; +def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>; +def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>; +def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>; + +// Shift left by small amount and add. 
+let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in +def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), + (ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3), + "$Rd = addasl($Rt, $Rs, #$u3)" , + [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt), + (shl (i32 IntRegs:$Rs), u3ImmPred:$u3)))], + "", S_3op_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rt; + bits<5> Rs; + bits<3> u3; + + let IClass = 0b1100; + + let Inst{27-21} = 0b0100000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7-5} = u3; + let Inst{4-0} = Rd; + } //===----------------------------------------------------------------------===// // STYPE/SHIFT - @@ -1894,39 +4461,222 @@ def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, //===----------------------------------------------------------------------===// // SYSTEM/USER + //===----------------------------------------------------------------------===// -def SDHexagonBARRIER: SDTypeProfile<0, 0, []>; -def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER, - [SDNPHasChain]>; +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; -let hasSideEffects = 1, isSolo = 1 in -def BARRIER : SYSInst<(outs), (ins), +let hasSideEffects = 1, isSoloAX = 1 in +def Y2_barrier : SYSInst<(outs), (ins), "barrier", - [(HexagonBARRIER)]>; + [(HexagonBARRIER)],"",ST_tc_st_SLOT0> { + let Inst{31-28} = 0b1010; + let Inst{27-21} = 0b1000000; +} //===----------------------------------------------------------------------===// // SYSTEM/SUPER - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// CRUSER - Type. +//===----------------------------------------------------------------------===// +// HW loop +let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, hasSideEffects = 0 in +class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0> + : CRInst<(outs), (ins brOp:$offset, u10Imm:$src2), + #mnemonic#"($offset, #$src2)", + [], "" , CR_tc_3x_SLOT3> { + bits<9> offset; + bits<10> src2; + + let IClass = 0b0110; + + let Inst{27-22} = 0b100100; + let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1); + let Inst{20-16} = src2{9-5}; + let Inst{12-8} = offset{8-4}; + let Inst{7-5} = src2{4-2}; + let Inst{4-3} = offset{3-2}; + let Inst{1-0} = src2{1-0}; +} -// TFRI64 - assembly mapped. -let isReMaterializable = 1 in -def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), - "$dst = #$src1", - [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>; - -// Pseudo instruction to encode a set of conditional transfers. -// This instruction is used instead of a mux and trades-off codesize -// for performance. We conduct this transformation optimistically in -// the hope that these instructions get promoted to dot-new transfers. 
-let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, - IntRegs:$src2, - IntRegs:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), - (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))]>; -let AddedComplexity = 100, isPredicated = 1 in +let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, hasSideEffects = 0 in +class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0> + : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2), + #mnemonic#"($offset, $src2)", + [], "" ,CR_tc_3x_SLOT3> { + bits<9> offset; + bits<5> src2; + + let IClass = 0b0110; + + let Inst{27-22} = 0b000000; + let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1); + let Inst{20-16} = src2; + let Inst{12-8} = offset{8-4}; + let Inst{4-3} = offset{3-2}; + } + +multiclass LOOP_ri<string mnemonic> { + def i : LOOP_iBase<mnemonic, brtarget>; + def r : LOOP_rBase<mnemonic, brtarget>; +} + + +let Defs = [SA0, LC0, USR] in +defm J2_loop0 : LOOP_ri<"loop0">; + +// Interestingly only loop0's appear to set usr.lpcfg +let Defs = [SA1, LC1] in +defm J2_loop1 : LOOP_ri<"loop1">; + +let isBranch = 1, isTerminator = 1, hasSideEffects = 0, + Defs = [PC, LC0], Uses = [SA0, LC0] in { +def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset), + ":endloop0", + []>; +} + +let isBranch = 1, isTerminator = 1, hasSideEffects = 0, + Defs = [PC, LC1], Uses = [SA1, LC1] in { +def ENDLOOP1 : Endloop<(outs), (ins brtarget:$offset), + ":endloop1", + []>; +} + +// Pipelined loop instructions, sp[123]loop0 +let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, isPredicateLate = 1 in +class SPLOOP_iBase<string SP, bits<2> op> + : CRInst <(outs), (ins brtarget:$r7_2, u10Imm:$U10), + "p3 = sp"#SP#"loop0($r7_2, #$U10)" > { + bits<9> r7_2; + bits<10> U10; + + let IClass = 0b0110; + + let Inst{22-21} = op; + let Inst{27-23} = 0b10011; + let Inst{20-16} = U10{9-5}; + let Inst{12-8} = r7_2{8-4}; + let Inst{7-5} = U10{4-2}; + let Inst{4-3} = r7_2{3-2}; + let Inst{1-0} = U10{1-0}; + } + +let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0, + isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2, + opExtendable = 0, isPredicateLate = 1 in +class SPLOOP_rBase<string SP, bits<2> op> + : CRInst <(outs), (ins brtarget:$r7_2, IntRegs:$Rs), + "p3 = sp"#SP#"loop0($r7_2, $Rs)" > { + bits<9> r7_2; + bits<5> Rs; + + let IClass = 0b0110; + + let Inst{22-21} = op; + let Inst{27-23} = 0b00001; + let Inst{20-16} = Rs; + let Inst{12-8} = r7_2{8-4}; + let Inst{4-3} = r7_2{3-2}; + } + +multiclass SPLOOP_ri<string mnemonic, bits<2> op> { + def i : SPLOOP_iBase<mnemonic, op>; + def r : SPLOOP_rBase<mnemonic, op>; +} + +defm J2_ploop1s : SPLOOP_ri<"1", 0b01>; +defm J2_ploop2s : SPLOOP_ri<"2", 0b10>; +defm J2_ploop3s : SPLOOP_ri<"3", 0b11>; + +// if (Rs[!>=<]=#0) jump:[t/nt] +let Defs = [PC], isPredicated = 1, isBranch = 1, hasSideEffects = 0, + hasSideEffects = 0 in +class J2_jump_0_Base<string compare, bit isTak, bits<2> op> + : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2), + "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > { + bits<5> Rs; + bits<15> r13_2; + + let IClass = 0b0110; + + let Inst{27-24} = 0b0001; + let Inst{23-22} = op; + let Inst{12} = isTak; + let Inst{21} = r13_2{14}; + let Inst{20-16} = Rs; + let Inst{11-1} = r13_2{12-2}; + let Inst{13} = r13_2{13}; + } + +multiclass 
J2_jump_compare_0<string compare, bits<2> op> { + def NAME : J2_jump_0_Base<compare, 0, op>; + def NAME#pt : J2_jump_0_Base<compare, 1, op>; +} + +defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>; +defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>; +defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>; +defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>; + +// Transfer to/from Control/GPR Guest/GPR +let hasSideEffects = 0 in +class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble> + : CRInst <(outs CTRC:$dst), (ins RC:$src), + "$dst = $src", [], "", CR_tc_3x_SLOT3> { + bits<5> dst; + bits<5> src; + + let IClass = 0b0110; + + let Inst{27-25} = 0b001; + let Inst{24} = isDouble; + let Inst{23-21} = 0b001; + let Inst{20-16} = src; + let Inst{4-0} = dst; + } + +def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>; +def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>; +def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; +def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; + +let hasSideEffects = 0 in +class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle> + : CRInst <(outs RC:$dst), (ins CTRC:$src), + "$dst = $src", [], "", CR_tc_3x_SLOT3> { + bits<5> dst; + bits<5> src; + + let IClass = 0b0110; + + let Inst{27-26} = 0b10; + let Inst{25} = isSingle; + let Inst{24-21} = 0b0000; + let Inst{20-16} = src; + let Inst{4-0} = dst; + } + +let hasNewValue = 1, opNewValue = 0 in +def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>; +def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>; +def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; +def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; + +// Y4_trace: Send value to etm trace. +let isSoloAX = 1, hasSideEffects = 0 in +def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), + "trace($Rs)"> { + bits<5> Rs; + + let IClass = 0b0110; + let Inst{27-21} = 0b0010010; + let Inst{20-16} = Rs; + } + +let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3), "Error; should not emit", @@ -1934,7 +4684,7 @@ def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst), (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), s12ImmPred:$src3)))]>; -let AddedComplexity = 100, isPredicated = 1 in +let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3), "Error; should not emit", @@ -1942,7 +4692,7 @@ def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst), (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, (i32 IntRegs:$src3))))]>; -let AddedComplexity = 100, isPredicated = 1 in +let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), "Error; should not emit", @@ -1951,115 +4701,109 @@ def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), s12ImmPred:$src3)))]>; // Generate frameindex addresses. -let isReMaterializable = 1 in +let isReMaterializable = 1, isCodeGenOnly = 1 in def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1), "$dst = add($src1)", [(set (i32 IntRegs:$dst), ADDRri:$src1)]>; -// -// CR - Type. -// -let neverHasSideEffects = 1, Defs = [SA0, LC0] in { -def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2), - "loop0($offset, #$src2)", - []>; -} +// Support for generating global address. +// Taken from X86InstrInfo.td. 
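The comment above introduces the CONST32/HI/LO machinery that follows; for orientation, a full 32-bit constant or address is built in two 16-bit halves ("$Rd.h = #HI(v)" then "$Rd.l = #LO(v)"), which the REG_IMMED class below encodes. A minimal C sketch of that composition (illustration only, hypothetical helper name):

    #include <stdint.h>

    /* Sketch only: a 32-bit value materialized as two 16-bit halves,
       mirroring "$Rd.h = #HI(v)" followed by "$Rd.l = #LO(v)". */
    static uint32_t materialize32(uint32_t v) {
      uint32_t rd = 0;
      rd = (rd & 0x0000ffffu) | (v & 0xffff0000u);  /* Rd.h = #HI(v) */
      rd = (rd & 0xffff0000u) | (v & 0x0000ffffu);  /* Rd.l = #LO(v) */
      return rd;
    }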
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<0>]>; +def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; -let neverHasSideEffects = 1, Defs = [SA0, LC0] in { -def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2), - "loop0($offset, $src2)", - []>; -} +// HI/LO Instructions +let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, + hasNewValue = 1, opNewValue = 0 in +class REG_IMMED<string RegHalf, string Op, bit Rs, bits<3> MajOp, bit MinOp> + : ALU32_ri<(outs IntRegs:$dst), + (ins i32imm:$imm_value), + "$dst"#RegHalf#" = #"#Op#"($imm_value)", []> { + bits<5> dst; + bits<32> imm_value; + let IClass = 0b0111; -let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, - Defs = [PC, LC0], Uses = [SA0, LC0] in { -def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset), - ":endloop0", - []>; + let Inst{27} = Rs; + let Inst{26-24} = MajOp; + let Inst{21} = MinOp; + let Inst{20-16} = dst; + let Inst{23-22} = !if (!eq(Op, "LO"), imm_value{15-14}, imm_value{31-30}); + let Inst{13-0} = !if (!eq(Op, "LO"), imm_value{13-0}, imm_value{29-16}); } -// Support for generating global address. -// Taken from X86InstrInfo.td. -def SDTHexagonCONST32 : SDTypeProfile<1, 1, [ - SDTCisVT<0, i32>, - SDTCisVT<1, i32>, - SDTCisPtrTy<0>]>; -def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; -def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; +let isAsmParserOnly = 1 in { + def LO : REG_IMMED<".l", "LO", 0b0, 0b001, 0b1>; + def LO_H : REG_IMMED<".l", "HI", 0b0, 0b001, 0b1>; + def HI : REG_IMMED<".h", "HI", 0b0, 0b010, 0b1>; + def HI_L : REG_IMMED<".h", "LO", 0b0, 0b010, 0b1>; +} -// HI/LO Instructions -let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in -def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst.l = #LO($global)", - []>; +let isMoveImm = 1, isCodeGenOnly = 1 in +def LO_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.l = #LO($label@GOTREL)", + []>; -let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in -def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst.h = #HI($global)", - []>; +let isMoveImm = 1, isCodeGenOnly = 1 in +def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.h = #HI($label@GOTREL)", + []>; -let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, + isAsmParserOnly = 1 in def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), "$dst.l = #LO($imm_value)", []>; -let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, + isAsmParserOnly = 1 in def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), "$dst.h = #HI($imm_value)", []>; -let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, + isAsmParserOnly = 1 in def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), "$dst.l = #LO($jt)", []>; -let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, + isAsmParserOnly = 1 in def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), "$dst.h = #HI($jt)", []>; - -let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in 
-def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), - "$dst.l = #LO($label)", - []>; - -let isReMaterializable = 1, isMoveImm = 1 , neverHasSideEffects = 1 in -def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), - "$dst.h = #HI($label)", - []>; - // This pattern is incorrect. When we add small data, we should change // this pattern to use memw(#foo). // This is for sdata. -let isMoveImm = 1 in -def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), +let isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = CONST32(#$global)", [(set (i32 IntRegs:$dst), (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; -// This is for non-sdata. let isReMaterializable = 1, isMoveImm = 1 in def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = CONST32(#$global)", [(set (i32 IntRegs:$dst), (HexagonCONST32 tglobaladdr:$global))]>; -let isReMaterializable = 1, isMoveImm = 1 in -def CONST32_set_jt : LDInst2<(outs IntRegs:$dst), (ins jumptablebase:$jt), +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32_set_jt : CONSTLDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt), "$dst = CONST32(#$jt)", [(set (i32 IntRegs:$dst), (HexagonCONST32 tjumptable:$jt))]>; -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = CONST32(#$global)", [(set (i32 IntRegs:$dst), (HexagonCONST32_GP tglobaladdr:$global))]>; -let isReMaterializable = 1, isMoveImm = 1 in -def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global), +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global), "$dst = CONST32(#$global)", [(set (i32 IntRegs:$dst), imm:$global) ]>; @@ -2067,839 +4811,921 @@ def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global), def : Pat<(HexagonCONST32_GP tblockaddress:$addr), (CONST32_Int_Real tblockaddress:$addr)>; -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label), "$dst = CONST32($label)", [(set (i32 IntRegs:$dst), (HexagonCONST32 bbl:$label))]>; -let isReMaterializable = 1, isMoveImm = 1 in -def CONST64_Int_Real : LDInst2<(outs DoubleRegs:$dst), (ins i64imm:$global), +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in +def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global), "$dst = CONST64(#$global)", - [(set (i64 DoubleRegs:$dst), imm:$global) ]>; + [(set (i64 DoubleRegs:$dst), imm:$global)]>; -def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), - "$dst = xor($dst, $dst)", - [(set (i1 PredRegs:$dst), 0)]>; +let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, + isCodeGenOnly = 1 in +def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "", + [(set (i1 PredRegs:$dst), 1)]>; -def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = mpy($src1, $src2)", - [(set (i32 IntRegs:$dst), - (trunc (i64 (srl (i64 (mul (i64 (sext (i32 IntRegs:$src1))), - (i64 (sext (i32 IntRegs:$src2))))), - (i32 32)))))]>; +let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, + isCodeGenOnly = 1 in +def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)", + [(set (i1 PredRegs:$dst), 
0)]>; // Pseudo instructions. def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; - -def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, +def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; - -def call : SDNode<"HexagonISD::CALL", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; // For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, // Optional Flag and Variable Arguments. // Its 1 Operand has pointer type. -def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -let Defs = [R29, R30], Uses = [R31, R30, R29] in { - def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - "Should never be emitted", - [(callseq_start timm:$amt)]>; -} - -let Defs = [R29, R30, R31], Uses = [R29] in { - def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - "Should never be emitted", - [(callseq_end timm:$amt1, timm:$amt2)]>; -} -// Call subroutine. -let isCall = 1, neverHasSideEffects = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALL : JInst<(outs), (ins calltarget:$dst), - "call $dst", []>; -} - -// Call subroutine from register. -let isCall = 1, neverHasSideEffects = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALLR : JRInst<(outs), (ins IntRegs:$dst), - "callr $dst", - []>; - } +def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), + ".error \"should not emit\" ", + [(callseq_start timm:$amt)]>; +let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + ".error \"should not emit\" ", + [(callseq_end timm:$amt1, timm:$amt2)]>; + +// Call subroutine indirectly. +let Defs = VolatileV3.Regs in +def J2_callr : JUMPR_MISC_CALLR<0, 1>; // Indirect tail-call. -let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in -def TCRETURNR : T_JMPr; +let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, + isTerminator = 1, isCodeGenOnly = 1 in +def TCRETURNr : T_JMPr; // Direct tail-calls. let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, isTerminator = 1, isCodeGenOnly = 1 in { - def TCRETURNtg : T_JMP<(ins calltarget:$dst)>; - def TCRETURNtext : T_JMP<(ins calltarget:$dst)>; + def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst", + [], "", J_tc_2early_SLOT23>; + def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst", + [], "", J_tc_2early_SLOT23>; } -// Map call instruction. -def : Pat<(call (i32 IntRegs:$dst)), - (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>; -def : Pat<(call tglobaladdr:$dst), - (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>; -def : Pat<(call texternalsym:$dst), - (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>; //Tail calls. 
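For context on the tail-call patterns that follow: when the optimizer treats a call in tail position as a tail call, it is emitted as a plain jump, either directly to a symbol (TCRETURNtg/TCRETURNtext) or through a register (TCRETURNr). A hedged C sketch (function names hypothetical):

    int callee(int);                         /* hypothetical extern */

    int caller(int x, int (*fp)(int)) {
      if (x & 1)
        return callee(x + 1);                /* direct tail call   -> TCRETURNtg */
      return fp(x);                          /* indirect tail call -> TCRETURNr  */
    }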
-def : Pat<(HexagonTCRet tglobaladdr:$dst), - (TCRETURNtg tglobaladdr:$dst)>; -def : Pat<(HexagonTCRet texternalsym:$dst), - (TCRETURNtext texternalsym:$dst)>; -def : Pat<(HexagonTCRet (i32 IntRegs:$dst)), - (TCRETURNR (i32 IntRegs:$dst))>; - -// Atomic load and store support -// 8 bit atomic load -def : Pat<(atomic_load_8 ADDRriS11_0:$src1), - (i32 (LDriub ADDRriS11_0:$src1))>; - -def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)), - (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>; - -// 16 bit atomic load -def : Pat<(atomic_load_16 ADDRriS11_1:$src1), - (i32 (LDriuh ADDRriS11_1:$src1))>; - -def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)), - (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>; - -def : Pat<(atomic_load_32 ADDRriS11_2:$src1), - (i32 (LDriw ADDRriS11_2:$src1))>; - -def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)), - (i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>; - -// 64 bit atomic load -def : Pat<(atomic_load_64 ADDRriS11_3:$src1), - (i64 (LDrid ADDRriS11_3:$src1))>; - -def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)), - (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>; - - -def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)), - (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>; - -def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrib_indexed (i32 IntRegs:$src2), s11_0ImmPred:$offset, - (i32 IntRegs:$src1))>; - - -def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)), - (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>; - -def : Pat<(atomic_store_16 (i32 IntRegs:$src1), - (add (i32 IntRegs:$src2), s11_1ImmPred:$offset)), - (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset, - (i32 IntRegs:$src1))>; - -def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)), - (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>; - -def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset), - (i32 IntRegs:$src1)), - (STriw_indexed (i32 IntRegs:$src2), s11_2ImmPred:$offset, - (i32 IntRegs:$src1))>; - - - - -def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)), - (STrid ADDRriS11_3:$src2, (i64 DoubleRegs:$src1))>; - -def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), s11_3ImmPred:$offset), - (i64 DoubleRegs:$src1)), - (STrid_indexed (i32 IntRegs:$src2), s11_3ImmPred:$offset, - (i64 DoubleRegs:$src1))>; +def: Pat<(HexagonTCRet tglobaladdr:$dst), + (TCRETURNtg tglobaladdr:$dst)>; +def: Pat<(HexagonTCRet texternalsym:$dst), + (TCRETURNtext texternalsym:$dst)>; +def: Pat<(HexagonTCRet (i32 IntRegs:$dst)), + (TCRETURNr (i32 IntRegs:$dst))>; // Map from r0 = and(r1, 65535) to r0 = zxth(r1) -def : Pat <(and (i32 IntRegs:$src1), 65535), - (ZXTH (i32 IntRegs:$src1))>; +def: Pat<(and (i32 IntRegs:$src1), 65535), + (A2_zxth IntRegs:$src1)>; // Map from r0 = and(r1, 255) to r0 = zxtb(r1). -def : Pat <(and (i32 IntRegs:$src1), 255), - (ZXTB (i32 IntRegs:$src1))>; +def: Pat<(and (i32 IntRegs:$src1), 255), + (A2_zxtb IntRegs:$src1)>; // Map Add(p1, true) to p1 = not(p1). // Add(p1, false) should never be produced, // if it does, it got to be mapped to NOOP. -def : Pat <(add (i1 PredRegs:$src1), -1), - (NOT_p (i1 PredRegs:$src1))>; - -// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) => -// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1). 
-// cmp.lt(r0, r1) -> cmp.gt(r1, r0) -def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i32 IntRegs:$src3), - (i32 IntRegs:$src4)), - (i32 (TFR_condset_rr (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), - (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>, - Requires<[HasV2TOnly]>; +def: Pat<(add (i1 PredRegs:$src1), -1), + (C2_not PredRegs:$src1)>; // Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). -def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3), - (i32 (TFR_condset_ii (i1 PredRegs:$src1), s8ImmPred:$src3, - s8ImmPred:$src2))>; +def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ExtPred:$src3), + (C2_muxii PredRegs:$src1, s8ExtPred:$src3, s8ImmPred:$src2)>; // Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = TFR_condset_ri(p0, r1, #i) -def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2, - (i32 IntRegs:$src3)), - (i32 (TFR_condset_ri (i1 PredRegs:$src1), (i32 IntRegs:$src3), - s12ImmPred:$src2))>; +// => r0 = C2_muxir(p0, r1, #i) +def: Pat<(select (not (i1 PredRegs:$src1)), s8ExtPred:$src2, + (i32 IntRegs:$src3)), + (C2_muxir PredRegs:$src1, IntRegs:$src3, s8ExtPred:$src2)>; // Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = TFR_condset_ir(p0, #i, r1) -def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3), - (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3, - (i32 IntRegs:$src2)))>; +// => r0 = C2_muxri (p0, #i, r1) +def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s8ExtPred:$src3), + (C2_muxri PredRegs:$src1, s8ExtPred:$src3, IntRegs:$src2)>; // Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. -def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset), - (JMP_f (i1 PredRegs:$src1), bb:$offset)>; +def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; -// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2). -def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))), - (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; +// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), + (A2_sxtw (LoReg DoubleRegs:$src1))>; +// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), + (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; -let AddedComplexity = 100 in -def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))), - (i64 (COMBINE_rr (TFRI 0), - (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>, - Requires<[NoV4T]>; - -// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned. -let AddedComplexity = 10 in -def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), - (i32 (A2_and (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>; - -// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo). -def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), - (i64 (SXTW (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>; - -// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)). -def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), - (i64 (SXTW (i32 (SXTH (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg))))))>; - -// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)). 
-def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), - (i64 (SXTW (i32 (SXTB (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg))))))>; +// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), + (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; // We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a JMP_f. +// Map brcond with an unsupported setcc to a J2_jumpf. def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_f (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), bb:$offset), - (JMP_f (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; + (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; -def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), - (JMP_f (i1 PredRegs:$src1), bb:$offset)>; +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; -def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), - (JMP_t (i1 PredRegs:$src1), bb:$offset)>; +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$src1, bb:$offset)>; // cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), - bb:$offset), - (JMP_f (CMPGTri (i32 IntRegs:$src1), - (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>; +def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)), + bb:$offset)>; // cmp.lt(r0, r1) -> cmp.gt(r1, r0) def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_t (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>; + (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>; def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), bb:$offset), - (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), + (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), bb:$offset)>; def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_f (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), bb:$offset), - (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), bb:$offset)>; // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. 
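A scalar sketch (illustration only, hypothetical helper name) of the equivalence the pattern below relies on: selecting a 64-bit value is the same as selecting each 32-bit half independently and recombining them with combine(hi, lo):

    #include <stdint.h>

    /* Sketch only: mux each 32-bit half, then combine(hi, lo), matching the
       shape of the A2_combinew/C2_mux expansion below. */
    static uint64_t select64(int pu, uint64_t rss, uint64_t rtt) {
      uint32_t hi = pu ? (uint32_t)(rss >> 32) : (uint32_t)(rtt >> 32);
      uint32_t lo = pu ? (uint32_t)rss         : (uint32_t)rtt;
      return ((uint64_t)hi << 32) | lo;
    }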
-def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src3)), - (i64 (COMBINE_rr (i32 (MUX_rr (i1 PredRegs:$src1), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3), - subreg_hireg)))), - (i32 (MUX_rr (i1 PredRegs:$src1), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3), - subreg_loreg))))))>; +def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3)), + (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), + (HiReg DoubleRegs:$src3)), + (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), + (LoReg DoubleRegs:$src3)))>; // Map from a 1-bit select to logical ops. // From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). -def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), - (i1 PredRegs:$src3)), - (OR_pp (AND_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)), - (AND_pp (NOT_p (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>; +def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), + (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), + (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; // Map Pd = load(addr) -> Rs = load(addr); Pd = Rs. def : Pat<(i1 (load ADDRriS11_2:$addr)), - (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>; + (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>; // Map for truncating from 64 immediates to 32 bit immediates. -def : Pat<(i32 (trunc (i64 DoubleRegs:$src))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg))>; +def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), + (LoReg DoubleRegs:$src)>; // Map for truncating from i64 immediates to i1 bit immediates. -def : Pat<(i1 (trunc (i64 DoubleRegs:$src))), - (i1 (TFR_PdRs (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg))))>; +def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), + (C2_tfrrp (LoReg DoubleRegs:$src))>; // Map memb(Rs) = Rdd -> memb(Rs) = Rt. def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), + (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg)))>; // Map memh(Rs) = Rdd -> memh(Rs) = Rt. def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), + (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg)))>; // Map memw(Rs) = Rdd -> memw(Rs) = Rt def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), + (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg)))>; // Map memw(Rs) = Rdd -> memw(Rs) = Rt. def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), + (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg)))>; // Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0. def : Pat<(store (i1 -1), ADDRriS11_2:$addr), - (STrib ADDRriS11_2:$addr, (TFRI 1))>; + (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>; // Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0. def : Pat<(store (i1 -1), ADDRriS11_2:$addr), - (STrib ADDRriS11_2:$addr, (TFRI 1))>; + (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>; // Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt. 
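A minimal sketch (illustration only) of the store-a-predicate idiom named in the comment above: the i1 value is first widened to a byte with a mux and that byte is stored, which is what the C2_muxii/S2_storerb_io pattern below expresses:

    #include <stdint.h>

    /* Sketch only: memb(addr) = Pd becomes Rt = mux(Pd, #1, #0); memb(addr) = Rt. */
    static void store_pred(uint8_t *addr, int pd) {
      uint8_t rt = pd ? 1 : 0;   /* Rt = mux(Pd, #1, #0) */
      *addr = rt;                /* memb(addr) = Rt      */
    }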
def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr), - (STrib ADDRriS11_2:$addr, (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0)) )>; - -// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs). -// Hexagon_TODO: We can probably use combine but that will cost 2 instructions. -// Better way to do this? -def : Pat<(i64 (anyext (i32 IntRegs:$src1))), - (i64 (SXTW (i32 IntRegs:$src1)))>; + (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>; -// Map cmple -> cmpgt. // rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), - (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ExtPred:$src2)))>; +let AddedComplexity = 30 in +def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>; // rs <= rt -> !(rs > rt). def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (NOT_p (CMPGTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; + (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; // Rss <= Rtt -> !(Rss > Rtt). -def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (NOT_p (CMPGT64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>; +def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; // Map cmpne -> cmpeq. // Hexagon_TODO: We should improve on this. // rs != rt -> !(rs == rt). -def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), - (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ExtPred:$src2))))>; +let AddedComplexity = 30 in +def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), + (C2_not (C2_cmpeqi IntRegs:$src1, s10ExtPred:$src2))>; // Map cmpne(Rs) -> !cmpeqe(Rs). // rs != rt -> !(rs == rt). def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (NOT_p (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>; + (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>; // Convert setne back to xor for hexagon since we compute w/ pred registers. -def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), - (i1 (XOR_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; +def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), + (C2_xor PredRegs:$src1, PredRegs:$src2)>; // Map cmpne(Rss) -> !cmpew(Rss). // rs != rt -> !(rs == rt). -def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (NOT_p (i1 (CMPEHexagon4rr (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2)))))>; +def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; // Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt). // rs >= rt -> !(rt > rs). def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; + (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; // cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) -def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)), - (i1 (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>; +let AddedComplexity = 30 in +def: Pat<(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)), + (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). 
-def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (NOT_p (i1 (CMPGT64rr (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src1)))))>; +def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>; // Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). // !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). // rs < rt -> !(rs >= rt). -def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), - (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>; - -// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs). -// rs < rt -> rt > rs. -// We can let assembler map it, or we can do in the compiler itself. -def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>; - -// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss). -// rss < rtt -> (rtt > rss). -def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (CMPGT64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>; - -// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs) -// rs < rt -> rt > rs. -// We can let assembler map it, or we can do in the compiler itself. -def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>; - -// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss). -// rs < rt -> rt > rs. -def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>; +let AddedComplexity = 30 in +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2)))>; // Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) -def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)), - (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>; +def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)), + (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; // Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) -def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)), - (i1 (CMPGTUri (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; +def: Pat<(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)), + (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u8ExtPred:$src2))>; // Generate cmpgtu(Rs, #u9) -def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)), - (i1 (CMPGTUri (i32 IntRegs:$src1), u9ExtPred:$src2))>; - -// Map from Rs >= Rt -> !(Rt > Rs). -// rs >= rt -> !(rt > rs). -def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>; +def: Pat<(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)), + (C2_cmpgtui IntRegs:$src1, u9ExtPred:$src2)>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). -def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>; - -// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt). -// Map from (Rs <= Rt) -> !(Rs > Rt). -def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; +def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>; // Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). // Map from (Rs <= Rt) -> !(Rs > Rt). 
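The compare rewrites in this block all lean on a few identities; a small C sanity check (illustration only, unsigned case), including the #0 special case that maps setuge(Rs, #0) to cmpeq(Rs, Rs):

    #include <assert.h>
    #include <stdint.h>

    /* Sketch only: the identities behind the compare rewrites above and below. */
    static void check_cmp_identities(uint32_t x, uint32_t y, uint32_t k) {
      assert((x <  y) == (y > x));                 /* cmplt  -> swapped cmpgt */
      assert((x <= y) == !(x > y));                /* cmple  -> !cmpgt        */
      assert((x >= y) == !(y > x));                /* cmpge  -> !swapped cmpgt */
      assert((x >= k) == (k == 0 || x > k - 1));   /* cmpgeu #k -> cmpgtu #(k-1); */
                                                   /* #0 handled as cmpeq(Rs, Rs) */
    }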
-def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>; +def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; // Sign extends. // i1 -> i32 -def : Pat <(i32 (sext (i1 PredRegs:$src1))), - (i32 (MUX_ii (i1 PredRegs:$src1), -1, 0))>; +def: Pat<(i32 (sext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, -1, 0)>; // i1 -> i64 -def : Pat <(i64 (sext (i1 PredRegs:$src1))), - (i64 (COMBINE_rr (TFRI -1), (MUX_ii (i1 PredRegs:$src1), -1, 0)))>; - -// Convert sign-extended load back to load and sign extend. -// i8 -> i64 -def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)), - (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>; - -// Convert any-extended load back to load and sign extend. -// i8 -> i64 -def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)), - (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>; - -// Convert sign-extended load back to load and sign extend. -// i16 -> i64 -def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)), - (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>; +def: Pat<(i64 (sext (i1 PredRegs:$src1))), + (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; // Convert sign-extended load back to load and sign extend. // i32 -> i64 def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)), - (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>; - + (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>; // Zero extends. // i1 -> i32 -def : Pat <(i32 (zext (i1 PredRegs:$src1))), - (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>; +def: Pat<(i32 (zext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, 1, 0)>; -// i1 -> i64 -def : Pat <(i64 (zext (i1 PredRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, - Requires<[NoV4T]>; +// Map from Rs = Pd to Pd = mux(Pd, #1, #0) +def: Pat<(i32 (anyext (i1 PredRegs:$src1))), + (C2_muxii PredRegs:$src1, 1, 0)>; -// i32 -> i64 -def : Pat <(i64 (zext (i32 IntRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>, - Requires<[NoV4T]>; +// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) +def: Pat<(i64 (anyext (i1 PredRegs:$src1))), + (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; -// i8 -> i64 -def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>, - Requires<[NoV4T]>; +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), + (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (L2_loadri_io AddrFI:$srcLow, 0)))>; -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), - s11_0ExtPred:$offset))), - (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1, - s11_0ExtPred:$offset)))>, - Requires<[NoV4T]>; +// Multiply 64-bit unsigned and use upper result. +def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32), + (HiReg $src1), + (LoReg $src2)), + (A2_combinew (A2_tfrsi 0), + (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))), + 32), + (HiReg $src1), + (HiReg $src2)), + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32) +)>; -// i1 -> i64 -def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>, - Requires<[NoV4T]>; +// Hexagon specific ISD nodes. 
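Before turning to the Hexagon-specific ISD nodes, a scalar sketch (illustration only, hypothetical helper name) of what the mulhu expansion above computes: the high 64 bits of a 64x64 unsigned product assembled from 32-bit partial products, in the same shape as the M2_dpmpyuu_s0/M2_dpmpyuu_acc_s0/S2_lsr_i_p/A2_addp tree:

    #include <stdint.h>

    /* Sketch only: high half of a 64x64 unsigned multiply via 32-bit partial
       products, mirroring the pattern above. */
    static uint64_t mulhu64(uint64_t a, uint64_t b) {
      uint64_t alo = (uint32_t)a, ahi = a >> 32;
      uint64_t blo = (uint32_t)b, bhi = b >> 32;
      uint64_t lolo = alo * blo;                   /* dpmpyuu(lo, lo) */
      uint64_t hilo = ahi * blo;                   /* dpmpyuu(hi, lo) */
      uint64_t lohi = alo * bhi;                   /* dpmpyuu(lo, hi) */
      uint64_t hihi = ahi * bhi;                   /* dpmpyuu(hi, hi) */
      /* (lolo >> 32) + hilo + low32(lohi): the inner lsr/acc/addp chain */
      uint64_t mid  = (lolo >> 32) + hilo + (uint32_t)lohi;
      /* hihi accumulated with the carried mid, plus high32(lohi) */
      return hihi + (mid >> 32) + (lohi >> 32);
    }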
+def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; +def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1), - s11_0ExtPred:$offset))), - (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1, - s11_0ExtPred:$offset)))>, - Requires<[NoV4T]>; +def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC", + SDTHexagonADJDYNALLOC>; +def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; -// i16 -> i64 -def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>, - Requires<[NoV4T]>; +// Needed to tag these instructions for stack layout. +let isCodeGenOnly = 1, usesCustomInserter = 1 in +def ADJDYNALLOC : T_Addri<s6Imm>; -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), - s11_1ExtPred:$offset))), - (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1, - s11_1ExtPred:$offset)))>, - Requires<[NoV4T]>; +def: Pat<(Hexagon_ADJDYNALLOC I32:$Rs, s16ImmPred:$s16), + (ADJDYNALLOC I32:$Rs, imm:$s16)>; -// i32 -> i64 -def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, - Requires<[NoV4T]>; +let isCodeGenOnly = 1 in +def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + [(set (i32 IntRegs:$dst), + (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>; let AddedComplexity = 100 in -def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1, - s11_2ExtPred:$offset)))>, - Requires<[NoV4T]>; +def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), + (i32 IntRegs:$src1)>; -let AddedComplexity = 10 in -def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), - (i32 (LDriw ADDRriS11_0:$src1))>; +def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; -// Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def : Pat <(i32 (zext (i1 PredRegs:$src1))), - (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>; +def : Pat<(HexagonWrapperJT tjumptable:$dst), + (i32 (CONST32_set_jt tjumptable:$dst))>; -// Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def : Pat <(i32 (anyext (i1 PredRegs:$src1))), - (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>; +// XTYPE/SHIFT +// +//===----------------------------------------------------------------------===// +// Template Class +// Shift by immediate/register and accumulate/logical +//===----------------------------------------------------------------------===// -// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) -def : Pat <(i64 (anyext (i1 PredRegs:$src1))), - (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>; +// Rx[+-&|]=asr(Rs,#u5) +// Rx[+-&|^]=lsr(Rs,#u5) +// Rx[+-&|^]=asl(Rs,#u5) + +let hasNewValue = 1, opNewValue = 0 in +class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, u5Imm:$u5), + "$Rx "#opc2#opc1#"($Rs, #$u5)", + [(set (i32 IntRegs:$Rx), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$Rs), u5ImmPred:$u5)))], + "$src1 = $Rx", S_2op_tc_2_SLOT23> { + bits<5> Rx; + bits<5> Rs; + bits<5> u5; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1110; + let Inst{23-22} = majOp{2-1}; + let Inst{13} = 0b0; + let Inst{7} = majOp{0}; + let Inst{6-5} = minOp; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = u5; + } +// Rx[+-&|]=asr(Rs,Rt) 
+// Rx[+-&|^]=lsr(Rs,Rt) +// Rx[+-&|^]=asl(Rs,Rt) + +let hasNewValue = 1, opNewValue = 0 in +class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<2> majOp, bits<2> minOp> + : SInst_acc<(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#opc2#opc1#"($Rs, $Rt)", + [(set (i32 IntRegs:$Rx), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))], + "$src1 = $Rx", S_3op_tc_2_SLOT23 > { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1100; -let AddedComplexity = 100 in -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 (i32 (add IntRegs:$src2, - s11_2ExtPred:$offset2)))))), - (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (LDriw_indexed IntRegs:$src2, - s11_2ExtPred:$offset2)))>; + let Inst{27-24} = 0b1100; + let Inst{23-22} = majOp; + let Inst{7-6} = minOp; + let Inst{4-0} = Rx; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + } -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), - (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (LDriw ADDRriS11_2:$srcLow)))>; +// Rxx[+-&|]=asr(Rss,#u6) +// Rxx[+-&|^]=lsr(Rss,#u6) +// Rxx[+-&|^]=asl(Rss,#u6) + +class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6Imm:$u6), + "$Rxx "#opc2#opc1#"($Rss, #$u6)", + [(set (i64 DoubleRegs:$Rxx), + (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$Rss), u6ImmPred:$u6)))], + "$src1 = $Rxx", S_2op_tc_2_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<6> u6; + + let IClass = 0b1000; + + let Inst{27-24} = 0b0010; + let Inst{23-22} = majOp{2-1}; + let Inst{7} = majOp{0}; + let Inst{6-5} = minOp; + let Inst{4-0} = Rxx; + let Inst{20-16} = Rss; + let Inst{13-8} = u6; + } -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zext (i32 IntRegs:$srcLow))))), - (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - IntRegs:$srcLow))>; -let AddedComplexity = 100 in -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 (i32 (add IntRegs:$src2, - s11_2ExtPred:$offset2)))))), - (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (LDriw_indexed IntRegs:$src2, - s11_2ExtPred:$offset2)))>; +// Rxx[+-&|]=asr(Rss,Rt) +// Rxx[+-&|^]=lsr(Rss,Rt) +// Rxx[+-&|^]=asl(Rss,Rt) +// Rxx[+-&|^]=lsl(Rss,Rt) + +class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp> + : SInst_acc<(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt), + "$Rxx "#opc2#opc1#"($Rss, $Rt)", + [(set (i64 DoubleRegs:$Rxx), + (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))], + "$src1 = $Rxx", S_3op_tc_2_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = majOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rt; + let Inst{7-6} = minOp; + let Inst{4-0} = Rxx; + } -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), - (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (LDriw ADDRriS11_2:$srcLow)))>; +//===----------------------------------------------------------------------===// 
+// Multi-class for the shift instructions with logical/arithmetic operators. +//===----------------------------------------------------------------------===// -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zext (i32 IntRegs:$srcLow))))), - (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - IntRegs:$srcLow))>; - -// Any extended 64-bit load. -// anyext i32 -> i64 -def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, - Requires<[NoV4T]>; - -// When there is an offset we should prefer the pattern below over the pattern above. -// The complexity of the above is 13 (gleaned from HexagonGenDAGIsel.inc) -// So this complexity below is comfortably higher to allow for choosing the below. -// If this is not done then we generate addresses such as -// ******************************************** -// r1 = add (r0, #4) -// r1 = memw(r1 + #0) -// instead of -// r1 = memw(r0 + #4) -// ******************************************** +multiclass xtype_imm_base<string OpcStr1, string OpcStr2, SDNode OpNode1, + SDNode OpNode2, bits<3> majOp, bits<2> minOp > { + def _i_r#NAME : T_shift_imm_acc_r< OpcStr1, OpcStr2, OpNode1, + OpNode2, majOp, minOp >; + def _i_p#NAME : T_shift_imm_acc_p< OpcStr1, OpcStr2, OpNode1, + OpNode2, majOp, minOp >; +} + +multiclass xtype_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + defm _acc : xtype_imm_base< opc1, "+= ", OpNode, add, 0b001, minOp>; + + defm _nac : xtype_imm_base< opc1, "-= ", OpNode, sub, 0b000, minOp>; + defm _and : xtype_imm_base< opc1, "&= ", OpNode, and, 0b010, minOp>; + defm _or : xtype_imm_base< opc1, "|= ", OpNode, or, 0b011, minOp>; +} + +multiclass xtype_xor_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> { let AddedComplexity = 100 in -def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1, - s11_2ExtPred:$offset)))>, - Requires<[NoV4T]>; + defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>; +} -// anyext i16 -> i64. -def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), - (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>, - Requires<[NoV4T]>; +defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>; -let AddedComplexity = 20 in -def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), - s11_1ExtPred:$offset))), - (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1, - s11_1ExtPred:$offset)))>, - Requires<[NoV4T]>; +defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>, + xtype_xor_imm_acc<"lsr", srl, 0b01>; -// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs). -def : Pat<(i64 (zext (i32 IntRegs:$src1))), - (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>, - Requires<[NoV4T]>; +defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>, + xtype_xor_imm_acc<"asl", shl, 0b10>; -// Multiply 64-bit unsigned and use upper result. 
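A scalar sketch (illustration only) of the accumulate/logical shift forms the defm lines above instantiate, e.g. "Rx += asr(Rs, #u5)" and "Rx ^= lsr(Rs, #u5)"; signed right shift is assumed to be arithmetic, as on common compilers:

    #include <stdint.h>

    /* Sketch only: shift, then fold into the destination with +=, -=, &=, |=, ^=. */
    static int32_t asr_i_r_acc(int32_t rx, int32_t rs, unsigned u5) {
      return rx + (rs >> u5);        /* Rx += asr(Rs, #u5) */
    }
    static uint32_t lsr_i_r_xacc(uint32_t rx, uint32_t rs, unsigned u5) {
      return rx ^ (rs >> u5);        /* Rx ^= lsr(Rs, #u5) */
    }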
-def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), - (i64 - (MPYU64_acc - (i64 - (COMBINE_rr - (TFRI 0), - (i32 - (EXTRACT_SUBREG - (i64 - (LSRd_ri - (i64 - (MPYU64_acc - (i64 - (MPYU64_acc - (i64 - (COMBINE_rr (TFRI 0), - (i32 - (EXTRACT_SUBREG - (i64 - (LSRd_ri - (i64 - (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_loreg)))), 32)), - subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))), - 32)), subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>; - -// Multiply 64-bit signed and use upper result. -def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), - (i64 - (MPY64_acc - (i64 - (COMBINE_rr (TFRI 0), - (i32 - (EXTRACT_SUBREG - (i64 - (LSRd_ri - (i64 - (MPY64_acc - (i64 - (MPY64_acc - (i64 - (COMBINE_rr (TFRI 0), - (i32 - (EXTRACT_SUBREG - (i64 - (LSRd_ri - (i64 - (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - subreg_loreg)))), 32)), - subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))), - 32)), subreg_loreg)))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>; +multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + def _acc : T_shift_reg_acc_r <opc1, "+= ", OpNode, add, 0b11, minOp>; -// Hexagon specific ISD nodes. -//def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>; -def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC", - SDTHexagonADJDYNALLOC>; -// Needed to tag these instructions for stack layout. 
-let usesCustomInserter = 1 in -def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, - s16Imm:$src2), - "$dst = add($src1, #$src2)", - [(set (i32 IntRegs:$dst), - (Hexagon_ADJDYNALLOC (i32 IntRegs:$src1), - s16ImmPred:$src2))]>; + def _nac : T_shift_reg_acc_r <opc1, "-= ", OpNode, sub, 0b10, minOp>; + def _and : T_shift_reg_acc_r <opc1, "&= ", OpNode, and, 0b01, minOp>; + def _or : T_shift_reg_acc_r <opc1, "|= ", OpNode, or, 0b00, minOp>; +} -def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; -def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = $src1", - [(set (i32 IntRegs:$dst), - (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>; +multiclass xtype_reg_acc_p<string opc1, SDNode OpNode, bits<2>minOp> { + let AddedComplexity = 100 in + def _acc : T_shift_reg_acc_p <opc1, "+= ", OpNode, add, 0b110, minOp>; -let AddedComplexity = 100 in -def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), - (COPY (i32 IntRegs:$src1))>; + def _nac : T_shift_reg_acc_p <opc1, "-= ", OpNode, sub, 0b100, minOp>; + def _and : T_shift_reg_acc_p <opc1, "&= ", OpNode, and, 0b010, minOp>; + def _or : T_shift_reg_acc_p <opc1, "|= ", OpNode, or, 0b000, minOp>; + def _xor : T_shift_reg_acc_p <opc1, "^= ", OpNode, xor, 0b011, minOp>; +} -def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; +multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > { + defm _r_r : xtype_reg_acc_r <OpcStr, OpNode, minOp>; + defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>; +} -def : Pat<(HexagonWrapperJT tjumptable:$dst), - (i32 (CONST32_set_jt tjumptable:$dst))>; +defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>; +defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>; +defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>; +defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>; -// XTYPE/SHIFT +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S3op_1 <string mnemonic, RegisterClass RC, bits<2> MajOp, bits<3> MinOp, + bit SwapOps, bit isSat = 0, bit isRnd = 0, bit hasShift = 0> + : SInst <(outs RC:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2), + "$dst = "#mnemonic#"($src1, $src2)"#!if(isRnd, ":rnd", "") + #!if(hasShift,":>>1","") + #!if(isSat, ":sat", ""), + [], "", S_3op_tc_2_SLOT23 > { + bits<5> dst; + bits<5> src1; + bits<5> src2; -// Multi-class for logical operators : -// Shift by immediate/register and accumulate/logical -multiclass xtype_imm<string OpcStr, SDNode OpNode1, SDNode OpNode2> { - def _ri : SInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u5Imm:$src3), - !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")), - [(set (i32 IntRegs:$dst), - (OpNode2 (i32 IntRegs:$src1), - (OpNode1 (i32 IntRegs:$src2), - u5ImmPred:$src3)))], - "$src1 = $dst">; - - def d_ri : SInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2, u6Imm:$src3), - !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")), - [(set (i64 DoubleRegs:$dst), (OpNode2 (i64 DoubleRegs:$src1), - (OpNode1 (i64 DoubleRegs:$src2), u6ImmPred:$src3)))], - "$src1 = $dst">; -} - -// Multi-class for logical operators : -// Shift by register and accumulate/logical (32/64 bits) -multiclass xtype_reg<string OpcStr, SDNode OpNode1, SDNode OpNode2> { - def _rr : SInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), - !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")), - [(set 
(i32 IntRegs:$dst), - (OpNode2 (i32 IntRegs:$src1), - (OpNode1 (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">; + let IClass = 0b1100; - def d_rr : SInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), - !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")), - [(set (i64 DoubleRegs:$dst), - (OpNode2 (i64 DoubleRegs:$src1), - (OpNode1 (i64 DoubleRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">; + let Inst{27-24} = 0b0001; + let Inst{23-22} = MajOp; + let Inst{20-16} = !if (SwapOps, src2, src1); + let Inst{12-8} = !if (SwapOps, src1, src2); + let Inst{7-5} = MinOp; + let Inst{4-0} = dst; + } -} +class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps, + bit isSat = 0, bit isRnd = 0, bit hasShift = 0 > + : T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps, + isSat, isRnd, hasShift>; -multiclass basic_xtype_imm<string OpcStr, SDNode OpNode> { -let AddedComplexity = 100 in - defm _ADD : xtype_imm< !strconcat("+= ", OpcStr), OpNode, add>; - defm _SUB : xtype_imm< !strconcat("-= ", OpcStr), OpNode, sub>; - defm _AND : xtype_imm< !strconcat("&= ", OpcStr), OpNode, and>; - defm _OR : xtype_imm< !strconcat("|= ", OpcStr), OpNode, or>; +let Itinerary = S_3op_tc_1_SLOT23 in { + def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>; + def S2_shuffeh : T_S3op_64 < "shuffeh", 0b00, 0b110, 0>; + def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>; + def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>; + + def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>; + def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>; } -multiclass basic_xtype_reg<string OpcStr, SDNode OpNode> { -let AddedComplexity = 100 in - defm _ADD : xtype_reg< !strconcat("+= ", OpcStr), OpNode, add>; - defm _SUB : xtype_reg< !strconcat("-= ", OpcStr), OpNode, sub>; - defm _AND : xtype_reg< !strconcat("&= ", OpcStr), OpNode, and>; - defm _OR : xtype_reg< !strconcat("|= ", OpcStr), OpNode, or>; +def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>; + +let hasSideEffects = 0 in +class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps> + : SInst < (outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu), + "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)", + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<2> Pu; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0010; + let Inst{23-21} = MajOp; + let Inst{20-16} = !if (SwapOps, Rtt, Rss); + let Inst{12-8} = !if (SwapOps, Rss, Rtt); + let Inst{6-5} = Pu; + let Inst{4-0} = Rdd; + } + +def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>; +def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>; + +//===----------------------------------------------------------------------===// +// Template class used by vector shift, vector rotate, vector neg, +// 32-bit shift, 64-bit shifts, etc. 
+//===----------------------------------------------------------------------===// + +let hasSideEffects = 0 in +class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp, + bits<2> MinOp, bit isSat = 0, list<dag> pattern = [] > + : SInst <(outs RC:$dst), + (ins RC:$src1, IntRegs:$src2), + "$dst = "#mnemonic#"($src1, $src2)"#!if(isSat, ":sat", ""), + pattern, "", S_3op_tc_1_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1100; + + let Inst{27-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b0110, 0b0011); + let Inst{23-22} = MajOp; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{7-6} = MinOp; + let Inst{4-0} = dst; + } + +let hasNewValue = 1 in +class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp> + : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in +class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp> + : T_S3op_3 <mnemonic, IntRegs, 0b00, MinOp, 1, []>; + + +class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp> + : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, + [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1), + (i32 IntRegs:$src2)))]>; + + +class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp> + : T_S3op_3 <mnemonic, DoubleRegs, MajOp, MinOp, 0, []>; + + +// Shift by register +// Rdd=[asr|lsr|asl|lsl](Rss,Rt) + +def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>; +def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>; +def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>; +def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>; + +// Rd=[asr|lsr|asl|lsl](Rs,Rt) + +def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>; +def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>; +def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>; +def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>; + +// Shift by register with saturation +// Rd=asr(Rs,Rt):sat +// Rd=asl(Rs,Rt):sat + +let Defs = [USR_OVF] in { + def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>; + def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>; } -multiclass xtype_xor_imm<string OpcStr, SDNode OpNode> { -let AddedComplexity = 100 in - defm _XOR : xtype_imm< !strconcat("^= ", OpcStr), OpNode, xor>; +let hasNewValue = 1, hasSideEffects = 0 in +class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd, bit hasShift, bit hasSplat = 0> + : SInst < (outs IntRegs:$Rd), + (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rd = "#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")" + #!if(hasShift, ":<<1", "") + #!if(isRnd, ":rnd", "") + #!if(isSat, ":sat", ""), + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rd; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0101; + let Inst{20-16} = Rss; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>; + +let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>; + +let hasSideEffects = 0 in +class T_S3op_7 <string mnemonic, bit MajOp > + : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3), + "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" , + [], "", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<3> u3; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0000; + let Inst{23} = MajOp; + let Inst{20-16} = !if(MajOp, Rss, Rtt); + let Inst{12-8} = !if(MajOp, Rtt, Rss); + let 
Inst{7-5} = u3; + let Inst{4-0} = Rdd; + } + +def S2_valignib : T_S3op_7 < "valignb", 0>; +def S2_vspliceib : T_S3op_7 < "vspliceb", 1>; + +//===----------------------------------------------------------------------===// +// Template class for 'insert bitfield' instructions +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +class T_S3op_insert <string mnemonic, RegisterClass RC> + : SInst <(outs RC:$dst), + (ins RC:$src1, RC:$src2, DoubleRegs:$src3), + "$dst = "#mnemonic#"($src2, $src3)" , + [], "$src1 = $dst", S_3op_tc_1_SLOT23 > { + bits<5> dst; + bits<5> src2; + bits<5> src3; + + let IClass = 0b1100; + + let Inst{27-26} = 0b10; + let Inst{25-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b00, 0b10); + let Inst{23} = 0b0; + let Inst{20-16} = src2; + let Inst{12-8} = src3; + let Inst{4-0} = dst; + } + +let hasSideEffects = 0 in +class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp> + : SInst <(outs RC:$dst), (ins RC:$dst2, RC:$src1, ImmOp:$src2, ImmOp:$src3), + "$dst = insert($src1, #$src2, #$src3)", + [], "$dst2 = $dst", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<6> src2; + bits<6> src3; + bit bit23; + bit bit13; + string ImmOpStr = !cast<string>(ImmOp); + + let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, 0); + let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0); + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23} = bit23; + let Inst{22-21} = src3{4-3}; + let Inst{20-16} = src1; + let Inst{13} = bit13; + let Inst{12-8} = src2{4-0}; + let Inst{7-5} = src3{2-0}; + let Inst{4-0} = dst; + } + +// Rx=insert(Rs,Rtt) +// Rx=insert(Rs,#u5,#U5) +let hasNewValue = 1 in { + def S2_insert_rp : T_S3op_insert <"insert", IntRegs>; + def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>; } -defm ASL : basic_xtype_imm<"asl", shl>, basic_xtype_reg<"asl", shl>, - xtype_xor_imm<"asl", shl>; +// Rxx=insert(Rss,Rtt) +// Rxx=insert(Rss,#u6,#U6) +def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>; +def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>; -defm LSR : basic_xtype_imm<"lsr", srl>, basic_xtype_reg<"lsr", srl>, - xtype_xor_imm<"lsr", srl>; +//===----------------------------------------------------------------------===// +// Template class for 'extract bitfield' instructions +//===----------------------------------------------------------------------===// +let hasNewValue = 1, hasSideEffects = 0 in +class T_S3op_extract <string mnemonic, bits<2> MinOp> + : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, DoubleRegs:$Rtt), + "$Rd = "#mnemonic#"($Rs, $Rtt)", + [], "", S_3op_tc_2_SLOT23 > { + bits<5> Rd; + bits<5> Rs; + bits<5> Rtt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b100100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rtt; + let Inst{7-6} = MinOp; + let Inst{4-0} = Rd; + } + +let hasSideEffects = 0 in +class T_S2op_extract <string mnemonic, bits<4> RegTyBits, + RegisterClass RC, Operand ImmOp> + : SInst <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2, ImmOp:$src3), + "$dst = "#mnemonic#"($src1, #$src2, #$src3)", + [], "", S_2op_tc_2_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<6> src2; + bits<6> src3; + bit bit23; + bit bit13; + string ImmOpStr = !cast<string>(ImmOp); + + let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, + !if (!eq(mnemonic, "extractu"), 0, 1)); + + let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0); + + let IClass = 0b1000; + + let Inst{27-24} = RegTyBits; + let Inst{23} = bit23; + let Inst{22-21} = src3{4-3}; + let Inst{20-16} = src1; + let Inst{13} = 
bit13; + let Inst{12-8} = src2{4-0}; + let Inst{7-5} = src3{2-0}; + let Inst{4-0} = dst; + } -defm ASR : basic_xtype_imm<"asr", sra>, basic_xtype_reg<"asr", sra>; -defm LSL : basic_xtype_reg<"lsl", shl>; +// Extract bitfield + +// Rdd=extractu(Rss,Rtt) +// Rdd=extractu(Rss,#u6,#U6) +def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>; +def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>; + +// Rd=extractu(Rs,Rtt) +// Rd=extractu(Rs,#u5,#U5) +let hasNewValue = 1 in { + def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>; + def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>; +} // Change the sign of the immediate for Rd=-mpyi(Rs,#u8) -def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), - (i32 (MPYI_rin (i32 IntRegs:$src1), u8ImmPred:$src2))>; +def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), + (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>; + +//===----------------------------------------------------------------------===// +// :raw for of tableindx[bdhw] insns +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class tableidxRaw<string OpStr, bits<2>MinOp> + : SInst <(outs IntRegs:$Rx), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, s6Imm:$S6), + "$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw", + [], "$Rx = $_dst_" > { + bits<5> Rx; + bits<5> Rs; + bits<4> u4; + bits<6> S6; + + let IClass = 0b1000; + + let Inst{27-24} = 0b0111; + let Inst{23-22} = MinOp; + let Inst{21} = u4{3}; + let Inst{20-16} = Rs; + let Inst{13-8} = S6; + let Inst{7-5} = u4{2-0}; + let Inst{4-0} = Rx; + } + +def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>; +def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>; +def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>; +def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>; //===----------------------------------------------------------------------===// // V3 Instructions + @@ -2930,3 +5756,9 @@ include "HexagonInstrInfoV5.td" //===----------------------------------------------------------------------===// // V5 Instructions - //===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU32/64/Vector + +//===----------------------------------------------------------------------===/// + +include "HexagonInstrInfoVector.td"
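(As a reading aid for the insert/extract bitfield templates above: in C++ terms the operations they encode come down to masked shifts. The sketch below assumes the usual Hexagon convention that the first immediate is the field width and the second the bit offset; the helper names are ours, and this is not code from the backend.)

#include <cstdint>

// Rd = extractu(Rs, #width, #offset): take 'width' bits of Rs starting at
// 'offset' and zero-extend them into Rd.
static uint32_t extractu(uint32_t rs, unsigned width, unsigned offset) {
  uint32_t mask = (width >= 32) ? ~0u : ((1u << width) - 1);
  return (rs >> offset) & mask;
}

// Rx = insert(Rs, #width, #offset): replace bits [offset+width-1 : offset] of
// the accumulator Rx with the low 'width' bits of Rs, leaving the rest alone.
static uint32_t insert_bits(uint32_t rx, uint32_t rs, unsigned width,
                            unsigned offset) {
  uint32_t field = (width >= 32) ? ~0u : ((1u << width) - 1);
  uint32_t mask = field << offset;
  return (rx & ~mask) | ((rs & field) << offset);
}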
\ No newline at end of file diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td index 7e75554..84d035d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -21,13 +21,52 @@ def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, // J + //===----------------------------------------------------------------------===// // Call subroutine. -let isCall = 1, neverHasSideEffects = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, - P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALLv3 : JInst<(outs), (ins calltarget:$dst), - "call $dst", []>, Requires<[HasV3T]>; +let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicable = 1, + isExtended = 0, isExtendable = 1, opExtendable = 0, + isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in +class T_Call<string ExtStr> + : JInst<(outs), (ins calltarget:$dst), + "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> { + let BaseOpcode = "call"; + bits<24> dst; + + let IClass = 0b0101; + let Inst{27-25} = 0b101; + let Inst{24-16,13-1} = dst{23-2}; + let Inst{0} = 0b0; +} + +let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicated = 1, + isExtended = 0, isExtendable = 1, opExtendable = 1, + isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in +class T_CallPred<bit IfTrue, string ExtStr> + : JInst<(outs), (ins PredRegs:$Pu, calltarget:$dst), + CondStr<"$Pu", IfTrue, 0>.S # "call " # ExtStr # "$dst", + [], "", J_tc_2early_SLOT23> { + let BaseOpcode = "call"; + let isPredicatedFalse = !if(IfTrue,0,1); + bits<2> Pu; + bits<17> dst; + + let IClass = 0b0101; + let Inst{27-24} = 0b1101; + let Inst{23-22,20-16,13,7-1} = dst{16-2}; + let Inst{21} = !if(IfTrue,0,1); + let Inst{11} = 0b0; + let Inst{9-8} = Pu; +} + +multiclass T_Calls<string ExtStr> { + def NAME : T_Call<ExtStr>; + def t : T_CallPred<1, ExtStr>; + def f : T_CallPred<0, ExtStr>; } +defm J2_call: T_Calls<"">, PredRel; + +let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in +def CALLv3nr : T_Call<"">, PredRel; + //===----------------------------------------------------------------------===// // J - //===----------------------------------------------------------------------===// @@ -37,13 +76,10 @@ let isCall = 1, neverHasSideEffects = 1, // JR + //===----------------------------------------------------------------------===// // Call subroutine from register. -let isCall = 1, neverHasSideEffects = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, - P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst), - "callr $dst", - []>, Requires<[HasV3TOnly]>; - } + +let isCodeGenOnly = 1, Defs = VolatileV3.Regs in { + def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return. 
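+  // CALLRv3nr is codegen-only: it is selected for register-indirect calls that
+  // do not return, via the callv3nr patterns further down in this file.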
+} //===----------------------------------------------------------------------===// // JR - @@ -53,27 +89,63 @@ let isCall = 1, neverHasSideEffects = 1, // ALU64/ALU + //===----------------------------------------------------------------------===// -let AddedComplexity = 200 in -def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = max($src2, $src1)", - [(set (i64 DoubleRegs:$dst), - (i64 (select (i1 (setlt (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src1))), - (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2))))]>, -Requires<[HasV3T]>; - -let AddedComplexity = 200 in -def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = min($src2, $src1)", - [(set (i64 DoubleRegs:$dst), - (i64 (select (i1 (setgt (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src1))), - (i64 DoubleRegs:$src1), - (i64 DoubleRegs:$src2))))]>, -Requires<[HasV3T]>; +let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in +def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>; + +class T_ALU64_addsp_hl<string suffix, bits<3> MinOp> + : T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">; + +def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>; +def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>; + +let hasSideEffects = 0, isAsmParserOnly = 1 in +def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", + [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))), + (i64 DoubleRegs:$Rt))))], + "", ALU64_tc_1_SLOT23>; + + +let hasSideEffects = 0 in +class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned> + : ALU64Inst<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rt, DoubleRegs:$Rs), + "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","") + #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00111; + let Inst{22-21} = !if(isMax, 0b10, 0b01); + let Inst{20-16} = !if(isMax, Rt, Rs); + let Inst{12-8} = !if(isMax, Rs, Rt); + let Inst{7} = 0b1; + let Inst{6} = !if(isMax, 0b0, 0b1); + let Inst{5} = isUnsigned; + let Inst{4-0} = Rd; +} + +def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>; +def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>; +def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>; +def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>; + +multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { + defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>; +} + +let AddedComplexity = 200 in { + defm: MinMax_pats_p<setge, A2_maxp, A2_minp>; + defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>; + defm: MinMax_pats_p<setle, A2_minp, A2_maxp>; + defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>; + defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>; + defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>; + defm: MinMax_pats_p<setule, A2_minup, A2_maxup>; + defm: MinMax_pats_p<setult, A2_minup, A2_maxup>; +} //===----------------------------------------------------------------------===// // ALU64/ALU - @@ -83,25 +155,112 @@ Requires<[HasV3T]>; //def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; +// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>; //def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; +// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>; //def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, 
Requires<[HasV3T]>; +// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>; //def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; //def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset), -// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; - +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; // Map call instruction -def : Pat<(call (i32 IntRegs:$dst)), - (CALLRv3 (i32 IntRegs:$dst))>, Requires<[HasV3T]>; -def : Pat<(call tglobaladdr:$dst), - (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>; -def : Pat<(call texternalsym:$dst), - (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>; +def : Pat<(callv3 (i32 IntRegs:$dst)), + (J2_callr (i32 IntRegs:$dst))>; +def : Pat<(callv3 tglobaladdr:$dst), + (J2_call tglobaladdr:$dst)>; +def : Pat<(callv3 texternalsym:$dst), + (J2_call texternalsym:$dst)>; +def : Pat<(callv3 tglobaltlsaddr:$dst), + (J2_call tglobaltlsaddr:$dst)>; + +def : Pat<(callv3nr (i32 IntRegs:$dst)), + (CALLRv3nr (i32 IntRegs:$dst))>; +def : Pat<(callv3nr tglobaladdr:$dst), + (CALLv3nr tglobaladdr:$dst)>; +def : Pat<(callv3nr texternalsym:$dst), + (CALLv3nr texternalsym:$dst)>; + +//===----------------------------------------------------------------------===// +// :raw form of vrcmpys:hi/lo insns +//===----------------------------------------------------------------------===// +// Vector reduce complex multiply by scalar. +let Defs = [USR_OVF], hasSideEffects = 0 in +class T_vrcmpRaw<string HiLo, bits<3>MajOp>: + MInst<(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b100; + let Inst{4-0} = Rdd; +} + +def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>; +def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>; + +// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l +let hasSideEffects = 0, isAsmParserOnly = 1 in +def M2_vrcmpys_s1 + : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">; + +// Vector reduce complex multiply by scalar with accumulation. 
+let Defs = [USR_OVF], hasSideEffects = 0 in +class T_vrcmpys_acc<string HiLo, bits<3>MajOp>: + MInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [], + "$Rxx = $_src_"> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b100; + let Inst{4-0} = Rxx; + } + +def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>; +def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>; + +// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l + +let isAsmParserOnly = 1 in +def M2_vrcmpys_acc_s1 + : MInst <(outs DoubleRegs:$dst), + (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2), + "$dst += vrcmpys($src1, $src2):<<1:sat", [], + "$dst2 = $dst">; + +def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>; +def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>; + +// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l +let isAsmParserOnly = 1 in +def M2_vrcmpys_s1rp + : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt), + "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">; + + +// S2_cabacdecbin: Cabac decode bin. +let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in +def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>; diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index d39f7d7..0e4dde3 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -11,25 +11,34 @@ // //===----------------------------------------------------------------------===// -let neverHasSideEffects = 1 in -class T_Immext<dag ins> : - EXTENDERInst<(outs), ins, "immext(#$imm)", []>, - Requires<[HasV4T]>; - -def IMMEXT_b : T_Immext<(ins brtarget:$imm)>; -def IMMEXT_c : T_Immext<(ins calltarget:$imm)>; -def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>; -def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>; - -// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address. -def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>; +def addrga: PatLeaf<(i32 AddrGA:$Addr)>; +def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; + +let hasSideEffects = 0 in +class T_Immext<Operand ImmType> + : EXTENDERInst<(outs), (ins ImmType:$imm), + "immext(#$imm)", []> { + bits<32> imm; + let IClass = 0b0000; + + let Inst{27-16} = imm{31-20}; + let Inst{13-0} = imm{19-6}; + } -// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address. -def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>; +def A4_ext : T_Immext<u26_6Imm>; +let isCodeGenOnly = 1 in { + let isBranch = 1 in + def A4_ext_b : T_Immext<brtarget>; + let isCall = 1 in + def A4_ext_c : T_Immext<calltarget>; + def A4_ext_g : T_Immext<globaladdress>; +} -def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr), - (HexagonCONST32 node:$addr), [{ - return hasNumUsesBelowThresGA(N->getOperand(0).getNode()); +def BITPOS32 : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-31]. + // As an SDNode. 
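+  // (Assumption: XformMskToBitPosU5Imm maps a one-hot mask to the index of its
+  //  set bit, e.g. 0x00000010 -> 4, so the immediate reaching this transform is
+  //  expected to have exactly one bit set.)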
+ int32_t imm = N->getSExtValue(); + return XformMskToBitPosU5Imm(imm); }]>; // Hexagon V4 Architecture spec defines 8 instruction classes: @@ -95,63 +104,158 @@ def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr), //===----------------------------------------------------------------------===// // ALU32 + //===----------------------------------------------------------------------===// -// Generate frame index addresses. -let neverHasSideEffects = 1, isReMaterializable = 1, -isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in -def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins IntRegs:$src1, s32Imm:$offset), - "$dst = add($src1, ##$offset)", - []>, - Requires<[HasV4T]>; - -// Rd=cmp.eq(Rs,#s8) -let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2, -isExtentSigned = 1, opExtentBits = 8 in -def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, s8Ext:$s8), - "$Rd = cmp.eq($Rs, #$s8)", - [(set (i32 IntRegs:$Rd), - (i32 (zext (i1 (seteq (i32 IntRegs:$Rs), - s8ExtPred:$s8)))))]>, - Requires<[HasV4T]>; - -// Preserve the TSTBIT generation -def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), - (i32 IntRegs:$src1))), 0)))), - (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - 1, 0))>; - -// Interfered with tstbit generation, above pattern preserves, see : tstbit.ll -// Rd=cmp.ne(Rs,#s8) -let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2, -isExtentSigned = 1, opExtentBits = 8 in -def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, s8Ext:$s8), - "$Rd = !cmp.eq($Rs, #$s8)", - [(set (i32 IntRegs:$Rd), - (i32 (zext (i1 (setne (i32 IntRegs:$Rs), - s8ExtPred:$s8)))))]>, - Requires<[HasV4T]>; - -// Rd=cmp.eq(Rs,Rt) -let validSubTargets = HasV4SubT in -def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, IntRegs:$Rt), - "$Rd = cmp.eq($Rs, $Rt)", - [(set (i32 IntRegs:$Rd), - (i32 (zext (i1 (seteq (i32 IntRegs:$Rs), - IntRegs:$Rt)))))]>, - Requires<[HasV4T]>; - -// Rd=cmp.ne(Rs,Rt) -let validSubTargets = HasV4SubT in -def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, IntRegs:$Rt), - "$Rd = !cmp.eq($Rs, $Rt)", - [(set (i32 IntRegs:$Rd), - (i32 (zext (i1 (setne (i32 IntRegs:$Rs), - IntRegs:$Rt)))))]>, - Requires<[HasV4T]>; + +class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp, + bit OpsRev> + : T_ALU32_3op<mnemonic, MajOp, MinOp, OpsRev, 0> { + let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)"; +} + +let BaseOpcode = "andn_rr", CextOpcode = "andn" in +def A4_andn : T_ALU32_3op_not<"and", 0b001, 0b100, 1>; +let BaseOpcode = "orn_rr", CextOpcode = "orn" in +def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>; + +let CextOpcode = "rcmp.eq" in +def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>; +let CextOpcode = "!rcmp.eq" in +def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>; + +def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>; +def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>; +def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>; + +// Pats for instruction selection. + +// A class to embed the usual comparison patfrags within a zext to i32. +// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same +// names, or else the frag's "body" won't match the operands. 
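+// In C terms the wrapped shape is simply a compare whose i1 result is widened
+// back to i32, i.e. what A4_rcmpeq/A4_rcmpneq compute. A rough sketch of the
+// semantics (not the pattern itself):
+//   int32_t rcmpeq (int32_t rs, int32_t rt) { return rs == rt ? 1 : 0; }
+//   int32_t rcmpneq(int32_t rs, int32_t rt) { return rs != rt ? 1 : 0; }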
+class CmpInReg<PatFrag Op> + : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; + +def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>; +def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>; + +def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>; + +class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm> + : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>, + ImmRegRel { + let InputType = "reg"; + let CextOpcode = mnemonic; + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1100; + let Inst{27-21} = 0b0111110; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{1-0} = Pd; +} + +def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>; +def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>; +def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>; +def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>; +def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>; +def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>; + +let AddedComplexity = 100 in { + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; +} + +class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm, + Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits> + : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm), + "$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>, + ImmRegRel { + let InputType = "imm"; + let CextOpcode = mnemonic; + let isCompare = 1; + let isCommutable = IsComm; + let hasSideEffects = 0; + let isExtendable = IsImmExt; + let opExtendable = !if (IsImmExt, 2, 0); + let isExtentSigned = IsImmSigned; + let opExtentBits = ImmBits; + + bits<2> Pd; + bits<5> Rs; + bits<8> Imm; + + let IClass = 0b1101; + let Inst{27-24} = 0b1101; + let Inst{22-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-5} = Imm; + let Inst{4} = 0b0; + let Inst{3} = IsHalf; + let Inst{1-0} = Pd; +} + +def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>; +def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>; +def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>; +def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8Ext, 1, 1, 8>; +def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>; +def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>; + +class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> + : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8), + "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>, + ImmRegRel { + let InputType = "imm"; + let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq"); + let isExtendable = 1; + let opExtendable = 2; + let isExtentSigned = 1; + let opExtentBits = 8; + let hasNewValue = 1; + + bits<5> Rd; + bits<5> Rs; + bits<8> s8; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{22} = 0b1; + let Inst{21} = IsNeg; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-5} = s8; + let Inst{4-0} = Rd; +} + +def A4_rcmpeqi : 
T_RCMP_EQ_ri<"cmp.eq", 0>; +def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>; + +def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))), + (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>; +def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))), + (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>; + +// Preserve the S2_tstbit_r generation +def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), + (i32 IntRegs:$src1))), 0)))), + (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; //===----------------------------------------------------------------------===// // ALU32 - @@ -162,24 +266,31 @@ def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd), // ALU32/PERM + //===----------------------------------------------------------------------===// -// Combine -// Rdd=combine(Rs, #s8) -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, - neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def COMBINE_rI_V4 : ALU32_ri<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, s8Ext:$src2), - "$dst = combine($src1, #$src2)", - []>, - Requires<[HasV4T]>; - -// Rdd=combine(#s8, Rs) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8, - neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst), - (ins s8Ext:$src1, IntRegs:$src2), - "$dst = combine(#$src1, $src2)", - []>, - Requires<[HasV4T]>; +// Combine a word and an immediate into a register pair. +let hasSideEffects = 0, isExtentSigned = 1, isExtendable = 1, + opExtentBits = 8 in +class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr> + : ALU32Inst <(outs DoubleRegs:$Rdd), ins, AsmStr> { + bits<5> Rdd; + bits<5> Rs; + bits<8> s8; + + let IClass = 0b0111; + let Inst{27-24} = 0b0011; + let Inst{22-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } + +let opExtendable = 2 in +def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8), + "$Rdd = combine($Rs, #$s8)">; + +let opExtendable = 1 in +def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs), + "$Rdd = combine(#$s8, $Rs)">; def HexagonWrapperCombineRI_V4 : SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>; @@ -187,274 +298,355 @@ def HexagonWrapperCombineIR_V4 : SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>; def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i), - (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>, - Requires<[HasV4T]>; + (A4_combineri IntRegs:$r, s8ExtPred:$i)>; def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r), - (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>, - Requires<[HasV4T]>; + (A4_combineir s8ExtPred:$i, IntRegs:$r)>; -let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6, - neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), - (ins s8Imm:$src1, u6Ext:$src2), - "$dst = combine(#$src1, #$src2)", - []>, - Requires<[HasV4T]>; +// A4_combineii: Set two small immediates. 
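+// (combine places its first operand in the high word of Rdd and the second in
+//  the low word, roughly Rdd = ((uint64_t)first << 32) | second in C terms;
+//  this is why Zext64 below is simply A4_combineir with a zero high word.)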
+let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in +def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), + "$Rdd = combine(#$s8, #$U6)"> { + bits<5> Rdd; + bits<8> s8; + bits<6> U6; + + let IClass = 0b0111; + let Inst{27-23} = 0b11001; + let Inst{20-16} = U6{5-1}; + let Inst{13} = U6{0}; + let Inst{12-5} = s8; + let Inst{4-0} = Rdd; + } + +// The complexity of the combine with two immediates should be greater than +// the complexity of a combine involving a register. +let AddedComplexity = 75 in +def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u6ExtPred:$u6), + (A4_combineii imm:$s8, imm:$u6)>; //===----------------------------------------------------------------------===// -// ALU32/PERM + +// ALU32/PERM - //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // LD + //===----------------------------------------------------------------------===// + +def Zext64: OutPatFrag<(ops node:$Rs), + (i64 (A4_combineir 0, (i32 $Rs)))>; +def Sext64: OutPatFrag<(ops node:$Rs), + (i64 (A2_sxtw (i32 $Rs)))>; + +// Patterns to generate indexed loads with different forms of the address: +// - frameindex, +// - base + offset, +// - base (without offset). +multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, + PatLeaf ImmPred, InstHexagon MI> { + def: Pat<(VT (Load AddrFI:$fi)), + (VT (ValueMod (MI AddrFI:$fi, 0)))>; + def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), + (VT (ValueMod (MI IntRegs:$Rs, 0)))>; +} + +defm: Loadxm_pat<extloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>; +defm: Loadxm_pat<zextloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>; +defm: Loadxm_pat<sextloadi8, i64, Sext64, s11_0ExtPred, L2_loadrb_io>; +defm: Loadxm_pat<sextloadi16, i64, Sext64, s11_1ExtPred, L2_loadrh_io>; + +// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). +def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; + //===----------------------------------------------------------------------===// // Template class for load instructions with Absolute set addressing mode. 
//===----------------------------------------------------------------------===// -let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, -validSubTargets = HasV4SubT, addrMode = AbsoluteSet in -class T_LD_abs_set<string mnemonic, RegisterClass RC>: - LDInst2<(outs RC:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = "#mnemonic#"($dst2=##$addr)", - []>, - Requires<[HasV4T]>; +let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet, + hasSideEffects = 0 in +class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>: + LDInst<(outs RC:$dst1, IntRegs:$dst2), + (ins u6Ext:$addr), + "$dst1 = "#mnemonic#"($dst2 = #$addr)", + []> { + bits<7> name; + bits<5> dst1; + bits<5> dst2; + bits<6> addr; + + let IClass = 0b1001; + let Inst{27-25} = 0b101; + let Inst{24-21} = MajOp; + let Inst{13-12} = 0b01; + let Inst{4-0} = dst1; + let Inst{20-16} = dst2; + let Inst{11-8} = addr{5-2}; + let Inst{6-5} = addr{1-0}; +} + +let accessSize = ByteAccess, hasNewValue = 1 in { + def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>; + def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>; +} -def LDrid_abs_set_V4 : T_LD_abs_set <"memd", DoubleRegs>; -def LDrib_abs_set_V4 : T_LD_abs_set <"memb", IntRegs>; -def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>; -def LDrih_abs_set_V4 : T_LD_abs_set <"memh", IntRegs>; -def LDriw_abs_set_V4 : T_LD_abs_set <"memw", IntRegs>; -def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>; +let accessSize = HalfWordAccess, hasNewValue = 1 in { + def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>; + def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>; + def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>; + def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>; +} +let accessSize = WordAccess, hasNewValue = 1 in + def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>; -// multiclass for load instructions with base + register offset -// addressing mode -multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : LDInst2<(outs RC:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)", - []>, Requires<[HasV4T]>; +let accessSize = WordAccess in { + def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>; + def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>; } -multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>; - // Predicate new - defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>; +let accessSize = DoubleWordAccess in +def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>; + +let accessSize = ByteAccess in + def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>; + +let accessSize = HalfWordAccess in +def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>; + +// Load - Indirect with long offset +let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1, +opExtentBits = 6, opExtendable = 3 in +class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC, + bits<4> MajOp> + : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3), + "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)", + [] >, ImmRegShl { + bits<5> dst; + bits<5> src1; + bits<2> src2; + 
bits<6> src3; + let CextOpcode = CextOp; + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12} = 0b1; + let Inst{11-8} = src3{5-2}; + let Inst{7} = src2{0}; + let Inst{6-5} = src3{1-0}; + let Inst{4-0} = dst; } + +let accessSize = ByteAccess in { + def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>; + def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>; + def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo", + DoubleRegs, 0b0100>; } -let neverHasSideEffects = 1 in -multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> { - let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { - let isPredicable = 1 in - def NAME#_V4 : LDInst2<(outs RC:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), - "$dst = "#mnemonic#"($src1+$src2<<#$offset)", - []>, Requires<[HasV4T]>; - - let isPredicated = 1 in { - defm Pt_V4 : ld_idxd_shl_pred<mnemonic, RC, 0 >; - defm NotPt_V4 : ld_idxd_shl_pred<mnemonic, RC, 1>; - } - } +let accessSize = HalfWordAccess in { + def L4_loadrh_ur : T_LoadAbsReg<"memh", "LDrih", IntRegs, 0b1010>; + def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>; + def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>; + def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>; + def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo", + DoubleRegs, 0b0010>; } -let addrMode = BaseRegOffset in { - let accessSize = ByteAccess in { - defm LDrib_indexed_shl: ld_idxd_shl<"memb", "LDrib", IntRegs>, - AddrModeRel; - defm LDriub_indexed_shl: ld_idxd_shl<"memub", "LDriub", IntRegs>, - AddrModeRel; - } - let accessSize = HalfWordAccess in { - defm LDrih_indexed_shl: ld_idxd_shl<"memh", "LDrih", IntRegs>, AddrModeRel; - defm LDriuh_indexed_shl: ld_idxd_shl<"memuh", "LDriuh", IntRegs>, - AddrModeRel; - } - let accessSize = WordAccess in - defm LDriw_indexed_shl: ld_idxd_shl<"memw", "LDriw", IntRegs>, AddrModeRel; +let accessSize = WordAccess in { + def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>; + def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>; + def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>; +} + +let accessSize = DoubleWordAccess in +def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>; + - let accessSize = DoubleWordAccess in - defm LDrid_indexed_shl: ld_idxd_shl<"memd", "LDrid", DoubleRegs>, - AddrModeRel; +multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> { + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3)))), + (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>; + + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tglobaladdr:$src2)))), + (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; } -// 'def pats' for load instructions with base + register offset and non-zero -// immediate value. Immediate value is used to left-shift the second -// register operand. 
-let AddedComplexity = 40 in { -def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$offset)))), - (LDrib_indexed_shl_V4 IntRegs:$src1, - IntRegs:$src2, u2ImmPred:$offset)>, - Requires<[HasV4T]>; - -def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$offset)))), - (LDriub_indexed_shl_V4 IntRegs:$src1, - IntRegs:$src2, u2ImmPred:$offset)>, - Requires<[HasV4T]>; - -def : Pat <(i32 (extloadi8 (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$offset)))), - (LDriub_indexed_shl_V4 IntRegs:$src1, - IntRegs:$src2, u2ImmPred:$offset)>, - Requires<[HasV4T]>; - -def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$offset)))), - (LDrih_indexed_shl_V4 IntRegs:$src1, - IntRegs:$src2, u2ImmPred:$offset)>, - Requires<[HasV4T]>; - -def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$offset)))), - (LDriuh_indexed_shl_V4 IntRegs:$src1, - IntRegs:$src2, u2ImmPred:$offset)>, - Requires<[HasV4T]>; - -def : Pat <(i32 (extloadi16 (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$offset)))), - (LDriuh_indexed_shl_V4 IntRegs:$src1, - IntRegs:$src2, u2ImmPred:$offset)>, - Requires<[HasV4T]>; - -def : Pat <(i32 (load (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$offset)))), - (LDriw_indexed_shl_V4 IntRegs:$src1, - IntRegs:$src2, u2ImmPred:$offset)>, - Requires<[HasV4T]>; - -def : Pat <(i64 (load (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$offset)))), - (LDrid_indexed_shl_V4 IntRegs:$src1, - IntRegs:$src2, u2ImmPred:$offset)>, - Requires<[HasV4T]>; +let AddedComplexity = 60 in { +defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>; +defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>; +defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>; + +defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>; +defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>; +defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>; + +defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>; +defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>; } +//===----------------------------------------------------------------------===// +// Template classes for the non-predicated load instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>: + LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$u2), + "$dst = "#mnemonic#"($src1 + $src2<<#$u2)", + [], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel { + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<2> u2; -// 'def pats' for load instruction base + register offset and -// zero immediate value. 
-let AddedComplexity = 10 in { -def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))), - (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; + let IClass = 0b0011; -def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))), - (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; + let Inst{27-24} = 0b1010; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-0} = dst; + } -def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))), - (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// Template classes for the predicated load instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1 in +class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isNot, bit isPredNew>: + LDInst <(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$u2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)", + [], "", V4LDST_tc_ld_SLOT01>, AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<5> src2; + bits<5> src3; + bits<2> u2; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; -def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))), - (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; + let IClass = 0b0011; -def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))), - (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; + let Inst{27-26} = 0b00; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = MajOp; + let Inst{20-16} = src2; + let Inst{12-8} = src3; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{6-5} = src1; + let Inst{4-0} = dst; + } -def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))), - (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// multiclass for load instructions with base + register offset +// addressing mode +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, addrMode = BaseRegOffset in +multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp > { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl, + InputType = "reg" in { + let isPredicable = 1 in + def L4_#NAME#_rr : T_load_rr <mnemonic, RC, MajOp>; -def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))), - (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; + // Predicated + def L4_p#NAME#t_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 0>; + def L4_p#NAME#f_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 0>; -def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))), - (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, - Requires<[HasV4T]>; + // Predicated new + def L4_p#NAME#tnew_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 1>; + def L4_p#NAME#fnew_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 1>; + } } -// zext i1->i64 -def : Pat <(i64 (zext (i1 PredRegs:$src1))), - (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, - Requires<[HasV4T]>; +let hasNewValue = 1, accessSize = ByteAccess in { + defm 
loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>; + defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>; +} -// zext i32->i64 -def : Pat <(i64 (zext (i32 IntRegs:$src1))), - (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>, - Requires<[HasV4T]>; -// zext i8->i64 -def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), - (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), - s11_0ExtPred:$offset))), - (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1, - s11_0ExtPred:$offset)))>, - Requires<[HasV4T]>; +let hasNewValue = 1, accessSize = HalfWordAccess in { + defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>; + defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>; +} + +let hasNewValue = 1, accessSize = WordAccess in +defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>; + +let accessSize = DoubleWordAccess in +defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>; + +// 'def pats' for load instructions with base + register offset and non-zero +// immediate value. Immediate value is used to left-shift the second +// register operand. +class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; + +let AddedComplexity = 40 in { + def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_pat<load, i32, L4_loadri_rr>; + def: Loadxs_pat<load, i64, L4_loadrd_rr>; +} + +// 'def pats' for load instruction base + register offset and +// zero immediate value. 
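+// (In C terms the matched address is  addr = Rs + (Rt << u2)  with a 2-bit
+//  shift amount; the simple patterns below use the same shape with the shift
+//  amount fixed at 0. This is a sketch of the addressing, not the pattern.)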
+class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI> + : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; + +let AddedComplexity = 20 in { + def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxs_simple_pat<load, i32, L4_loadri_rr>; + def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>; +} // zext i1->i64 -def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)), - (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1), - s11_0ExtPred:$offset))), - (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1, - s11_0ExtPred:$offset)))>, - Requires<[HasV4T]>; - -// zext i16->i64 -def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), - (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), - s11_1ExtPred:$offset))), - (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1, - s11_1ExtPred:$offset)))>, - Requires<[HasV4T]>; - -// anyext i16->i64 -def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), - (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 20 in -def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), - s11_1ExtPred:$offset))), - (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1, - s11_1ExtPred:$offset)))>, - Requires<[HasV4T]>; +def: Pat<(i64 (zext (i1 PredRegs:$src1))), + (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>; + +// zext i32->i64 +def: Pat<(i64 (zext (i32 IntRegs:$src1))), + (Zext64 IntRegs:$src1)>; // zext i32->i64 def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), - (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, - Requires<[HasV4T]>; + (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>; let AddedComplexity = 100 in def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, - s11_2ExtPred:$offset)))>, - Requires<[HasV4T]>; + (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1, + s11_2ExtPred:$offset)))>; // anyext i32->i64 def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), - (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, - Requires<[HasV4T]>; - -let AddedComplexity = 100 in -def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, - s11_2ExtPred:$offset)))>, - Requires<[HasV4T]>; - - + (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>; //===----------------------------------------------------------------------===// // LD - @@ -467,194 +659,357 @@ def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), //===----------------------------------------------------------------------===// // Template class for store instructions with Absolute set addressing mode. 
//===----------------------------------------------------------------------===// -let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT, -addrMode = AbsoluteSet in -class T_ST_abs_set<string mnemonic, RegisterClass RC>: - STInst2<(outs IntRegs:$dst1), - (ins RC:$src1, u0AlwaysExt:$src2), - mnemonic#"($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; +let isExtended = 1, opExtendable = 1, opExtentBits = 6, + addrMode = AbsoluteSet, isNVStorable = 1 in +class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC, + bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> + : STInst<(outs IntRegs:$dst), + (ins u6Ext:$addr, RC:$src), + mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel { + bits<5> dst; + bits<6> addr; + bits<5> src; + let accessSize = AccessSz; + let BaseOpcode = BaseOp#"_AbsSet"; + + let IClass = 0b1010; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = dst; + let Inst{13} = 0b0; + let Inst{12-8} = src; + let Inst{7} = 0b1; + let Inst{5-0} = addr; + } -def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>; -def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>; -def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>; -def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>; +def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>; +def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010, + HalfWordAccess>; +def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>; -//===----------------------------------------------------------------------===// -// multiclass for store instructions with base + register offset addressing -// mode -//===----------------------------------------------------------------------===// -multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : STInst2<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - RC:$src5), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5", - []>, - Requires<[HasV4T]>; +let isNVStorable = 0 in { + def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs, + 0b011, HalfWordAccess, 1>; + def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs, + 0b110, DoubleWordAccess>; } -multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>; - // Predicate new - defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>; +let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2, +isExtended = 1, opExtentBits= 6 in +class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp, + MemAccessSize AccessSz > + : NVInst <(outs IntRegs:$dst), + (ins u6Ext:$addr, IntRegs:$src), + mnemonic#"($dst = #$addr) = $src.new">, NewValueRel { + bits<5> dst; + bits<6> addr; + bits<3> src; + let accessSize = AccessSz; + let BaseOpcode = BaseOp#"_AbsSet"; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = dst; + let Inst{13-11} = 0b000; + let Inst{12-11} = MajOp; + let Inst{10-8} = src; + let Inst{7} = 0b1; + let Inst{5-0} = addr; } + +let mayStore = 1, addrMode = AbsoluteSet in { + def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>; + def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>; + def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, 
WordAccess>; } -let isNVStorable = 1 in -multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> { - let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { - let isPredicable = 1 in - def NAME#_V4 : STInst2<(outs), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4), - mnemonic#"($src1+$src2<<#$src3) = $src4", - []>, - Requires<[HasV4T]>; - - let isPredicated = 1 in { - defm Pt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 0 >; - defm NotPt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 1>; - } - } +let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm", +addrMode = BaseLongOffset, AddedComplexity = 40 in +class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0> + : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4), + mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""), + []>, ImmRegShl, NewValueRel { + + bits<5> src1; + bits<2> src2; + bits<6> src3; + bits<5> src4; + + let accessSize = AccessSz; + let CextOpcode = CextOp; + let BaseOpcode = CextOp#"_shl"; + let IClass = 0b1010; + + let Inst{27-24} =0b1101; + let Inst{23-21} = MajOp; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12-8} = src4; + let Inst{7} = 0b1; + let Inst{6} = src2{0}; + let Inst{5-0} = src3; } -// multiclass for new-value store instructions with base + register offset -// addressing mode. -multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - RC:$src5), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5.new", - []>, - Requires<[HasV4T]>; +def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>; +def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010, + HalfWordAccess>; +def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011, + HalfWordAccess, 1>; +def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>; +def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110, + DoubleWordAccess>; + +let AddedComplexity = 40 in +multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, + PatFrag stOp> { + def : Pat<(stOp (VT RC:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u0AlwaysExtPred:$src3)), + (MI IntRegs:$src1, u2ImmPred:$src2, u0AlwaysExtPred:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; } -multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>; - // Predicate new - defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>; +defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>; +defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>; +defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>; +defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>; + +let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset, + 
opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in +class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp, + MemAccessSize AccessSz> + : NVInst <(outs ), + (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4), + mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel { + bits<5> src1; + bits<2> src2; + bits<6> src3; + bits<3> src4; + + let CextOpcode = CextOp; + let BaseOpcode = CextOp#"_shl"; + let IClass = 0b1010; + + let Inst{27-21} = 0b1101101; + let Inst{12-11} = 0b00; + let Inst{7} = 0b1; + let Inst{20-16} = src1; + let Inst{13} = src2{1}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src4; + let Inst{6} = src2{0}; + let Inst{5-0} = src3; } -} -let mayStore = 1, isNVStore = 1 in -multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> { - let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { - let isPredicable = 1 in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4), - mnemonic#"($src1+$src2<<#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - - let isPredicated = 1 in { - defm Pt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 0 >; - defm NotPt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 1>; - } +def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>; +def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>; +def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>; + +//===----------------------------------------------------------------------===// +// Template classes for the non-predicated store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicable = 1 in +class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH> + : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt), + mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""), + [],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel { + + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<5> Rt; + + let IClass = 0b0011; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-0} = Rt; } -} -let addrMode = BaseRegOffset, neverHasSideEffects = 1, -validSubTargets = HasV4SubT in { - let accessSize = ByteAccess in - defm STrib_indexed_shl: ST_Idxd_shl<"memb", "STrib", IntRegs>, - ST_Idxd_shl_nv<"memb", "STrib", IntRegs>, AddrModeRel; +//===----------------------------------------------------------------------===// +// Template classes for the predicated store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1 in +class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isNot, bit isPredNew, bit isH> + : STInst <(outs), + (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt), + + !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""), + [], "", V4LDST_tc_st_SLOT01> , AddrModeRel{ + bits<2> Pv; + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<5> Rt; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; - let accessSize = HalfWordAccess in - defm STrih_indexed_shl: ST_Idxd_shl<"memh", "STrih", IntRegs>, - ST_Idxd_shl_nv<"memh", "STrih", IntRegs>, AddrModeRel; + let IClass = 0b0011; - let accessSize = WordAccess in - defm 
STriw_indexed_shl: ST_Idxd_shl<"memw", "STriw", IntRegs>, - ST_Idxd_shl_nv<"memw", "STriw", IntRegs>, AddrModeRel; + let Inst{27-26} = 0b01; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{6-5} = Pv; + let Inst{4-0} = Rt; + } - let isNVStorable = 0, accessSize = DoubleWordAccess in - defm STrid_indexed_shl: ST_Idxd_shl<"memd", "STrid", DoubleRegs>, AddrModeRel; -} +//===----------------------------------------------------------------------===// +// Template classes for the new-value store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicable = 1, isNewValue = 1, opNewValue = 3 in +class T_store_new_rr <string mnemonic, bits<2> MajOp> : + NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt), + mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new", + [],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel { -let Predicates = [HasV4T], AddedComplexity = 10 in { -def : Pat<(truncstorei8 (i32 IntRegs:$src4), - (add IntRegs:$src1, (shl IntRegs:$src2, - u2ImmPred:$src3))), - (STrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3, IntRegs:$src4)>; + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<3> Nt; -def : Pat<(truncstorei16 (i32 IntRegs:$src4), - (add IntRegs:$src1, (shl IntRegs:$src2, - u2ImmPred:$src3))), - (STrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3, IntRegs:$src4)>; + let IClass = 0b0011; -def : Pat<(store (i32 IntRegs:$src4), - (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))), - (STriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3, IntRegs:$src4)>; + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let Inst{7} = u2{0}; + let Inst{4-3} = MajOp; + let Inst{2-0} = Nt; + } -def : Pat<(store (i64 DoubleRegs:$src4), - (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))), - (STrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, - u2ImmPred:$src3, DoubleRegs:$src4)>; -} +//===----------------------------------------------------------------------===// +// Template classes for the predicated new-value store instructions with +// base + register offset addressing mode +//===----------------------------------------------------------------------===// +let isPredicated = 1, isNewValue = 1, opNewValue = 4 in +class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew> + : NVInst<(outs), + (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt), + !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new", + [], "", V4LDST_tc_st_SLOT0>, AddrModeRel { + bits<2> Pv; + bits<5> Rs; + bits<5> Ru; + bits<2> u2; + bits<3> Nt; + + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; -let isExtended = 1, opExtendable = 2 in -class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> : - STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4), - mnemonic#"($src1<<#$src2+##$src3) = $src4", - [(stOp (VT RC:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; + let IClass = 0b0011; + let Inst{27-26} = 0b01; + let Inst{25} = isPredNew; + let Inst{24} = isNot; + let Inst{23-21} = 0b101; + let Inst{20-16} = Rs; + let Inst{12-8} = Ru; + let Inst{13} = u2{1}; + let 
Inst{7} = u2{0}; + let Inst{6-5} = Pv; + let Inst{4-3} = MajOp; + let Inst{2-0} = Nt; + } -let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in -class T_ST_LongOff_nv <string mnemonic> : - NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - mnemonic#"($src1<<#$src2+##$src3) = $src4.new", - []>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// multiclass for store instructions with base + register offset addressing +// mode +//===----------------------------------------------------------------------===// +let isNVStorable = 1 in +multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC, + bits<3> MajOp, bit isH = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { + def S4_#NAME#_rr : T_store_rr <mnemonic, RC, MajOp, isH>; -multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> { - let BaseOpcode = BaseOp#"_shl" in { - let isNVStorable = 1 in - def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>; + // Predicated + def S4_p#NAME#t_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 0, isH>; + def S4_p#NAME#f_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 0, isH>; - def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>; + // Predicated new + def S4_p#NAME#tnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 1, isH>; + def S4_p#NAME#fnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 1, isH>; } } -let AddedComplexity = 10, validSubTargets = HasV4SubT in { - def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>; - defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel; - defm STrih_shl : ST_LongOff <"memh", "Strih", truncstorei16>, NewValueRel; - defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel; +//===----------------------------------------------------------------------===// +// multiclass for new-value store instructions with base + register offset +// addressing mode. 
+//===----------------------------------------------------------------------===// +let mayStore = 1, isNVStore = 1 in +multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC, + bits<2> MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { + def S4_#NAME#new_rr : T_store_new_rr<mnemonic, MajOp>; + + // Predicated + def S4_p#NAME#newt_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 0>; + def S4_p#NAME#newf_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 0>; + + // Predicated new + def S4_p#NAME#newtnew_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 1>; + def S4_p#NAME#newfnew_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 1>; + } } -let AddedComplexity = 40 in -multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT, - PatFrag stOp> { - def : Pat<(stOp (VT RC:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (NumUsesBelowThresCONST32 tglobaladdr:$src3))), - (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; +let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in { + let accessSize = ByteAccess in + defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>, + ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>; - def : Pat<(stOp (VT RC:$src4), - (add IntRegs:$src1, - (NumUsesBelowThresCONST32 tglobaladdr:$src3))), - (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; + let accessSize = HalfWordAccess in + defm storerh: ST_Idxd_shl<"memh", "STrih", IntRegs, 0b010>, + ST_Idxd_shl_nv<"memh", "STrih", IntRegs, 0b01>; + + let accessSize = WordAccess in + defm storeri: ST_Idxd_shl<"memw", "STriw", IntRegs, 0b100>, + ST_Idxd_shl_nv<"memw", "STriw", IntRegs, 0b10>; + + let isNVStorable = 0, accessSize = DoubleWordAccess in + defm storerd: ST_Idxd_shl<"memd", "STrid", DoubleRegs, 0b110>; + + let isNVStorable = 0, accessSize = HalfWordAccess in + defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>; } -defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>; -defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>; -defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>; -defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>; +class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI> + : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2)))), + (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; + +let AddedComplexity = 40 in { + def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>; + def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>; + def: Storexs_pat<store, I32, S4_storeri_rr>; + def: Storexs_pat<store, I64, S4_storerd_rr>; +} // memd(Rx++#s4:3)=Rtt // memd(Rx++#s4:3:circ(Mu))=Rtt @@ -668,75 +1023,151 @@ defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>; // TODO: needs to be implemented. 
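For orientation, the Storexs_pat patterns added above select stores whose address is computed as base + (index << shift) onto the new base-plus-scaled-register store instructions (S4_storerb_rr, S4_storerh_rr, S4_storeri_rr, S4_storerd_rr). A minimal C++ sketch of source code that produces this DAG shape is shown below; the function name and the quoted assembly form are illustrative assumptions, not taken from this patch.

// Illustrative only: an array store whose address is base + (index << 2).
// With the Storexs_pat patterns above, the add/shl feeding the store can be
// folded into a single scaled-register store of the form
// "memw(Rs + Ru<<#2) = Rt".
void store_indexed(int *p, int i, int v) {
  p[i] = v;  // address = p + (i << 2)
}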
//===----------------------------------------------------------------------===// +// Template class +//===----------------------------------------------------------------------===// +let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8, + opExtendable = 2 in +class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp > + : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8Ext:$S8), + mnemonic#"($Rs+#$offset)=#$S8", + [], "", V4LDST_tc_st_SLOT01>, + ImmRegRel, PredNewRel { + bits<5> Rs; + bits<8> S8; + bits<8> offset; + bits<6> offsetBits; + + string OffsetOpStr = !cast<string>(OffsetOp); + let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2}, + !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1}, + /* u6_0Imm */ offset{5-0})); + + let IClass = 0b0011; + + let Inst{27-25} = 0b110; + let Inst{22-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{12-7} = offsetBits; + let Inst{13} = S8{7}; + let Inst{6-0} = S8{6-0}; + } + +let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6, + opExtendable = 3 in +class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp, + bit isPredNot, bit isPredNew > + : STInst <(outs ), + (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6Ext:$S6), + !if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($Rs+#$offset)=#$S6", + [], "", V4LDST_tc_st_SLOT01>, + ImmRegRel, PredNewRel { + bits<2> Pv; + bits<5> Rs; + bits<6> S6; + bits<8> offset; + bits<6> offsetBits; + + string OffsetOpStr = !cast<string>(OffsetOp); + let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2}, + !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1}, + /* u6_0Imm */ offset{5-0})); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + let IClass = 0b0011; + + let Inst{27-25} = 0b100; + let Inst{24} = isPredNew; + let Inst{23} = isPredNot; + let Inst{22-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = S6{5}; + let Inst{12-7} = offsetBits; + let Inst{6-5} = Pv; + let Inst{4-0} = S6{4-0}; + } + + +//===----------------------------------------------------------------------===// // multiclass for store instructions with base + immediate offset // addressing mode and immediate stored value. 
// mem[bhw](Rx++#s4:3)=#s8 // if ([!]Pv[.new]) mem[bhw](Rx++#s4:3)=#s6 //===----------------------------------------------------------------------===// -multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : STInst2<(outs), - (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($src2+#$src3) = #$src4", - []>, - Requires<[HasV4T]>; -} -multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>; - // Predicate new - defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>; - } +multiclass ST_Imm_Pred <string mnemonic, Operand OffsetOp, bits<2> MajOp, + bit PredNot> { + def _io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 0>; + // Predicate new + def new_io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 1>; } -let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in -multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> { +multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp, + bits<2> MajOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in { - let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in - def NAME#_V4 : STInst2<(outs), - (ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3), - mnemonic#"($src1+#$src2) = #$src3", - []>, - Requires<[HasV4T]>; - - let opExtendable = 3, opExtentBits = 6, isPredicated = 1 in { - defm Pt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 0>; - defm NotPt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 1 >; - } + def _io : T_StoreImm <mnemonic, OffsetOp, MajOp>; + + defm t : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 0>; + defm f : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 1>; } } -let addrMode = BaseImmOffset, InputType = "imm", -validSubTargets = HasV4SubT in { +let hasSideEffects = 0, addrMode = BaseImmOffset, + InputType = "imm" in { let accessSize = ByteAccess in - defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel; + defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>; let accessSize = HalfWordAccess in - defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel; + defm S4_storeirh : ST_Imm<"memh", "STrih", u6_1Imm, 0b01>; let accessSize = WordAccess in - defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel; + defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>; } -let Predicates = [HasV4T], AddedComplexity = 10 in { -def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)), - (STrib_imm_V4 IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>; +def IMM_BYTE : SDNodeXForm<imm, [{ + // -1 etc is represented as 255 etc + // assigning to a byte restores our desired signed value. + int8_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, MVT::i32); +}]>; -def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1, - u6_1ImmPred:$src2)), - (STrih_imm_V4 IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>; +def IMM_HALF : SDNodeXForm<imm, [{ + // -1 etc is represented as 65535 etc + // assigning to a short restores our desired signed value. 
+ int16_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, MVT::i32); +}]>; -def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)), - (STriw_imm_V4 IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>; +def IMM_WORD : SDNodeXForm<imm, [{ + // -1 etc can be represented as 4294967295 etc + // Currently, it's not doing this. But some optimization + // might convert -1 to a large +ve number. + // assigning to a word restores our desired signed value. + int32_t imm = N->getSExtValue(); + return CurDAG->getTargetConstant(imm, MVT::i32); +}]>; + +def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; +def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; +def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; + +let AddedComplexity = 40 in { + // Not using frameindex patterns for these stores, because the offset + // is not extendable. This could cause problems during removing the frame + // indices, since the offset with respect to R29/R30 may not fit in the + // u6 field. + def: Storexm_add_pat<truncstorei8, s8ExtPred, u6_0ImmPred, ToImmByte, + S4_storeirb_io>; + def: Storexm_add_pat<truncstorei16, s8ExtPred, u6_1ImmPred, ToImmHalf, + S4_storeirh_io>; + def: Storexm_add_pat<store, s8ExtPred, u6_2ImmPred, ToImmWord, + S4_storeiri_io>; } -let AddedComplexity = 6 in -def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), - (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, - Requires<[HasV4T]>; +def: Storexm_simple_pat<truncstorei8, s8ExtPred, ToImmByte, S4_storeirb_io>; +def: Storexm_simple_pat<truncstorei16, s8ExtPred, ToImmHalf, S4_storeirh_io>; +def: Storexm_simple_pat<store, s8ExtPred, ToImmWord, S4_storeiri_io>; // memb(Rx++#s4:0:circ(Mu))=Rt // memb(Rx++I:circ(Mu))=Rt @@ -744,16 +1175,10 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), // memb(Rx++Mu:brev)=Rt // memb(gp+#u16:0)=Rt - // Store halfword. // TODO: needs to be implemented // memh(Re=#U6)=Rt.H // memh(Rs+#s11:1)=Rt.H -let AddedComplexity = 6 in -def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), - (STrih_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, - Requires<[HasV4T]>; - // memh(Rs+Ru<<#u2)=Rt.H // TODO: needs to be implemented. @@ -770,7 +1195,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // if ([!]Pv[.new]) memh(#u6)=Rt.H // if ([!]Pv[.new]) memh(#u6)=Rt - // if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H // TODO: needs to be implemented. @@ -780,20 +1204,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // Store word. // memw(Re=#U6)=Rt // TODO: Needs to be implemented. - -// Store predicate: -let neverHasSideEffects = 1 in -def STriw_pred_V4 : STInst2<(outs), - (ins MEMri:$addr, PredRegs:$src1), - "Error; should not emit", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 6 in -def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), - (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, - Requires<[HasV4T]>; - // memw(Rx++#s4:2)=Rt // memw(Rx++#s4:2:circ(Mu))=Rt // memw(Rx++I:circ(Mu))=Rt @@ -809,175 +1219,285 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), // NV/ST + //===----------------------------------------------------------------------===// -// multiclass for new-value store instructions with base + immediate offset. 
-// -multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC, - Operand predImmOp, bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; -} +let opNewValue = 2, opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in +class T_store_io_nv <string mnemonic, RegisterClass RC, + Operand ImmOp, bits<2>MajOp> + : NVInst_V4 <(outs), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1+#$src2) = $src3.new", + [],"",ST_tc_st_SLOT0> { + bits<5> src1; + bits<13> src2; // Actual address offset + bits<3> src3; + bits<11> offsetBits; // Represents offset encoding + + let opExtentBits = !if (!eq(mnemonic, "memb"), 11, + !if (!eq(mnemonic, "memh"), 12, + !if (!eq(mnemonic, "memw"), 13, 0))); -multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp, - bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>; - // Predicate new - defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>; + let opExtentAlign = !if (!eq(mnemonic, "memb"), 0, + !if (!eq(mnemonic, "memh"), 1, + !if (!eq(mnemonic, "memw"), 2, 0))); + + let offsetBits = !if (!eq(mnemonic, "memb"), src2{10-0}, + !if (!eq(mnemonic, "memh"), src2{11-1}, + !if (!eq(mnemonic, "memw"), src2{12-2}, 0))); + + let IClass = 0b1010; + + let Inst{27} = 0b0; + let Inst{26-25} = offsetBits{10-9}; + let Inst{24-21} = 0b1101; + let Inst{20-16} = src1; + let Inst{13} = offsetBits{8}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7-0} = offsetBits{7-0}; } -} -let mayStore = 1, isNVStore = 1, neverHasSideEffects = 1, isExtendable = 1 in +let opExtendable = 2, opNewValue = 3, isPredicated = 1 in +class T_pstore_io_nv <string mnemonic, RegisterClass RC, Operand predImmOp, + bits<2>MajOp, bit PredNot, bit isPredNew> + : NVInst_V4 <(outs), + (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC:$src4), + !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2+#$src3) = $src4.new", + [],"",V2LDST_tc_st_SLOT0> { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src4; + bits<6> offsetBits; // Represents offset encoding + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = PredNot; + let opExtentBits = !if (!eq(mnemonic, "memb"), 6, + !if (!eq(mnemonic, "memh"), 7, + !if (!eq(mnemonic, "memw"), 8, 0))); + + let opExtentAlign = !if (!eq(mnemonic, "memb"), 0, + !if (!eq(mnemonic, "memh"), 1, + !if (!eq(mnemonic, "memw"), 2, 0))); + + let offsetBits = !if (!eq(mnemonic, "memb"), src3{5-0}, + !if (!eq(mnemonic, "memh"), src3{6-1}, + !if (!eq(mnemonic, "memw"), src3{7-2}, 0))); + + let IClass = 0b0100; + + let Inst{27} = 0b0; + let Inst{26} = PredNot; + let Inst{25} = isPredNew; + let Inst{24-21} = 0b0101; + let Inst{20-16} = src2; + let Inst{13} = offsetBits{5}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src4; + let Inst{7-3} = offsetBits{4-0}; + let Inst{2} = 0b0; + let Inst{1-0} = src1; + } + +// multiclass for new-value store instructions with base + immediate offset. 
+// +let mayStore = 1, isNVStore = 1, isNewValue = 1, hasSideEffects = 0, + isExtendable = 1 in multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC, - Operand ImmOp, Operand predImmOp, bits<5> ImmBits, - bits<5> PredImmBits> { + Operand ImmOp, Operand predImmOp, bits<2> MajOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { - let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, - isPredicable = 1 in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), - mnemonic#"($src1+#$src2) = $src3.new", - []>, - Requires<[HasV4T]>; - - let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits, - isPredicated = 1 in { - defm Pt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 0>; - defm NotPt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 1>; - } + def S2_#NAME#new_io : T_store_io_nv <mnemonic, RC, ImmOp, MajOp>; + // Predicated + def S2_p#NAME#newt_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 0, 0>; + def S2_p#NAME#newf_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 1, 0>; + // Predicated new + def S4_p#NAME#newtnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp, + MajOp, 0, 1>; + def S4_p#NAME#newfnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp, + MajOp, 1, 1>; } } -let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in { +let addrMode = BaseImmOffset, InputType = "imm" in { let accessSize = ByteAccess in - defm STrib_indexed: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext, - u6_0Ext, 11, 6>, AddrModeRel; + defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext, + u6_0Ext, 0b00>, AddrModeRel; - let accessSize = HalfWordAccess in - defm STrih_indexed: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext, - u6_1Ext, 12, 7>, AddrModeRel; + let accessSize = HalfWordAccess, opExtentAlign = 1 in + defm storerh: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext, + u6_1Ext, 0b01>, AddrModeRel; - let accessSize = WordAccess in - defm STriw_indexed: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext, - u6_2Ext, 13, 8>, AddrModeRel; + let accessSize = WordAccess, opExtentAlign = 2 in + defm storeri: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext, + u6_2Ext, 0b10>, AddrModeRel; } -// multiclass for new-value store instructions with base + immediate offset. -// and MEMri operand. 
-multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, RC: $src2), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($addr) = $src2.new", - []>, - Requires<[HasV4T]>; -} - -multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>; - - // Predicate new - defm _cdn#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 1>; - } -} - -let mayStore = 1, isNVStore = 1, isExtendable = 1, neverHasSideEffects = 1 in -multiclass ST_MEMri_nv<string mnemonic, string CextOp, RegisterClass RC, - bits<5> ImmBits, bits<5> PredImmBits> { - - let CextOpcode = CextOp, BaseOpcode = CextOp in { - let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, - isPredicable = 1 in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins MEMri:$addr, RC:$src), - mnemonic#"($addr) = $src.new", - []>, - Requires<[HasV4T]>; - - let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits, - neverHasSideEffects = 1, isPredicated = 1 in { - defm Pt : ST_MEMri_Pred_nv<mnemonic, RC, 0>; - defm NotPt : ST_MEMri_Pred_nv<mnemonic, RC, 1>; - } +//===----------------------------------------------------------------------===// +// Post increment loads with register offset. +//===----------------------------------------------------------------------===// + +let hasNewValue = 1 in +def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>; + +def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>; + +let hasSideEffects = 0, addrMode = PostInc in +class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz> + : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_), + (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3), + "$dst = "#mnemonic#"($src2++$src3)", [], + "$src1 = $dst, $src2 = $_dst_"> { + bits<5> dst; + bits<5> src2; + bits<1> src3; + + let accessSize = AccessSz; + let IClass = 0b1001; + + let Inst{27-25} = 0b110; + let Inst{24-21} = MajOp; + let Inst{20-16} = src2; + let Inst{13} = src3; + let Inst{12} = 0b0; + let Inst{7} = 0b0; + let Inst{4-0} = dst; } -} -let addrMode = BaseImmOffset, isMEMri = "true", validSubTargets = HasV4SubT, -mayStore = 1 in { - let accessSize = ByteAccess in - defm STrib: ST_MEMri_nv<"memb", "STrib", IntRegs, 11, 6>, AddrModeRel; +def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>; +def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>; - let accessSize = HalfWordAccess in - defm STrih: ST_MEMri_nv<"memh", "STrih", IntRegs, 12, 7>, AddrModeRel; +//===----------------------------------------------------------------------===// +// Template class for non-predicated post increment .new stores +// mem[bhwd](Rx++#s4:[0123])=Nt.new +//===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1, + isNewValue = 1, opNewValue = 3 in +class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp > + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2), + mnemonic#"($src1++#$offset) = $src2.new", + [], "$src1 = $_dst_">, + AddrModeRel { + bits<5> src1; + bits<3> src2; + bits<7> offset; + bits<4> offsetBits; - let accessSize = WordAccess in - defm STriw: ST_MEMri_nv<"memw", "STriw", 
IntRegs, 13, 8>, AddrModeRel; -} + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0})); + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = src1; + let Inst{13} = 0b0; + let Inst{12-11} = MajOp; + let Inst{10-8} = src2; + let Inst{7} = 0b0; + let Inst{6-3} = offsetBits; + let Inst{1} = 0b0; + } //===----------------------------------------------------------------------===// -// Post increment store -// mem[bhwd](Rx++#s4:[0123])=Nt.new +// Template class for predicated post increment .new stores +// if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new //===----------------------------------------------------------------------===// +let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1, + isNewValue = 1, opNewValue = 4 in +class T_StorePI_nv_pred <string mnemonic, Operand ImmOp, + bits<2> MajOp, bit isPredNot, bit isPredNew > + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, + ImmOp:$offset, IntRegs:$src3), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3.new", + [], "$src2 = $_dst_">, + AddrModeRel { + bits<2> src1; + bits<5> src2; + bits<3> src3; + bits<7> offset; + bits<4> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2}, + !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1}, + /* s4_0Imm */ offset{3-0})); + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = src2; + let Inst{13} = 0b1; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7} = isPredNew; + let Inst{6-3} = offsetBits; + let Inst{2} = isPredNot; + let Inst{1-0} = src1; + } + +multiclass ST_PostInc_Pred_nv<string mnemonic, Operand ImmOp, + bits<2> MajOp, bit PredNot> { + def _pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 0>; -multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp, - bit isNot, bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"($src2++#$offset) = $src3.new", - [], - "$src2 = $dst">, - Requires<[HasV4T]>; + // Predicate new + def new_pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 1>; } -multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC, - Operand ImmOp, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>; - // Predicate new - let Predicates = [HasV4T], validSubTargets = HasV4SubT in - defm _cdn#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 1>; +multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp, + bits<2> MajOp> { + let BaseOpcode = "POST_"#BaseOp in { + def S2_#NAME#_pi : T_StorePI_nv <mnemonic, ImmOp, MajOp>; + + // Predicated + defm S2_p#NAME#t : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 0>; + defm S2_p#NAME#f : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 1>; } } -let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in -multiclass ST_PostInc_nv<string mnemonic, string BaseOp, RegisterClass RC, - Operand ImmOp> { +let accessSize = ByteAccess in +defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>; - let 
BaseOpcode = "POST_"#BaseOp in { - let isPredicable = 1 in - def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), - mnemonic#"($src1++#$offset) = $src2.new", - [], - "$src1 = $dst">, - Requires<[HasV4T]>; - - let isPredicated = 1 in { - defm Pt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 0 >; - defm NotPt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 1 >; - } +let accessSize = HalfWordAccess in +defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>; + +let accessSize = WordAccess in +defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>; + +//===----------------------------------------------------------------------===// +// Template class for post increment .new stores with register offset +//===----------------------------------------------------------------------===// +let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in +class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz> + : NVInstPI_V4 <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ModRegs:$src2, IntRegs:$src3), + #mnemonic#"($src1++$src2) = $src3.new", + [], "$src1 = $_dst_"> { + bits<5> src1; + bits<1> src2; + bits<3> src3; + let accessSize = AccessSz; + + let IClass = 0b1010; + + let Inst{27-21} = 0b1101101; + let Inst{20-16} = src1; + let Inst{13} = src2; + let Inst{12-11} = MajOp; + let Inst{10-8} = src3; + let Inst{7} = 0b0; } -} -let addrMode = PostInc, validSubTargets = HasV4SubT in { -defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel; -defm POST_SThri: ST_PostInc_nv <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel; -defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; -} +def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>; +def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>; +def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>; // memb(Rx++#s4:0:circ(Mu))=Nt.new // memb(Rx++I:circ(Mu))=Nt.new -// memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new // memh(Rx++#s4:1:circ(Mu))=Nt.new // memh(Rx++I:circ(Mu))=Nt.new @@ -1002,7 +1522,8 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; // operands. 
//===----------------------------------------------------------------------===// -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11, + opExtentAlign = 2 in class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, bit isNegCond, bit isTak> : NVInst_V4<(outs), @@ -1010,8 +1531,7 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, "if ("#!if(isNegCond, "!","")#mnemonic# "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")# "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:" - #!if(isTak, "t","nt")#" $offset", - []>, Requires<[HasV4T]> { + #!if(isTak, "t","nt")#" $offset", []> { bits<5> src1; bits<5> src2; @@ -1020,14 +1540,14 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, bits<11> offset; let isTaken = isTak; - let isBrTaken = !if(isTaken, "true", "false"); let isPredicatedFalse = isNegCond; + let opNewValue{0} = NvOpNum; let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0}); let RegOp = !if(!eq(NvOpNum, 0), src2, src1); let IClass = 0b0010; - let Inst{26} = 0b0; + let Inst{27-26} = 0b00; let Inst{25-23} = majOp; let Inst{22} = isNegCond; let Inst{18-16} = Ns; @@ -1041,9 +1561,9 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum, bit isNegCond> { // Branch not taken: - def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; + def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; // Branch taken: - def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; + def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; } // NvOpNum = 0 -> First Operand is a new-value Register @@ -1052,8 +1572,8 @@ multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum, multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, bit NvOpNum> { let BaseOpcode = BaseOp#_NVJ in { - defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond - defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond + defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond + defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond } } @@ -1064,12 +1584,12 @@ multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, // if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2 let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, - Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in { - defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; - defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; - defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; - defm CMPLTrr : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; - defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; + Defs = [PC], hasSideEffects = 0 in { + defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; + defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; + defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; + defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; + defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; } //===----------------------------------------------------------------------===// @@ -1077,18 +1597,18 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, // with a register and an unsigned immediate (U5) operand. 
//===----------------------------------------------------------------------===// -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11, + opExtentAlign = 2 in class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond, bit isTak> : NVInst_V4<(outs), (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:" - #!if(isTak, "t","nt")#" $offset", - []>, Requires<[HasV4T]> { + #!if(isTak, "t","nt")#" $offset", []> { let isTaken = isTak; let isPredicatedFalse = isNegCond; - let isBrTaken = !if(isTaken, "true", "false"); + let isTaken = isTak; bits<3> src1; bits<5> src2; @@ -1107,15 +1627,15 @@ class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond, multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> { // Branch not taken: - def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>; + def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>; // Branch taken: - def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>; + def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>; } multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> { let BaseOpcode = BaseOp#_NVJri in { - defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond - defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond + defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond + defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond } } @@ -1124,10 +1644,10 @@ multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> { // if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2 let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, - Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in { - defm CMPEQri : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel; - defm CMPGTri : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel; - defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel; + Defs = [PC], hasSideEffects = 0 in { + defm J4_cmpeqi : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel; + defm J4_cmpgti : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel; + defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel; } //===----------------------------------------------------------------------===// @@ -1135,19 +1655,19 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, // with a register and an hardcoded 0/-1 immediate value. 
//===----------------------------------------------------------------------===// -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11, + opExtentAlign = 2 in class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal, bit isNegCond, bit isTak> : NVInst_V4<(outs), (ins IntRegs:$src1, brtarget:$offset), "if ("#!if(isNegCond, "!","")#mnemonic #"($src1.new, #"#ImmVal#")) jump:" - #!if(isTak, "t","nt")#" $offset", - []>, Requires<[HasV4T]> { + #!if(isTak, "t","nt")#" $offset", []> { let isTaken = isTak; let isPredicatedFalse = isNegCond; - let isBrTaken = !if(isTaken, "true", "false"); + let isTaken = isTak; bits<3> src1; bits<11> offset; @@ -1164,16 +1684,16 @@ class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal, multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal, bit isNegCond> { // Branch not taken: - def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>; + def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>; // Branch taken: - def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>; + def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>; } multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp, string ImmVal> { let BaseOpcode = BaseOp#_NVJ_ConstImm in { - defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True cond - defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False Cond + defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True + defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False } } @@ -1182,51 +1702,194 @@ multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp, // if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2 let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1, - Defs = [PC], neverHasSideEffects = 1 in { - defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel; - defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel; - defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel; + Defs = [PC], hasSideEffects = 0 in { + defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel; + defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel; + defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel; +} + +// J4_hintjumpr: Hint indirect conditional jump. 
+let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def J4_hintjumpr: JRInst < + (outs), + (ins IntRegs:$Rs), + "hintjr($Rs)"> { + bits<5> Rs; + let IClass = 0b0101; + let Inst{27-21} = 0b0010101; + let Inst{20-16} = Rs; + } + +//===----------------------------------------------------------------------===// +// NV/J - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// CR + +//===----------------------------------------------------------------------===// + +// PC-relative add +let hasNewValue = 1, isExtendable = 1, opExtendable = 1, + isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in +def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6), + "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > { + bits<5> Rd; + bits<6> u6; + + let IClass = 0b0110; + let Inst{27-16} = 0b101001001001; + let Inst{12-7} = u6; + let Inst{4-0} = Rd; + } + + + +let hasSideEffects = 0 in +class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg> + : CRInst<(outs PredRegs:$Pd), + (ins PredRegs:$Ps, PredRegs:$Pt, PredRegs:$Pu), + "$Pd = " # MnOp1 # "($Ps, " # MnOp2 # "($Pt, " # + !if (IsNeg,"!","") # "$Pu))", + [], "", CR_tc_2early_SLOT23> { + bits<2> Pd; + bits<2> Ps; + bits<2> Pt; + bits<2> Pu; + + let IClass = 0b0110; + let Inst{27-24} = 0b1011; + let Inst{23} = IsNeg; + let Inst{22-21} = OpBits; + let Inst{20} = 0b1; + let Inst{17-16} = Ps; + let Inst{13} = 0b0; + let Inst{9-8} = Pt; + let Inst{7-6} = Pu; + let Inst{1-0} = Pd; } +def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>; +def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>; +def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>; +def C4_or_or : T_LOGICAL_3OP<"or", "or", 0b11, 0>; +def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>; +def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>; +def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>; +def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>; + +// op(Ps, op(Pt, Pu)) +class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +// op(Ps, op(Pt, ~Pu)) +class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI> + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +def: LogLog_pat<and, and, C4_and_and>; +def: LogLog_pat<and, or, C4_and_or>; +def: LogLog_pat<or, and, C4_or_and>; +def: LogLog_pat<or, or, C4_or_or>; + +def: LogLogNot_pat<and, and, C4_and_andn>; +def: LogLogNot_pat<and, or, C4_and_orn>; +def: LogLogNot_pat<or, and, C4_or_andn>; +def: LogLogNot_pat<or, or, C4_or_orn>; + +//===----------------------------------------------------------------------===// +// CR - +//===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// // XTYPE/ALU + //===----------------------------------------------------------------------===// +// Logical with-not instructions. 
+def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>; +def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>; + +def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +let hasNewValue = 1, hasSideEffects = 0 in +def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101111; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + // Add and accumulate. // Rd=add(Rs,add(Ru,#s6)) -let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6, -validSubTargets = HasV4SubT in -def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3), - "$dst = add($src1, add($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), - s6_16ExtPred:$src3)))]>, - Requires<[HasV4T]>; - -// Rd=add(Rs,sub(#s6,Ru)) -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6, -validSubTargets = HasV4SubT in -def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3), - "$dst = add($src1, sub(#$src2, $src3))", - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2, - (i32 IntRegs:$src3))))]>, - Requires<[HasV4T]>; - -// Generates the same instruction as ADDr_SUBri_V4 but matches different -// pattern. -// Rd=add(Rs,sub(#s6,Ru)) -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6, -validSubTargets = HasV4SubT in -def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3), - "$dst = add($src1, sub(#$src2, $src3))", - [(set (i32 IntRegs:$dst), - (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2), - (i32 IntRegs:$src3)))]>, - Requires<[HasV4T]>; +let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6, + opExtendable = 3 in +def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), + "$Rd = add($Rs, add($Ru, #$s6))" , + [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), + (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))], + "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Ru; + bits<6> s6; + + let IClass = 0b1101; + + let Inst{27-23} = 0b10110; + let Inst{22-21} = s6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = s6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = s6{2-0}; + let Inst{4-0} = Ru; + } + +let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1, + opExtentBits = 6, opExtendable = 2 in +def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru), + "$Rd = add($Rs, sub(#$s6, $Ru))", + [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<6> s6; + bits<5> Ru; + + let IClass = 0b1101; + + let Inst{27-23} = 0b10111; + let Inst{22-21} = s6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = s6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = s6{2-0}; + let Inst{4-0} = Ru; + } + +// Rd=add(Rs,sub(#s6,Ru)) +def: Pat<(add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2, + (i32 IntRegs:$src3))), + (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>; + +// Rd=sub(add(Rs,#s6),Ru) +def: Pat<(sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2), + (i32 IntRegs:$src3)), + (S4_subaddi IntRegs:$src1, 
s6_10ExtPred:$src2, IntRegs:$src3)>; + +// Rd=add(sub(Rs,Ru),#s6) +def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), + (s6_10ExtPred:$src2)), + (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>; // Add or subtract doublewords with carry. @@ -1235,213 +1898,316 @@ def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), //TODO: // Rdd=sub(Rss,Rtt,Px):carry +// Extract bitfield +// Rdd=extract(Rss,#u6,#U6) +// Rdd=extract(Rss,Rtt) +// Rd=extract(Rs,Rtt) +// Rd=extract(Rs,#u5,#U5) -// Logical doublewords. -// Rdd=and(Rtt,~Rss) -let validSubTargets = HasV4SubT in -def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2), - "$dst = and($src1, ~$src2)", - [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1), - (not (i64 DoubleRegs:$src2))))]>, - Requires<[HasV4T]>; - -// Rdd=or(Rtt,~Rss) -let validSubTargets = HasV4SubT in -def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2), - "$dst = or($src1, ~$src2)", - [(set (i64 DoubleRegs:$dst), - (or (i64 DoubleRegs:$src1), (not (i64 DoubleRegs:$src2))))]>, - Requires<[HasV4T]>; - - -// Logical-logical doublewords. -// Rxx^=xor(Rss,Rtt) -let validSubTargets = HasV4SubT in -def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), - "$dst ^= xor($src2, $src3)", - [(set (i64 DoubleRegs:$dst), - (xor (i64 DoubleRegs:$src1), (xor (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; +def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>; +def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>; +let hasNewValue = 1 in { + def S4_extract_rp : T_S3op_extract<"extract", 0b01>; + def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>; +} + +// Complex add/sub halfwords/words +let Defs = [USR_OVF] in { + def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>; + def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>; + def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>; + def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>; +} + +let Defs = [USR_OVF] in { + def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>; + def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>; +} + +let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in { + def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>; + def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>; +} + +// Logical xor with xor accumulation. 
+// Rxx^=xor(Rss,Rtt) +let hasSideEffects = 0 in +def M4_xor_xacc + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Rxx ^= xor($Rss, $Rtt)", + [(set (i64 DoubleRegs:$Rxx), + (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss), + (i64 DoubleRegs:$Rtt))))], + "$dst2 = $Rxx", S_3op_tc_1_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b101010; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rxx; + } + +// Rotate and reduce bytes +// Rdd=vrcrotate(Rss,Rt,#u2) +let hasSideEffects = 0 in +def S4_vrcrotate + : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + "$Rdd = vrcrotate($Rss, $Rt, #$u2)", + [], "", S_3op_tc_3x_SLOT23> { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rt; + bits<2> u2; + + let IClass = 0b1100; + + let Inst{27-22} = 0b001111; + let Inst{20-16} = Rss; + let Inst{13} = u2{1}; + let Inst{12-8} = Rt; + let Inst{7-6} = 0b11; + let Inst{5} = u2{0}; + let Inst{4-0} = Rdd; + } + +// Rotate and reduce bytes with accumulation +// Rxx+=vrcrotate(Rss,Rt,#u2) +let hasSideEffects = 0 in +def S4_vrcrotate_acc + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2), + "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [], + "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + bits<2> u2; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rss; + let Inst{13} = u2{1}; + let Inst{12-8} = Rt; + let Inst{5} = u2{0}; + let Inst{4-0} = Rxx; + } + +// Vector reduce conditional negate halfwords +let hasSideEffects = 0 in +def S2_vrcnegh + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt), + "$Rxx += vrcnegh($Rss, $Rt)", [], + "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011001; + let Inst{20-16} = Rss; + let Inst{13} = 0b1; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b111; + let Inst{4-0} = Rxx; + } + +// Split bitfield +def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>; + +// Arithmetic/Convergent round +def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>; + +def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>; + +let Defs = [USR_OVF] in +def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>; // Logical-logical words. 
-// Rx=or(Ru,and(Rx,#s10)) -let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, -validSubTargets = HasV4SubT in -def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), - "$dst = or($src1, and($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ExtPred:$src3)))], - "$src2 = $dst">, - Requires<[HasV4T]>; +// Compound or-and -- Rx=or(Ru,and(Rx,#s10)) +let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10, + opExtendable = 3 in +def S4_or_andix: + ALU64Inst<(outs IntRegs:$Rx), + (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10), + "$Rx = or($Ru, and($_src_, #$s10))" , + [(set (i32 IntRegs:$Rx), + (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] , + "$_src_ = $Rx", ALU64_tc_2_SLOT23> { + bits<5> Rx; + bits<5> Ru; + bits<10> s10; + + let IClass = 0b1101; + + let Inst{27-22} = 0b101001; + let Inst{20-16} = Rx; + let Inst{21} = s10{9}; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Ru; + } + +// Miscellaneous ALU64 instructions. +// +let hasNewValue = 1, hasSideEffects = 0 in +def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0011111; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b111; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0 in +def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0100; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0 in +def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-24} = 0b0100; + let Inst{21} = 0b0; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, hasSideEffects = 0 in +def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101100; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{7} = 0b0; + let Inst{4-0} = Rd; +} + +let hasNewValue = 1, hasSideEffects = 0 in +def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-21} = 0b0101100; + let Inst{20-16} = Rt; + let Inst{12-8} = Rs; + let Inst{7} = 0b1; + let Inst{4-0} = Rd; +} + +// Rx[&|]=xor(Rs,Rt) +def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>; +def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>; + +// Rx[&|^]=or(Rs,Rt) +def M4_xor_or : T_MType_acc_rr < "^= or", 0b110, 0b011, 0>; + +let CextOpcode = "ORr_ORr" in +def M4_or_or : T_MType_acc_rr < "|= or", 0b110, 0b000, 0>; +def M4_and_or : T_MType_acc_rr < "&= or", 0b010, 0b001, 0>; // Rx[&|^]=and(Rs,Rt) -// Rx&=and(Rs,Rt) -let validSubTargets = HasV4SubT in -def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, 
IntRegs:$src3), - "$dst &= and($src2, $src3)", - [(set (i32 IntRegs:$dst), - (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -// Rx|=and(Rs,Rt) -let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in -def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst |= and($src2, $src3)", - [(set (i32 IntRegs:$dst), - (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>, ImmRegRel; - -// Rx^=and(Rs,Rt) -let validSubTargets = HasV4SubT in -def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst ^= and($src2, $src3)", - [(set (i32 IntRegs:$dst), - (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; +def M4_xor_and : T_MType_acc_rr < "^= and", 0b110, 0b010, 0>; + +let CextOpcode = "ORr_ANDr" in +def M4_or_and : T_MType_acc_rr < "|= and", 0b010, 0b011, 0>; +def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>; // Rx[&|^]=and(Rs,~Rt) -// Rx&=and(Rs,~Rt) -let validSubTargets = HasV4SubT in -def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst &= and($src2, ~$src3)", - [(set (i32 IntRegs:$dst), - (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - (not (i32 IntRegs:$src3)))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -// Rx|=and(Rs,~Rt) -let validSubTargets = HasV4SubT in -def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst |= and($src2, ~$src3)", - [(set (i32 IntRegs:$dst), - (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - (not (i32 IntRegs:$src3)))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -// Rx^=and(Rs,~Rt) -let validSubTargets = HasV4SubT in -def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst ^= and($src2, ~$src3)", - [(set (i32 IntRegs:$dst), - (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - (not (i32 IntRegs:$src3)))))], - "$src1 = $dst">, - Requires<[HasV4T]>; +def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>; +def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>; +def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>; + +def: T_MType_acc_pat2 <M4_or_xor, xor, or>; +def: T_MType_acc_pat2 <M4_and_xor, xor, and>; +def: T_MType_acc_pat2 <M4_or_and, and, or>; +def: T_MType_acc_pat2 <M4_and_and, and, and>; +def: T_MType_acc_pat2 <M4_xor_and, and, xor>; +def: T_MType_acc_pat2 <M4_or_or, or, or>; +def: T_MType_acc_pat2 <M4_and_or, or, and>; +def: T_MType_acc_pat2 <M4_xor_or, or, xor>; + +class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp> + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, + (not IntRegs:$src3)))), + (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>; + +def: T_MType_acc_pat3 <M4_or_andn, and, or>; +def: T_MType_acc_pat3 <M4_and_andn, and, and>; +def: T_MType_acc_pat3 <M4_xor_andn, and, xor>; + +// Compound or-or and or-and +let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1, + opExtentBits = 10, opExtendable = 3 in +class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode> + : MInst_acc <(outs IntRegs:$Rx), + (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10), + "$Rx |= "#mnemonic#"($Rs, #$s10)", + [(set (i32 IntRegs:$Rx), (or 
(i32 IntRegs:$src1), + (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))], + "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel { + bits<5> Rx; + bits<5> Rs; + bits<10> s10; + + let IClass = 0b1101; + + let Inst{27-24} = 0b1010; + let Inst{23-22} = MajOp; + let Inst{20-16} = Rs; + let Inst{21} = s10{9}; + let Inst{13-5} = s10{8-0}; + let Inst{4-0} = Rx; + } -// Rx[&|^]=or(Rs,Rt) -// Rx&=or(Rs,Rt) -let validSubTargets = HasV4SubT in -def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst &= or($src2, $src3)", - [(set (i32 IntRegs:$dst), - (and (i32 IntRegs:$src1), (or (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -// Rx|=or(Rs,Rt) -let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in -def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst |= or($src2, $src3)", - [(set (i32 IntRegs:$dst), - (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>, ImmRegRel; - -// Rx^=or(Rs,Rt) -let validSubTargets = HasV4SubT in -def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst ^= or($src2, $src3)", - [(set (i32 IntRegs:$dst), - (xor (i32 IntRegs:$src1), (or (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -// Rx[&|^]=xor(Rs,Rt) -// Rx&=xor(Rs,Rt) -let validSubTargets = HasV4SubT in -def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst &= xor($src2, $src3)", - [(set (i32 IntRegs:$dst), - (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -// Rx|=xor(Rs,Rt) -let validSubTargets = HasV4SubT in -def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst |= xor($src2, $src3)", - [(set (i32 IntRegs:$dst), - (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -// Rx^=xor(Rs,Rt) -let validSubTargets = HasV4SubT in -def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), - "$dst ^= xor($src2, $src3)", - [(set (i32 IntRegs:$dst), - (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -// Rx|=and(Rs,#s10) -let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, -validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in -def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), - "$dst |= and($src2, #$src3)", - [(set (i32 IntRegs:$dst), - (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ExtPred:$src3)))], - "$src1 = $dst">, - Requires<[HasV4T]>, ImmRegRel; - -// Rx|=or(Rs,#s10) -let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, -validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in -def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), - "$dst |= or($src2, #$src3)", - [(set (i32 IntRegs:$dst), - (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ExtPred:$src3)))], - "$src1 = $dst">, - Requires<[HasV4T]>, ImmRegRel; +let CextOpcode = "ORr_ANDr" in +def S4_or_andi : T_CompOR <"and", 0b00, and>; +let CextOpcode = "ORr_ORr" in +def 
S4_or_ori : T_CompOR <"or", 0b10, or>; // Modulo wrap // Rd=modwrap(Rs,Rt) @@ -1480,269 +2246,483 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst), // XTYPE/ALU - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// XTYPE/BIT + +//===----------------------------------------------------------------------===// + +// Bit reverse +def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>; + +// Bit count +def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>; +def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>; +def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>; + +def: Pat<(i32 (trunc (cttz (i64 DoubleRegs:$Rss)))), + (S2_ct0p (i64 DoubleRegs:$Rss))>; +def: Pat<(i32 (trunc (cttz (not (i64 DoubleRegs:$Rss))))), + (S2_ct1p (i64 DoubleRegs:$Rss))>; + +let hasSideEffects = 0, hasNewValue = 1 in +def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6), + "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { + bits<5> Rs; + bits<5> Rd; + bits<6> s6; + let IClass = 0b1000; + let Inst{27-24} = 0b1100; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rs; + let Inst{13-8} = s6; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rd; +} + +let hasSideEffects = 0, hasNewValue = 1 in +def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6), + "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { + bits<5> Rs; + bits<5> Rd; + bits<6> s6; + let IClass = 0b1000; + let Inst{27-24} = 0b1000; + let Inst{23-21} = 0b011; + let Inst{20-16} = Rs; + let Inst{13-8} = s6; + let Inst{7-5} = 0b010; + let Inst{4-0} = Rd; +} + + +// Bit test/set/clear +def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>; +def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>; + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), + (S4_ntstbit_i (i32 IntRegs:$Rs), u5ImmPred:$u5)>; + def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), + (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>; +} + +// Add extra complexity to prefer these instructions over bitsset/bitsclr. +// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... +let AddedComplexity = 100 in +def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), + (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; + +let AddedComplexity = 100 in +def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), + (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; + +def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>; +def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>; +def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>; + +// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be +// represented as a compare against "value & 0xFF", which is an exact match +// for cmpb (same for cmph). The patterns below do not contain any additional +// complexity that would make them preferable, and if they were actually used +// instead of cmpb/cmph, they would result in a compare against register that +// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). 
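Note (illustrative, not part of the patch): the single-bit and masked-bit tests selected around this point reduce to simple scalar identities, which is why tstbit/!tstbit get the extra complexity (they can fold into a compound "if ([!]tstbit(...)) jump"). A minimal C++ sketch of what these forms compute, with invented helper names:

#include <cstdint>

// Bit position u5 is assumed below 32, as the u5 immediate guarantees.
bool tstbit(uint32_t rs, unsigned u5)  { return (rs & (1u << u5)) != 0; }  // S2_tstbit_i
bool ntstbit(uint32_t rs, unsigned u5) { return (rs & (1u << u5)) == 0; }  // S4_ntstbit_i

// "!bitsclr": not all bits of the mask are clear in Rs.
bool nbitsclr(uint32_t rs, uint32_t mask) { return (rs & mask) != 0; }     // C4_nbitsclr[i]

// "!bitsset": not all bits of Rt are set in Rs.
bool nbitsset(uint32_t rs, uint32_t rt)   { return (rs & rt) != rt; }      // C4_nbitsset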
+def: Pat<(i1 (setne (and I32:$Rs, u6ImmPred:$u6), 0)), + (C4_nbitsclri I32:$Rs, u6ImmPred:$u6)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), + (C4_nbitsclr I32:$Rs, I32:$Rt)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), + (C4_nbitsset I32:$Rs, I32:$Rt)>; + +//===----------------------------------------------------------------------===// +// XTYPE/BIT - +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // XTYPE/MPY + //===----------------------------------------------------------------------===// -// Multiply and user lower result. -// Rd=add(#u6,mpyi(Rs,#U6)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, -validSubTargets = HasV4SubT in -def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst), - (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3), - "$dst = add(#$src1, mpyi($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), - u6ExtPred:$src1))]>, - Requires<[HasV4T]>; +// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed. + +let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in +def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), + (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6), + "$Rd = add(#$u6, mpyi($Rs, #$U6))" , + [(set (i32 IntRegs:$Rd), + (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6), + u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + bits<5> Rd; + bits<6> u6; + bits<5> Rs; + bits<6> U6; + + let IClass = 0b1101; + + let Inst{27-24} = 0b1000; + let Inst{23} = U6{5}; + let Inst{22-21} = u6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = u6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = u6{2-0}; + let Inst{4-0} = U6{4-0}; + } + +// Rd=add(#u6,mpyi(Rs,Rt)) +let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1, + isExtendable = 1, opExtentBits = 6, opExtendable = 1 in +def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), + (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add(#$u6, mpyi($Rs, $Rt))" , + [(set (i32 IntRegs:$Rd), + (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))], + "", ALU64_tc_3x_SLOT23>, ImmRegRel { + bits<5> Rd; + bits<6> u6; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b01110; + let Inst{22-21} = u6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = u6{3}; + let Inst{12-8} = Rt; + let Inst{7-5} = u6{2-0}; + let Inst{4-0} = Rd; + } + +let hasNewValue = 1 in +class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins> + : ALU64Inst <(outs IntRegs:$dst), ins, + "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))", + "#$src2, $src3))"), + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))], + "", ALU64_tc_3x_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<8> src2; + bits<5> src3; + + let IClass = 0b1101; + + bits<6> ImmValue = !if(MajOp, src2{5-0}, src2{7-2}); + + let Inst{27-24} = 0b1111; + let Inst{23} = MajOp; + let Inst{22-21} = ImmValue{5-4}; + let Inst{20-16} = src3; + let Inst{13} = ImmValue{3}; + let Inst{12-8} = dst; + let Inst{7-5} = ImmValue{2-0}; + let Inst{4-0} = src1; + } + +def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred, + (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>; + +let isExtendable = 1, opExtentBits = 6, opExtendable = 3, + CextOpcode = "ADD_MPY", InputType = "imm" in +def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred, + (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel; + +// Rx=add(Ru,mpyi(Rx,Rs)) +let CextOpcode = "ADD_MPY", InputType = "reg", 
hasNewValue = 1 in +def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), + (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs), + "$Rx = add($Ru, mpyi($_src_, $Rs))", + [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru), + (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))], + "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel { + bits<5> Rx; + bits<5> Ru; + bits<5> Rs; + + let IClass = 0b1110; + + let Inst{27-21} = 0b0011000; + let Inst{12-8} = Rx; + let Inst{4-0} = Ru; + let Inst{20-16} = Rs; + } // Rd=add(##,mpyi(Rs,#U6)) def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), (HexagonCONST32 tglobaladdr:$src1)), - (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2, + (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2, u6ImmPred:$src3))>; -// Rd=add(#u6,mpyi(Rs,Rt)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, -validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in -def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst), - (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst = add(#$src1, mpyi($src2, $src3))", - [(set (i32 IntRegs:$dst), - (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - u6ExtPred:$src1))]>, - Requires<[HasV4T]>, ImmRegRel; - // Rd=add(##,mpyi(Rs,Rt)) def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), (HexagonCONST32 tglobaladdr:$src1)), - (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2, + (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2, IntRegs:$src3))>; -// Rd=add(Ru,mpyi(#u6:2,Rs)) -let validSubTargets = HasV4SubT in -def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3), - "$dst = add($src1, mpyi(#$src2, $src3))", - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), - u6_2ImmPred:$src2)))]>, - Requires<[HasV4T]>; - -// Rd=add(Ru,mpyi(Rs,#u6)) -let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6, -validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in -def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3), - "$dst = add($src1, mpyi($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - u6ExtPred:$src3)))]>, - Requires<[HasV4T]>, ImmRegRel; +// Vector reduce multiply word by signed half (32x16) +//Rdd=vrmpyweh(Rss,Rtt)[:<<1] +def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>; +def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>; -// Rx=add(Ru,mpyi(Rx,Rs)) -let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in -def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst = add($src1, mpyi($src2, $src3))", - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src2 = $dst">, - Requires<[HasV4T]>, ImmRegRel; +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>; +def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>; +//Rdd+=vrmpyweh(Rss,Rtt)[:<<1] +def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>; +def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>; -// Polynomial multiply words -// Rdd=pmpyw(Rs,Rt) -// Rxx^=pmpyw(Rs,Rt) +//Rdd=vrmpywoh(Rss,Rtt)[:<<1] +def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>; +def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>; -// Vector reduce multiply word by signed half (32x16) -// 
Rdd=vrmpyweh(Rss,Rtt)[:<<1] -// Rdd=vrmpywoh(Rss,Rtt)[:<<1] -// Rxx+=vrmpyweh(Rss,Rtt)[:<<1] -// Rxx+=vrmpywoh(Rss,Rtt)[:<<1] - -// Multiply and use upper result -// Rd=mpy(Rs,Rt.H):<<1:sat -// Rd=mpy(Rs,Rt.L):<<1:sat -// Rd=mpy(Rs,Rt):<<1 -// Rd=mpy(Rs,Rt):<<1:sat -// Rd=mpysu(Rs,Rt) -// Rx+=mpy(Rs,Rt):<<1:sat -// Rx-=mpy(Rs,Rt):<<1:sat - -// Vector multiply bytes -// Rdd=vmpybsu(Rs,Rt) -// Rdd=vmpybu(Rs,Rt) -// Rxx+=vmpybsu(Rs,Rt) -// Rxx+=vmpybu(Rs,Rt) +// Vector multiply halfwords, signed by unsigned +// Rdd=vmpyhsu(Rs,Rt)[:<<]:sat +def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>; +def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>; + +// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat +def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>; +def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>; // Vector polynomial multiply halfwords // Rdd=vpmpyh(Rs,Rt) +def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>; + // Rxx^=vpmpyh(Rs,Rt) +def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>; + +// Polynomial multiply words +// Rdd=pmpyw(Rs,Rt) +def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>; + +// Rxx^=pmpyw(Rs,Rt) +def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>; //===----------------------------------------------------------------------===// // XTYPE/MPY - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// ALU64/Vector compare +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Template class for vector compare +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0 in +class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd> + : ALU64_rr <(outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, ImmOprnd:$Imm), + "$Pd = "#Str#"($Rss, #$Imm)", + [], "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rss; + bits<32> Imm; + bits<8> ImmBits; + let ImmBits{6-0} = Imm{6-0}; + let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu + + let IClass = 0b1101; + + let Inst{27-24} = 0b1100; + let Inst{22-21} = cmpOp; + let Inst{20-16} = Rss; + let Inst{12-5} = ImmBits; + let Inst{4-3} = minOp; + let Inst{1-0} = Pd; + } + +// Vector compare bytes +def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>; +def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>; + +let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in +def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>; + +def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>; +def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>; +def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>; + +// Vector compare halfwords +def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>; +def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>; +def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>; + +// Vector compare words +def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8Imm>; +def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>; +def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>; //===----------------------------------------------------------------------===// // XTYPE/SHIFT + //===----------------------------------------------------------------------===// - -// 
Shift by immediate and accumulate. -// Rx=add(#u8,asl(Rx,#U5)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, -validSubTargets = HasV4SubT in -def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), - "$dst = add(#$src1, asl($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ExtPred:$src1))], - "$src2 = $dst">, - Requires<[HasV4T]>; - -// Rx=add(#u8,lsr(Rx,#U5)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, -validSubTargets = HasV4SubT in -def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), - "$dst = add(#$src1, lsr($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ExtPred:$src1))], - "$src2 = $dst">, - Requires<[HasV4T]>; - -// Rx=sub(#u8,asl(Rx,#U5)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, -validSubTargets = HasV4SubT in -def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), - "$dst = sub(#$src1, asl($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ExtPred:$src1))], - "$src2 = $dst">, - Requires<[HasV4T]>; - -// Rx=sub(#u8,lsr(Rx,#U5)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, -validSubTargets = HasV4SubT in -def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), - "$dst = sub(#$src1, lsr($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ExtPred:$src1))], - "$src2 = $dst">, - Requires<[HasV4T]>; - - -//Shift by immediate and logical. -//Rx=and(#u8,asl(Rx,#U5)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, -validSubTargets = HasV4SubT in -def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), - "$dst = and(#$src1, asl($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ExtPred:$src1))], - "$src2 = $dst">, - Requires<[HasV4T]>; - -//Rx=and(#u8,lsr(Rx,#U5)) +// Shift by immediate and accumulate/logical. 
+// Rx=add(#u8,asl(Rx,#U5)) Rx=add(#u8,lsr(Rx,#U5)) +// Rx=sub(#u8,asl(Rx,#U5)) Rx=sub(#u8,lsr(Rx,#U5)) +// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5)) +// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5)) let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, -validSubTargets = HasV4SubT in -def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), - "$dst = and(#$src1, lsr($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ExtPred:$src1))], - "$src2 = $dst">, - Requires<[HasV4T]>; - -//Rx=or(#u8,asl(Rx,#U5)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, -AddedComplexity = 30, validSubTargets = HasV4SubT in -def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), - "$dst = or(#$src1, asl($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ExtPred:$src1))], - "$src2 = $dst">, - Requires<[HasV4T]>; - -//Rx=or(#u8,lsr(Rx,#U5)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, -AddedComplexity = 30, validSubTargets = HasV4SubT in -def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), - "$dst = or(#$src1, lsr($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ExtPred:$src1))], - "$src2 = $dst">, - Requires<[HasV4T]>; - - -//Shift by register. -//Rd=lsl(#s6,Rt) -let validSubTargets = HasV4SubT in { -def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2), - "$dst = lsl(#$src1, $src2)", - [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1, - (i32 IntRegs:$src2)))]>, - Requires<[HasV4T]>; - - -//Shift by register and logical. 
-//Rxx^=asl(Rss,Rt) -def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), - "$dst ^= asl($src2, $src3)", - [(set (i64 DoubleRegs:$dst), - (xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -//Rxx^=asr(Rss,Rt) -def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), - "$dst ^= asr($src2, $src3)", - [(set (i64 DoubleRegs:$dst), - (xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -//Rxx^=lsl(Rss,Rt) -def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), - "$dst ^= lsl($src2, $src3)", - [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1), - (shl (i64 DoubleRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; - -//Rxx^=lsr(Rss,Rt) -def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), - "$dst ^= lsr($src2, $src3)", - [(set (i64 DoubleRegs:$dst), - (xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$src2), - (i32 IntRegs:$src3))))], - "$src1 = $dst">, - Requires<[HasV4T]>; + hasNewValue = 1, opNewValue = 0 in +class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh, + bit asl_lsr, bits<2> MajOp, InstrItinClass Itin> + : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5), + "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))", + [(set (i32 IntRegs:$Rd), + (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))], + "$Rd = $Rx", Itin> { + + bits<5> Rd; + bits<8> u8; + bits<5> Rx; + bits<5> U5; + + let IClass = 0b1101; + let Inst{27-24} = 0b1110; + let Inst{23-21} = u8{7-5}; + let Inst{20-16} = Rd; + let Inst{13} = u8{4}; + let Inst{12-8} = U5; + let Inst{7-5} = u8{3-1}; + let Inst{4} = asl_lsr; + let Inst{3} = u8{0}; + let Inst{2-1} = MajOp; +} + +multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp, + InstrItinClass Itin> { + def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", Op, shl, 0, MajOp, Itin>; + def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>; +} + +let AddedComplexity = 200 in { + defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>; + defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>; } +let AddedComplexity = 30 in +defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>; + +defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>; + +let AddedComplexity = 200 in { + def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), + (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), + (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)), + (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)), + (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>; +} + +// Vector conditional negate +// Rdd=vcnegh(Rss,Rt) +let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>; + +// Rd=[cround|round](Rs,Rt) +let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in { + def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>; + def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>; +} + 
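Note (illustrative, not part of the patch): the T_S4_ShiftOperate forms defined above combine a shift by immediate with an ALU operation against an extendable #u8, replacing the per-opcode V4 definitions that are removed here. A minimal C++ sketch of the assembly-level semantics, restricted to the commutative forms and using invented helper names:

#include <cstdint>

// Rd = add(#u8, asl(Rx, #U5)); the shift amount U5 is assumed below 32.
uint32_t addi_asl_ri(uint32_t u8, uint32_t rx, unsigned u5) { return u8 + (rx << u5); }

// Rd = or(#u8, lsr(Rx, #U5))
uint32_t ori_lsr_ri(uint32_t u8, uint32_t rx, unsigned u5)  { return u8 | (rx >> u5); }

// Rd = and(#u8, asl(Rx, #U5))
uint32_t andi_asl_ri(uint32_t u8, uint32_t rx, unsigned u5) { return u8 & (rx << u5); }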
+// Rd=round(Rs,Rt):sat +let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in +def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>; + +// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat +let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { + def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>; + def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>; +} + +// Rdd=[add|sub](Rss,Rtt,Px):carry +let isPredicateLate = 1, hasSideEffects = 0 in +class T_S3op_carry <string mnemonic, bits<3> MajOp> + : SInst < (outs DoubleRegs:$Rdd, PredRegs:$Px), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu), + "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu):carry", + [], "$Px = $Pu", S_3op_tc_1_SLOT23 > { + bits<5> Rdd; + bits<5> Rss; + bits<5> Rtt; + bits<2> Pu; + + let IClass = 0b1100; + + let Inst{27-24} = 0b0010; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + let Inst{6-5} = Pu; + let Inst{4-0} = Rdd; + } + +def A4_addp_c : T_S3op_carry < "add", 0b110 >; +def A4_subp_c : T_S3op_carry < "sub", 0b111 >; + +let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in +class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned> + : SInst <(outs DoubleRegs:$Rxx), + (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Ru), + "$Rxx = "#mnemonic#"($Rss, $Ru)" , + [] , "$dst2 = $Rxx"> { + bits<5> Rxx; + bits<5> Rss; + bits<5> Ru; + + let IClass = 0b1100; + + let Inst{27-21} = 0b1011001; + let Inst{20-16} = Rss; + let Inst{13} = isUnsigned; + let Inst{12-8} = Rxx; + let Inst{7-5} = MinOp; + let Inst{4-0} = Ru; + } + +// Vector reduce maximum halfwords +// Rxx=vrmax[u]h(Rss,Ru) +def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>; +def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>; + +// Vector reduce maximum words +// Rxx=vrmax[u]w(Rss,Ru) +def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>; +def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>; + +// Vector reduce minimum halfwords +// Rxx=vrmin[u]h(Rss,Ru) +def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>; +def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>; + +// Vector reduce minimum words +// Rxx=vrmin[u]w(Rss,Ru) +def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>; +def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>; + +// Shift an immediate left by register amount. 
+let hasNewValue = 1, hasSideEffects = 0 in +def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt), + "$Rd = lsl(#$s6, $Rt)" , + [(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6, + (i32 IntRegs:$Rt)))], + "", S_3op_tc_1_SLOT23> { + bits<5> Rd; + bits<6> s6; + bits<5> Rt; + + let IClass = 0b1100; + + let Inst{27-22} = 0b011010; + let Inst{20-16} = s6{5-1}; + let Inst{12-8} = Rt; + let Inst{7-6} = 0b11; + let Inst{4-0} = Rd; + let Inst{5} = s6{0}; + } + //===----------------------------------------------------------------------===// // XTYPE/SHIFT - //===----------------------------------------------------------------------===// @@ -1830,7 +2810,7 @@ class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp, (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta), opc#"($base+#$offset)"#memOp#"$delta", []>, - Requires<[HasV4T, UseMEMOP]> { + Requires<[UseMEMOP]> { bits<5> base; bits<5> delta; @@ -1841,6 +2821,7 @@ class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp, !if (!eq(opcBits, 0b01), offset{6-1}, !if (!eq(opcBits, 0b10), offset{7-2},0))); + let opExtentAlign = opcBits; let IClass = 0b0011; let Inst{27-24} = 0b1110; let Inst{22-21} = opcBits; @@ -1861,7 +2842,7 @@ class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp, opc#"($base+#$offset)"#memOp#"#$delta" #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')' []>, - Requires<[HasV4T, UseMEMOP]> { + Requires<[UseMEMOP]> { bits<5> base; bits<5> delta; @@ -1872,6 +2853,7 @@ class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp, !if (!eq(opcBits, 0b01), offset{6-1}, !if (!eq(opcBits, 0b10), offset{7-2},0))); + let opExtentAlign = opcBits; let IClass = 0b0011; let Inst{27-24} = 0b1111; let Inst{22-21} = opcBits; @@ -1884,36 +2866,35 @@ class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp, // multiclass to define MemOp instructions with register operand. multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> { - def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add - def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub - def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and - def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or + def L4_add#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add + def L4_sub#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub + def L4_and#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and + def L4_or#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or } // multiclass to define MemOp instructions with immediate Operand. 
multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> { - def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >; - def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >; - def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>; - def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>; + def L4_iadd#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >; + def L4_isub#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >; + def L4_iand#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = clrbit(", 0b10>; + def L4_ior#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = setbit(", 0b11>; } multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> { - defm r : MemOp_rr <opc, opcBits, ImmOp>; - defm i : MemOp_ri <opc, opcBits, ImmOp>; + defm _#NAME : MemOp_rr <opc, opcBits, ImmOp>; + defm _#NAME : MemOp_ri <opc, opcBits, ImmOp>; } // Define MemOp instructions. -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, -validSubTargets =HasV4SubT in { +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { let opExtentBits = 6, accessSize = ByteAccess in - defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>; + defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>; let opExtentBits = 7, accessSize = HalfWordAccess in - defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>; + defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>; let opExtentBits = 8, accessSize = WordAccess in - defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>; + defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>; } //===----------------------------------------------------------------------===// @@ -1926,40 +2907,40 @@ validSubTargets =HasV4SubT in { multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, InstHexagon MI, SDNode OpNode> { let AddedComplexity = 180 in - def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), - IntRegs:$addr), - (MI IntRegs:$addr, #0, u5ImmPred:$addend )>; + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), + IntRegs:$addr), + (MI IntRegs:$addr, 0, u5ImmPred:$addend)>; let AddedComplexity = 190 in - def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)), - u5ImmPred:$addend), - (add IntRegs:$base, ExtPred:$offset)), - (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>; + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)), + u5ImmPred:$addend), + (add IntRegs:$base, ExtPred:$offset)), + (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>; } multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, InstHexagon addMI, InstHexagon subMI> { - defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>; - defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>; + defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>; + defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>; } multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, - MemOPh_ADDi_V4, MemOPh_SUBi_V4>; + defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + L4_iadd_memoph_io, L4_isub_memoph_io>; // Byte - defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred, - MemOPb_ADDi_V4, MemOPb_SUBi_V4>; + defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred, + L4_iadd_memopb_io, L4_isub_memopb_io>; } -let Predicates = [HasV4T, UseMEMOP] in { - defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend - defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend - defm : 
MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend +let Predicates = [UseMEMOP] in { + defm: MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend // Word - defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4, - MemOPw_SUBi_V4>; + defm: MemOpi_u5ALUOp <load, store, u6_2ExtPred, L4_iadd_memopw_io, + L4_isub_memopw_io>; } //===----------------------------------------------------------------------===// @@ -1970,37 +2951,36 @@ let Predicates = [HasV4T, UseMEMOP] in { //===----------------------------------------------------------------------===// multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, - PatLeaf immPred, ComplexPattern addrPred, - SDNodeXForm xformFunc, InstHexagon MI> { + PatLeaf immPred, SDNodeXForm xformFunc, + InstHexagon MI> { let AddedComplexity = 190 in - def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend), - IntRegs:$addr), - (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>; + def: Pat<(stOp (add (ldOp IntRegs:$addr), immPred:$subend), IntRegs:$addr), + (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>; let AddedComplexity = 195 in - def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)), - immPred:$subend), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>; + def: Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$subend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>; } multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred, - ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>; + defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred, + MEMOPIMM_HALF, L4_isub_memoph_io>; // Byte - defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred, - ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>; + defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred, + MEMOPIMM_BYTE, L4_isub_memopb_io>; } -let Predicates = [HasV4T, UseMEMOP] in { - defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend - defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend - defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend +let Predicates = [UseMEMOP] in { + defm: MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend // Word - defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred, - ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>; + defm: MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred, + MEMOPIMM, L4_isub_memopw_io>; } //===----------------------------------------------------------------------===// @@ -2010,52 +2990,50 @@ let Predicates = [HasV4T, UseMEMOP] in { //===----------------------------------------------------------------------===// multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, - PatLeaf extPred, ComplexPattern addrPred, - SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> { + PatLeaf extPred, SDNodeXForm xformFunc, InstHexagon MI, + SDNode OpNode> { // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5) let AddedComplexity = 250 in - def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), - immPred:$bitend), - (add IntRegs:$base, extPred:$offset)), - 
(MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$bitend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; // mem[bhw](Rs+#0) = [clrbit|setbit](#U5) let AddedComplexity = 225 in - def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)), - immPred:$bitend), - (addrPred (i32 IntRegs:$addr), extPred:$offset)), - (MI IntRegs:$addr, extPred:$offset, (xformFunc immPred:$bitend))>; + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), immPred:$bitend), IntRegs:$addr), + (MI IntRegs:$addr, 0, (xformFunc immPred:$bitend))>; } -multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { +multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> { // Byte - clrbit - defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred, - ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>; + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred, + CLRMEMIMM_BYTE, L4_iand_memopb_io, and>; // Byte - setbit - defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred, - ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>; + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred, + SETMEMIMM_BYTE, L4_ior_memopb_io, or>; // Half Word - clrbit - defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred, - ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>; + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred, + CLRMEMIMM_SHORT, L4_iand_memoph_io, and>; // Half Word - setbit - defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred, - ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>; + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred, + SETMEMIMM_SHORT, L4_ior_memoph_io, or>; } -let Predicates = [HasV4T, UseMEMOP] in { +let Predicates = [UseMEMOP] in { // mem[bh](Rs+#0) = [clrbit|setbit](#U5) // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5) - defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend - defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend - defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend + defm: MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOpi_bitExtType<extloadi8, extloadi16>; // any extend // memw(Rs+#0) = [clrbit|setbit](#U5) // memw(Rs+#u6:2) = [clrbit|setbit](#U5) - defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2, - CLRMEMIMM, MemOPw_CLRBITi_V4, and>; - defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2, - SETMEMIMM, MemOPw_SETBITi_V4, or>; + defm: MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, CLRMEMIMM, + L4_iand_memopw_io, and>; + defm: MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, SETMEMIMM, + L4_ior_memopw_io, or>; } //===----------------------------------------------------------------------===// @@ -2065,54 +3043,51 @@ let Predicates = [HasV4T, UseMEMOP] in { // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt //===----------------------------------------------------------------------===// -multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred, - PatLeaf extPred, InstHexagon MI, SDNode OpNode> { +multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + InstHexagon MI, SDNode OpNode> { let AddedComplexity = 141 in // mem[bhw](Rs+#0) [+-&|]= Rt - def : Pat <(stOp (OpNode (ldOp (addrPred 
IntRegs:$addr, extPred:$offset)), - (i32 IntRegs:$addend)), - (addrPred (i32 IntRegs:$addr), extPred:$offset)), - (MI IntRegs:$addr, extPred:$offset, (i32 IntRegs:$addend) )>; + def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), (i32 IntRegs:$addend)), + IntRegs:$addr), + (MI IntRegs:$addr, 0, (i32 IntRegs:$addend))>; // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt let AddedComplexity = 150 in - def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), - (i32 IntRegs:$orend)), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>; + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + (i32 IntRegs:$orend)), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend))>; } -multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, - ComplexPattern addrPred, PatLeaf extPred, +multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred, InstHexagon addMI, InstHexagon subMI, - InstHexagon andMI, InstHexagon orMI > { - - defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>; - defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>; - defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>; - defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>; + InstHexagon andMI, InstHexagon orMI> { + defm: MemOpr_Pats <ldOp, stOp, extPred, addMI, add>; + defm: MemOpr_Pats <ldOp, stOp, extPred, subMI, sub>; + defm: MemOpr_Pats <ldOp, stOp, extPred, andMI, and>; + defm: MemOpr_Pats <ldOp, stOp, extPred, orMI, or>; } multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred, - MemOPh_ADDr_V4, MemOPh_SUBr_V4, - MemOPh_ANDr_V4, MemOPh_ORr_V4>; + defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + L4_add_memoph_io, L4_sub_memoph_io, + L4_and_memoph_io, L4_or_memoph_io>; // Byte - defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred, - MemOPb_ADDr_V4, MemOPb_SUBr_V4, - MemOPb_ANDr_V4, MemOPb_ORr_V4>; + defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u6ExtPred, + L4_add_memopb_io, L4_sub_memopb_io, + L4_and_memopb_io, L4_or_memopb_io>; } // Define 'def Pats' for MemOps with register addend. -let Predicates = [HasV4T, UseMEMOP] in { +let Predicates = [UseMEMOP] in { // Byte, Half Word - defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend - defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend - defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend + defm: MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend + defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend + defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend // Word - defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4, - MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >; + defm: MemOPr_ALUOp <load, store, u6_2ExtPred, L4_add_memopw_io, + L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>; } //===----------------------------------------------------------------------===// @@ -2130,123 +3105,28 @@ let Predicates = [HasV4T, UseMEMOP] in { // incorrect code for negative numbers. 
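Note (illustrative, not part of the patch): the MemOp patterns above fold a load, an ALU or bit operation, and a store back to the same address into a single memb/memh/memw instruction. A minimal C++ sketch of the read-modify-write shapes being matched, with invented function names:

#include <cstdint>

void memw_add(uint32_t *p, uint32_t rt)   { *p += rt; }                    // memw(Rs+#0) += Rt
void memb_setbit(uint8_t *p, unsigned b)  { *p |= uint8_t(1u << b); }      // memb(Rs+#0) = setbit(#U5)
void memh_clrbit(uint16_t *p, unsigned b) { *p &= uint16_t(~(1u << b)); }  // memh(Rs+#0) = clrbit(#U5)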
// Pd=cmpb.eq(Rs,#u8) -let isCompare = 1, isExtendable = 1, opExtendable = 2, hasSideEffects = 0, - validSubTargets = HasV4SubT in -class CMP_NOT_REG_IMM<string OpName, bits<2> op, Operand ImmOp, - list<dag> Pattern> - : ALU32Inst <(outs PredRegs:$dst), (ins IntRegs:$src1, ImmOp:$src2), - "$dst = !cmp."#OpName#"($src1, #$src2)", - Pattern, - "", ALU32_2op_tc_2early_SLOT0123> { - bits<2> dst; - bits<5> src1; - bits<10> src2; +// p=!cmp.eq(r1,#s10) +def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>; +def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>; +def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>; - let IClass = 0b0111; - let Inst{27-24} = 0b0101; - let Inst{23-22} = op; - let Inst{20-16} = src1; - let Inst{21} = !if (!eq(OpName, "gtu"), 0b0, src2{9}); - let Inst{13-5} = src2{8-0}; - let Inst{4-2} = 0b100; - let Inst{1-0} = dst; -} - -let opExtentBits = 10, isExtentSigned = 1 in { -def C4_cmpneqi : CMP_NOT_REG_IMM <"eq", 0b00, s10Ext, [(set (i1 PredRegs:$dst), - (setne (i32 IntRegs:$src1), s10ExtPred:$src2))]>; - -def C4_cmpltei : CMP_NOT_REG_IMM <"gt", 0b01, s10Ext, [(set (i1 PredRegs:$dst), - (not (setgt (i32 IntRegs:$src1), s10ExtPred:$src2)))]>; - -} -let opExtentBits = 9 in -def C4_cmplteui : CMP_NOT_REG_IMM <"gtu", 0b10, u9Ext, [(set (i1 PredRegs:$dst), - (not (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)))]>; - - - -// p=!cmp.eq(r1,r2) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = !cmp.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>, - Requires<[HasV4T]>; - -// p=!cmp.gt(r1,r2) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = !cmp.gt($src1, $src2)", - [(set (i1 PredRegs:$dst), - (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, - Requires<[HasV4T]>; - - -// p=!cmp.gtu(r1,r2) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = !cmp.gtu($src1, $src2)", - [(set (i1 PredRegs:$dst), - (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, - Requires<[HasV4T]>; - -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u8Imm:$src2), - "$dst = cmpb.eq($src1, #$src2)", - [(set (i1 PredRegs:$dst), - (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>, - Requires<[HasV4T]>; - -def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)), - bb:$offset), - (JMP_f (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), - bb:$offset)>, - Requires<[HasV4T]>; - -// Pd=cmpb.eq(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmpb.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (and (xor (i32 IntRegs:$src1), - (i32 IntRegs:$src2)), 255), 0))]>, - Requires<[HasV4T]>; - -// Pd=cmpb.eq(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmpb.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (shl (i32 IntRegs:$src1), (i32 24)), - (shl (i32 IntRegs:$src2), (i32 24))))]>, - Requires<[HasV4T]>; - -// Pd=cmpb.gt(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), - (ins 
IntRegs:$src1, IntRegs:$src2), - "$dst = cmpb.gt($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setgt (shl (i32 IntRegs:$src1), (i32 24)), - (shl (i32 IntRegs:$src2), (i32 24))))]>, - Requires<[HasV4T]>; - -// Pd=cmpb.gtu(Rs,#u7) -let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, -isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in -def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u7Ext:$src2), - "$dst = cmpb.gtu($src1, #$src2)", - [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), - u7ExtPred:$src2))]>, - Requires<[HasV4T]>, ImmRegRel; +def : T_CMP_pat <C4_cmpneqi, setne, s10ExtPred>; +def : T_CMP_pat <C4_cmpltei, setle, s10ExtPred>; +def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>; + +// rs <= rt -> !(rs > rt). +/* +def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>; +// (C4_cmpltei IntRegs:$src1, s10ExtPred:$src2)>; +*/ +// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), + (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>; + +// rs != rt -> !(rs == rt). +def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), + (C4_cmpneqi IntRegs:$src1, s10ExtPred:$src2)>; // SDNode for converting immediate C to C-1. def DEC_CONST_BYTE : SDNodeXForm<imm, [{ @@ -2263,10 +3143,9 @@ def DEC_CONST_BYTE : SDNodeXForm<imm, [{ // if (!Pd.new) Rd=#0 def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)), u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs), (u8ExtPred:$u8))), - 1, 0))>, - Requires<[HasV4T]>; + 1, 0))>; // For the sequence // zext( setne ( and(Rs, 255), u8)) @@ -2276,10 +3155,9 @@ def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)), // if (!Pd.new) Rd=#1 def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)), u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs), (u8ExtPred:$u8))), - 0, 1))>, - Requires<[HasV4T]>; + 0, 1))>; // For the sequence // zext( seteq (Rs, and(Rt, 255))) @@ -2289,10 +3167,9 @@ def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)), // if (!Pd.new) Rd=#0 def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt), (i32 (and (i32 IntRegs:$Rs), 255)))))), - (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - 1, 0))>, - Requires<[HasV4T]>; + 1, 0))>; // For the sequence // zext( setne (Rs, and(Rt, 255))) @@ -2302,10 +3179,9 @@ def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt), // if (!Pd.new) Rd=#1 def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt), (i32 (and (i32 IntRegs:$Rs), 255)))))), - (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - 0, 1))>, - Requires<[HasV4T]>; + 0, 1))>; // For the sequence // zext( setugt ( and(Rs, 255), u8)) @@ -2315,10 +3191,9 @@ def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt), // if (!Pd.new) Rd=#0 def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)), u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs), (u8ExtPred:$u8))), - 1, 0))>, - Requires<[HasV4T]>; + 1, 0))>; // For the sequence // zext( setugt ( and(Rs, 254), u8)) 
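Note (illustrative, not part of the patch): the zext(setcc(and(Rs, 255), ...)) patterns in the surrounding hunks select a byte compare feeding a predicated 1/0 transfer, as the comments describe. A minimal C++ sketch of the source-level shapes these patterns target, with invented names:

#include <cstdint>

uint32_t byte_eq(uint32_t rs, uint8_t u8)  { return (rs & 0xFF) == u8 ? 1 : 0; }  // cmpb.eq, then 1/0
uint32_t byte_ne(uint32_t rs, uint8_t u8)  { return (rs & 0xFF) != u8 ? 1 : 0; }  // same compare, outputs swapped
uint32_t byte_gtu(uint32_t rs, uint8_t u8) { return (rs & 0xFF) >  u8 ? 1 : 0; }  // cmpb.gtu, then 1/0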
@@ -2328,10 +3203,9 @@ def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)), // if (!Pd.new) Rd=#0 def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)), u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs), (u8ExtPred:$u8))), - 1, 0))>, - Requires<[HasV4T]>; + 1, 0))>; // For the sequence // zext( setult ( Rs, Rt)) @@ -2341,10 +3215,9 @@ def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)), // if (!Pd.new) Rd=#0 // cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt), + (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt), (i32 IntRegs:$Rs))), - 1, 0))>, - Requires<[HasV4T]>; + 1, 0))>; // For the sequence // zext( setlt ( Rs, Rt)) @@ -2354,10 +3227,9 @@ def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), // if (!Pd.new) Rd=#0 // cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt), + (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt), (i32 IntRegs:$Rs))), - 1, 0))>, - Requires<[HasV4T]>; + 1, 0))>; // For the sequence // zext( setugt ( Rs, Rt)) @@ -2366,10 +3238,9 @@ def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), // if (Pd.new) Rd=#1 // if (!Pd.new) Rd=#0 def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - 1, 0))>, - Requires<[HasV4T]>; + 1, 0))>; // This pattern interefers with coremark performance, not implementing at this // time. 
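Note (illustrative, not part of the patch): as the comments state, the lt/ltu forms here and the ge/le forms in the following hunks are expressed with gt/gtu by swapping the compare operands and, where needed, swapping the 1/0 outputs of the conditional transfer. A minimal C++ sketch of those rewrites, with invented names:

#include <cstdint>

uint32_t ult_as_gtu(uint32_t rs, uint32_t rt) { return (rt > rs) ? 1 : 0; }  // cmp.ltu(Rs,Rt) -> cmp.gtu(Rt,Rs)
uint32_t slt_as_gt(int32_t rs, int32_t rt)    { return (rt > rs) ? 1 : 0; }  // cmp.lt(Rs,Rt)  -> cmp.gt(Rt,Rs)
uint32_t sge_as_gt(int32_t rs, int32_t rt)    { return (rt > rs) ? 0 : 1; }  // setge: swapped operands, inverted outputs
uint32_t ule_as_gtu(uint32_t rs, uint32_t rt) { return (rs > rt) ? 0 : 1; }  // setule: same compare, inverted outputs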
@@ -2388,10 +3259,9 @@ def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), // if (!Pd.new) Rd=#1 // cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt), + (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt), (i32 IntRegs:$Rs))), - 0, 1))>, - Requires<[HasV4T]>; + 0, 1))>; // For the sequence // zext( setge ( Rs, Rt)) @@ -2401,10 +3271,9 @@ def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), // if (!Pd.new) Rd=#1 // cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt), + (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt), (i32 IntRegs:$Rs))), - 0, 1))>, - Requires<[HasV4T]>; + 0, 1))>; // For the sequence // zext( setule ( Rs, Rt)) @@ -2413,10 +3282,9 @@ def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), // if (Pd.new) Rd=#0 // if (!Pd.new) Rd=#1 def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - 0, 1))>, - Requires<[HasV4T]>; + 0, 1))>; // For the sequence // zext( setle ( Rs, Rt)) @@ -2425,16 +3293,15 @@ def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), // if (Pd.new) Rd=#0 // if (!Pd.new) Rd=#1 def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs), + (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - 0, 1))>, - Requires<[HasV4T]>; + 0, 1))>; // For the sequence // zext( setult ( and(Rs, 255), u8)) // Use the isdigit transformation below -// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)' +// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' // for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. // The isdigit transformation relies on two 'clever' aspects: // 1) The data type is unsigned which allows us to eliminate a zero test after @@ -2447,961 +3314,1044 @@ def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), // The code is transformed upstream of llvm into // retval = (c-48) < 10 ? 1 : 0; let AddedComplexity = 139 in -def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), - u7StrictPosImmPred:$src2)))), - (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1), - (DEC_CONST_BYTE u7StrictPosImmPred:$src2))), - 0, 1))>, - Requires<[HasV4T]>; - -// Pd=cmpb.gtu(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", -InputType = "reg" in -def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmpb.gtu($src1, $src2)", - [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), - (and (i32 IntRegs:$src2), 255)))]>, - Requires<[HasV4T]>, ImmRegRel; - -// Following instruction is not being extended as it results into the incorrect -// code for negative numbers. - -// Signed half compare(.eq) ri. -// Pd=cmph.eq(Rs,#s8) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, s8Imm:$src2), - "$dst = cmph.eq($src1, #$src2)", - [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535), - s8ImmPred:$src2))]>, - Requires<[HasV4T]>; - -// Signed half compare(.eq) rr. 
-// Case 1: xor + and, then compare: -// r0=xor(r0,r1) -// r0=and(r0,#0xffff) -// p0=cmp.eq(r0,#0) -// Pd=cmph.eq(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmph.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1), - (i32 IntRegs:$src2)), - 65535), 0))]>, - Requires<[HasV4T]>; - -// Signed half compare(.eq) rr. -// Case 2: shift left 16 bits then compare: -// r0=asl(r0,16) -// r1=asl(r1,16) -// p0=cmp.eq(r0,r1) -// Pd=cmph.eq(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmph.eq($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (shl (i32 IntRegs:$src1), (i32 16)), - (shl (i32 IntRegs:$src2), (i32 16))))]>, - Requires<[HasV4T]>; - -/* Incorrect Pattern -- immediate should be right shifted before being -used in the cmph.gt instruction. -// Signed half compare(.gt) ri. -// Pd=cmph.gt(Rs,#s8) - -let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, -isCompare = 1, validSubTargets = HasV4SubT in -def CMPhGTri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, s8Ext:$src2), - "$dst = cmph.gt($src1, #$src2)", - [(set (i1 PredRegs:$dst), - (setgt (shl (i32 IntRegs:$src1), (i32 16)), - s8ExtPred:$src2))]>, - Requires<[HasV4T]>; -*/ - -// Signed half compare(.gt) rr. -// Pd=cmph.gt(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT in -def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmph.gt($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setgt (shl (i32 IntRegs:$src1), (i32 16)), - (shl (i32 IntRegs:$src2), (i32 16))))]>, - Requires<[HasV4T]>; - -// Unsigned half compare rr (.gtu). -// Pd=cmph.gtu(Rs,Rt) -let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", -InputType = "reg" in -def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = cmph.gtu($src1, $src2)", - [(set (i1 PredRegs:$dst), - (setugt (and (i32 IntRegs:$src1), 65535), - (and (i32 IntRegs:$src2), 65535)))]>, - Requires<[HasV4T]>, ImmRegRel; - -// Unsigned half compare ri (.gtu). 
-// Pd=cmph.gtu(Rs,#u7) -let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, -isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", -InputType = "imm" in -def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u7Ext:$src2), - "$dst = cmph.gtu($src1, #$src2)", - [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535), - u7ExtPred:$src2))]>, - Requires<[HasV4T]>, ImmRegRel; - -let validSubTargets = HasV4SubT in -def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = !tstbit($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>, - Requires<[HasV4T]>; - -let validSubTargets = HasV4SubT in -def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - "$dst = !tstbit($src1, $src2)", - [(set (i1 PredRegs:$dst), - (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>, - Requires<[HasV4T]>; +def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), + u7StrictPosImmPred:$src2)))), + (C2_muxii (A4_cmpbgtui IntRegs:$src1, + (DEC_CONST_BYTE u7StrictPosImmPred:$src2)), + 0, 1)>; //===----------------------------------------------------------------------===// // XTYPE/PRED - //===----------------------------------------------------------------------===// -//Deallocate frame and return. -// dealloc_return -let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1, - Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1 in { -let validSubTargets = HasV4SubT in - def DEALLOC_RET_V4 : LD0Inst<(outs), (ins), - "dealloc_return", - []>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// Multiclass for DeallocReturn +//===----------------------------------------------------------------------===// +class L4_RETURN<string mnemonic, bit isNot, bit isPredNew, bit isTak> + : LD0Inst<(outs), (ins PredRegs:$src), + !if(isNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#mnemonic# + !if(isPredNew, #!if(isTak,":t", ":nt"),""), + [], "", LD_tc_3or4stall_SLOT0> { + + bits<2> src; + let BaseOpcode = "L4_RETURN"; + let isPredicatedFalse = isNot; + let isPredicatedNew = isPredNew; + let isTaken = isTak; + let IClass = 0b1001; + + let Inst{27-16} = 0b011000011110; + + let Inst{13} = isNot; + let Inst{12} = isTak; + let Inst{11} = isPredNew; + let Inst{10} = 0b0; + let Inst{9-8} = src; + let Inst{4-0} = 0b11110; + } + +// Produce all predicated forms, p, !p, p.new, !p.new, :t, :nt +multiclass L4_RETURN_PRED<string mnemonic, bit PredNot> { + let isPredicated = 1 in { + def _#NAME# : L4_RETURN <mnemonic, PredNot, 0, 1>; + def _#NAME#new_pnt : L4_RETURN <mnemonic, PredNot, 1, 0>; + def _#NAME#new_pt : L4_RETURN <mnemonic, PredNot, 1, 1>; + } } +multiclass LD_MISC_L4_RETURN<string mnemonic> { + let isBarrier = 1, isPredicable = 1 in + def NAME : LD0Inst <(outs), (ins), mnemonic, [], "", + LD_tc_3or4stall_SLOT0> { + let BaseOpcode = "L4_RETURN"; + let IClass = 0b1001; + let Inst{27-16} = 0b011000011110; + let Inst{13-10} = 0b0000; + let Inst{4-0} = 0b11110; + } + defm t : L4_RETURN_PRED<mnemonic, 0 >; + defm f : L4_RETURN_PRED<mnemonic, 1 >; +} + +let isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in +defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; + // Restore registers and dealloc return function call. 
let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC] in { -let validSubTargets = HasV4SubT in + Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs), (ins calltarget:$dst), "jump $dst", - []>, - Requires<[HasV4T]>; + []>; } // Restore registers and dealloc frame before a tail call. -let isCall = 1, isBarrier = 1, - Defs = [R29, R30, R31, PC] in { -let validSubTargets = HasV4SubT in +let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs), (ins calltarget:$dst), "call $dst", - []>, - Requires<[HasV4T]>; + []>; } // Save registers function call. -let isCall = 1, isBarrier = 1, - Uses = [R29, R31] in { +let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { def SAVE_REGISTERS_CALL_V4 : JInst<(outs), (ins calltarget:$dst), "call $dst // Save_calle_saved_registers", - []>, - Requires<[HasV4T]>; + []>; } -// if (Ps) dealloc_return -let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1, - isPredicated = 1 in { -let validSubTargets = HasV4SubT in - def DEALLOC_RET_cPt_V4 : LD0Inst<(outs), - (ins PredRegs:$src1), - "if ($src1) dealloc_return", - []>, - Requires<[HasV4T]>; -} - -// if (!Ps) dealloc_return -let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1, - isPredicated = 1, isPredicatedFalse = 1 in { -let validSubTargets = HasV4SubT in - def DEALLOC_RET_cNotPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1), - "if (!$src1) dealloc_return", - []>, - Requires<[HasV4T]>; -} - -// if (Ps.new) dealloc_return:nt -let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1, - isPredicated = 1 in { -let validSubTargets = HasV4SubT in - def DEALLOC_RET_cdnPnt_V4 : LD0Inst<(outs), (ins PredRegs:$src1), - "if ($src1.new) dealloc_return:nt", - []>, - Requires<[HasV4T]>; -} +//===----------------------------------------------------------------------===// +// Template class for non predicated store instructions with +// GP-Relative or absolute addressing. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicable = 1, isNVStorable = 1 in +class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<2>MajOp, Operand AddrOp, bit isAbs, bit isHalf> + : STInst<(outs), (ins AddrOp:$addr, RC:$src), + mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src"#!if(isHalf, ".h",""), + [], "", V2LDST_tc_st_SLOT01> { + bits<19> addr; + bits<5> src; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + let IClass = 0b0100; + let Inst{27} = 1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24} = 0b0; + let Inst{23-22} = MajOp; + let Inst{21} = isHalf; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13} = offsetBits{8}; + let Inst{12-8} = src; + let Inst{7-0} = offsetBits{7-0}; + } -// if (!Ps.new) dealloc_return:nt -let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1, - isPredicated = 1, isPredicatedFalse = 1 in { -let validSubTargets = HasV4SubT in - def DEALLOC_RET_cNotdnPnt_V4 : LD0Inst<(outs), (ins PredRegs:$src1), - "if (!$src1.new) dealloc_return:nt", - []>, - Requires<[HasV4T]>; -} +//===----------------------------------------------------------------------===// +// Template class for predicated store instructions with +// GP-Relative or absolute addressing. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicated = 1, isNVStorable = 1, opExtentBits = 6, + opExtendable = 1 in +class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp, + bit isHalf, bit isNot, bit isNew> + : STInst<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", + ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""), + [], "", ST_tc_st_SLOT01>, AddrModeRel { + bits<2> src1; + bits<6> absaddr; + bits<5> src2; + + let isPredicatedNew = isNew; + let isPredicatedFalse = isNot; + + let IClass = 0b1010; -// if (Ps.new) dealloc_return:t -let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1, - isPredicated = 1 in { -let validSubTargets = HasV4SubT in - def DEALLOC_RET_cdnPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1), - "if ($src1.new) dealloc_return:t", - []>, - Requires<[HasV4T]>; -} + let Inst{27-24} = 0b1111; + let Inst{23-22} = MajOp; + let Inst{21} = isHalf; + let Inst{17-16} = absaddr{5-4}; + let Inst{13} = isNew; + let Inst{12-8} = src2; + let Inst{7} = 0b1; + let Inst{6-3} = absaddr{3-0}; + let Inst{2} = isNot; + let Inst{1-0} = src1; + } -// if (!Ps.new) dealloc_return:nt -let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R30], neverHasSideEffects = 1, - isPredicated = 1, isPredicatedFalse = 1 in { -let validSubTargets = HasV4SubT in - def DEALLOC_RET_cNotdnPt_V4 : LD0Inst<(outs), (ins PredRegs:$src1), - "if (!$src1.new) dealloc_return:t", - []>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// Template class for predicated store instructions with absolute addressing. 
+//===----------------------------------------------------------------------===// +class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<2> MajOp, bit isHalf> + : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>, + AddrModeRel { + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); } -// Load/Store with absolute addressing mode -// memw(#u6)=Rt +//===----------------------------------------------------------------------===// +// Multiclass for store instructions with absolute addressing. +//===----------------------------------------------------------------------===// +let addrMode = Absolute, isExtended = 1 in +multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, bits<2> MajOp, bit isHalf = 0> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def S2_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>; -multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME#_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"(##$absaddr) = $src2", - []>, - Requires<[HasV4T]>; -} + // Predicated + def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>; + def S4_p#NAME#f_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 0>; -multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>; - // Predicate new - defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>; + // .new Predicated + def S4_p#NAME#tnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 1>; + def S4_p#NAME#fnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 1>; } } -let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in -multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> { - let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { - let opExtendable = 0, isPredicable = 1 in - def NAME#_V4 : STInst2<(outs), - (ins u0AlwaysExt:$absaddr, RC:$src), - mnemonic#"(##$absaddr) = $src", - []>, - Requires<[HasV4T]>; - - let opExtendable = 1, isPredicated = 1 in { - defm Pt : ST_Abs_Pred<mnemonic, RC, 0>; - defm NotPt : ST_Abs_Pred<mnemonic, RC, 1>; - } +//===----------------------------------------------------------------------===// +// Template class for non predicated new-value store instructions with +// GP-Relative or absolute addressing. 
+//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1, + isNewValue = 1, opNewValue = 1 in +class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs> + : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src), + mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new", + [], "", V2LDST_tc_st_SLOT0> { + bits<19> addr; + bits<3> src; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + let IClass = 0b0100; + + let Inst{27} = 1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24-21} = 0b0101; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13} = offsetBits{8}; + let Inst{12-11} = MajOp; + let Inst{10-8} = src; + let Inst{7-0} = offsetBits{7-0}; } -} -multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#mnemonic#"(##$absaddr) = $src2.new", - []>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// Template class for predicated new-value store instructions with +// absolute addressing. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1, + isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in +class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew> + : NVInst_V4<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, IntRegs:$src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", + ") ")#mnemonic#"(#$absaddr) = $src2.new", + [], "", ST_tc_st_SLOT0>, AddrModeRel { + bits<2> src1; + bits<6> absaddr; + bits<3> src2; + + let isPredicatedNew = isNew; + let isPredicatedFalse = isNot; + + let IClass = 0b1010; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = 0b101; + let Inst{17-16} = absaddr{5-4}; + let Inst{13} = isNew; + let Inst{12-11} = MajOp; + let Inst{10-8} = src2; + let Inst{7} = 0b1; + let Inst{6-3} = absaddr{3-0}; + let Inst{2} = isNot; + let Inst{1-0} = src1; } -multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>; - // Predicate new - defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>; - } +//===----------------------------------------------------------------------===// +// Template class for non-predicated new-value store instructions with +// absolute addressing. 
+//===----------------------------------------------------------------------===// +class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp> + : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 1>, AddrModeRel { + + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); } -let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in -multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> { +//===----------------------------------------------------------------------===// +// Multiclass for new-value store instructions with absolute addressing. +//===----------------------------------------------------------------------===// +let addrMode = Absolute, isExtended = 1 in +multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp, + bits<2> MajOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 0, isPredicable = 1 in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins u0AlwaysExt:$absaddr, RC:$src), - mnemonic#"(##$absaddr) = $src.new", - []>, - Requires<[HasV4T]>; - - let opExtendable = 1, isPredicated = 1 in { - defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>; - defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>; - } - } -} + def S2_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>; -let addrMode = Absolute in { - let accessSize = ByteAccess in - defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>, - ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel; + // Predicated + def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>; + def S4_p#NAME#newf_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 0>; - let accessSize = HalfWordAccess in - defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>, - ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel; - - let accessSize = WordAccess in - defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>, - ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel; - - let accessSize = DoubleWordAccess, isNVStorable = 0 in - defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel; + // .new Predicated + def S4_p#NAME#newtnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 1>; + def S4_p#NAME#newfnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 1>; + } } -let Predicates = [HasV4T], AddedComplexity = 30 in { -def : Pat<(truncstorei8 (i32 IntRegs:$src1), - (HexagonCONST32 tglobaladdr:$absaddr)), - (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; +//===----------------------------------------------------------------------===// +// Stores with absolute addressing +//===----------------------------------------------------------------------===// +let accessSize = ByteAccess in +defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>, + ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>; -def : Pat<(truncstorei16 (i32 IntRegs:$src1), - (HexagonCONST32 tglobaladdr:$absaddr)), - (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; +let accessSize = HalfWordAccess in +defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>, + ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>; -def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), - (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; +let accessSize = WordAccess in +defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>, + ST_Abs_NV 
<"memw", "STriw", u16_2Imm, 0b10>; -def : Pat<(store (i64 DoubleRegs:$src1), - (HexagonCONST32 tglobaladdr:$absaddr)), - (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>; -} +let isNVStorable = 0, accessSize = DoubleWordAccess in +defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>; + +let isNVStorable = 0, accessSize = HalfWordAccess in +defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>; //===----------------------------------------------------------------------===// -// multiclass for store instructions with GP-relative addressing mode. +// GP-relative stores. // mem[bhwd](#global)=Rt -// if ([!]Pv[.new]) mem[bhwd](##global) = Rt +// Once predicated, these instructions map to absolute addressing mode. +// if ([!]Pv[.new]) mem[bhwd](##global)=Rt //===----------------------------------------------------------------------===// -let mayStore = 1, isNVStorable = 1 in -multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> { - let BaseOpcode = BaseOp, isPredicable = 1 in - def NAME#_V4 : STInst2<(outs), - (ins globaladdress:$global, RC:$src), - mnemonic#"(#$global) = $src", - []>; - // When GP-relative instructions are predicated, their addressing mode is - // changed to absolute and they are always constant extended. - let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1, - isPredicated = 1 in { - defm Pt : ST_Abs_Pred <mnemonic, RC, 0>; - defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>; +let isAsmParserOnly = 1 in +class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp, bits<2> MajOp, bit isHalf = 0> + : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0, isHalf> { + // Set BaseOpcode same as absolute addressing instructions so that + // non-predicated GP-Rel instructions can have relate with predicated + // Absolute instruction. + let BaseOpcode = BaseOp#_abs; + } + +let isAsmParserOnly = 1 in +multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp, + bits<2> MajOp, bit isHalf = 0> { + // Set BaseOpcode same as absolute addressing instructions so that + // non-predicated GP-Rel instructions can have relate with predicated + // Absolute instruction. + let BaseOpcode = BaseOp#_abs in { + def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp, + globaladdress, 0, isHalf>; + // New-value store + def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 0> ; } } -let mayStore = 1, isNVStore = 1 in -multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> { - let BaseOpcode = BaseOp, isPredicable = 1 in - def NAME#_nv_V4 : NVInst_V4<(outs), - (ins u0AlwaysExt:$global, RC:$src), - mnemonic#"(#$global) = $src.new", - []>, - Requires<[HasV4T]>; - - // When GP-relative instructions are predicated, their addressing mode is - // changed to absolute and they are always constant extended. 
- let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1, - isPredicated = 1 in { - defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>; - defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>; - } -} - -let validSubTargets = HasV4SubT, neverHasSideEffects = 1 in { - let isNVStorable = 0 in - defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, PredNewRel; - - defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>, - ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel; - defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>, - ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel; - defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>, - ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel; -} - -// 64 bit atomic store -def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), - (i64 DoubleRegs:$src1)), - (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memd(#foo) -let AddedComplexity = 100 in -def : Pat <(store (i64 DoubleRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>; +let accessSize = ByteAccess in +defm S2_storerb : ST_GP<"memb", "STrib", u16_0Imm, 0b00>, NewValueRel; -// 8 bit atomic store -def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; +let accessSize = HalfWordAccess in +defm S2_storerh : ST_GP<"memh", "STrih", u16_1Imm, 0b01>, NewValueRel; -// Map from store(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; +let accessSize = WordAccess in +defm S2_storeri : ST_GP<"memw", "STriw", u16_2Imm, 0b10>, NewValueRel; -// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" -// to "r0 = 1; memw(#foo) = r0" -let AddedComplexity = 100 in -def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>; +let isNVStorable = 0, accessSize = DoubleWordAccess in +def S2_storerdgp : T_StoreGP <"memd", "STrid", DoubleRegs, + u16_3Imm, 0b11>, PredNewRel; -def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; +let isNVStorable = 0, accessSize = HalfWordAccess in +def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs, + u16_1Imm, 0b01, 1>, PredNewRel; -// Map from store(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; +class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; -// 32 bit atomic store -def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; +class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; -// Map from store(globaladdress) -> memw(#foo) -let AddedComplexity = 100 in -def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; +class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; 
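Before the load templates below, a short sketch of how the T_StoreAbsGP template above packs its operands: the 19-bit address operand is scaled down to a 16-bit offset (dropping 0-3 low bits according to the u16_0Imm..u16_3Imm operand choice) and those 16 bits are scattered over Inst{26-25}, Inst{20-16}, Inst{13} and Inst{7-0}. The C++ below is illustrative only; encodeStoreAbs and its parameters are not part of the patch, they merely mirror the Inst{} assignments of the non-predicated form.

#include <cstdint>

// Sketch only (not from the patch): how T_StoreAbsGP scatters a scaled
// 16-bit absolute offset into the 32-bit store encoding.  'shift' stands in
// for the immediate-operand choice: 0 for memb, 1 for memh, 2 for memw,
// 3 for memd, i.e. offsetBits = addr >> shift.
static uint32_t encodeStoreAbs(uint32_t addr, unsigned shift, unsigned majOp,
                               bool isHalf, unsigned srcReg) {
  uint32_t off = (addr >> shift) & 0xFFFF;     // offsetBits{15-0}
  uint32_t insn = 0;
  insn |= 0x4u << 28;                          // IClass     = 0b0100
  insn |= 1u << 27;                            // Inst{27}   = 1
  insn |= ((off >> 14) & 0x3) << 25;           // Inst{26-25} = offsetBits{15-14}
  /* Inst{24} = 0 */
  insn |= (majOp & 0x3) << 22;                 // Inst{23-22} = MajOp
  insn |= (uint32_t)isHalf << 21;              // Inst{21}   = isHalf (.h store)
  insn |= ((off >> 9) & 0x1F) << 16;           // Inst{20-16} = offsetBits{13-9}
  insn |= ((off >> 8) & 0x1) << 13;            // Inst{13}   = offsetBits{8}
  insn |= (srcReg & 0x1F) << 8;                // Inst{12-8} = source register
  insn |= off & 0xFF;                          // Inst{7-0}  = offsetBits{7-0}
  return insn;
}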
-//===----------------------------------------------------------------------===// -// Multiclass for the load instructions with absolute addressing mode. -//===----------------------------------------------------------------------===// -multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, - bit isPredNew> { - let isPredicatedNew = isPredNew in - def NAME : LDInst2<(outs RC:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$absaddr), - !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", - ") ")#"$dst = "#mnemonic#"(##$absaddr)", - []>, - Requires<[HasV4T]>; -} +class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, + InstHexagon MI> + : Pat<(Store Value:$val, Addr:$addr), + (MI Addr:$addr, (ValueMod Value:$val))>; -multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let isPredicatedFalse = PredNot in { - defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>; - // Predicate new - defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>; - } +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; + +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; + def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; + def: Storea_pat<store, I32, addrgp, S2_storerigp>; + def: Storea_pat<store, I64, addrgp, S2_storerdgp>; + + // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" + // to "r0 = 1; memw(#foo) = r0" + let AddedComplexity = 100 in + def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; } -let isExtended = 1, neverHasSideEffects = 1 in -multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> { - let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { - let opExtendable = 1, isPredicable = 1 in - def NAME#_V4 : LDInst2<(outs RC:$dst), - (ins u0AlwaysExt:$absaddr), - "$dst = "#mnemonic#"(##$absaddr)", - []>, - Requires<[HasV4T]>; - - let opExtendable = 2, isPredicated = 1 in { - defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>; - defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>; - } +//===----------------------------------------------------------------------===// +// Template class for non predicated load instructions with +// absolute addressing mode. 
+//===----------------------------------------------------------------------===// +let isPredicable = 1, hasSideEffects = 0 in +class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3> MajOp, Operand AddrOp, bit isAbs> + : LDInst <(outs RC:$dst), (ins AddrOp:$addr), + "$dst = "#mnemonic# !if(isAbs, "(##", "(#")#"$addr)", + [], "", V2LDST_tc_ld_SLOT01> { + bits<5> dst; + bits<19> addr; + bits<16> offsetBits; + + string ImmOpStr = !cast<string>(ImmOp); + let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3}, + !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, + !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, + /* u16_0Imm */ addr{15-0}))); + + let IClass = 0b0100; + + let Inst{27} = 0b1; + let Inst{26-25} = offsetBits{15-14}; + let Inst{24} = 0b1; + let Inst{23-21} = MajOp; + let Inst{20-16} = offsetBits{13-9}; + let Inst{13-5} = offsetBits{8-0}; + let Inst{4-0} = dst; } -} -let addrMode = Absolute in { - let accessSize = ByteAccess in { - defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel; - defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel; +class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp, + bits<3> MajOp> + : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel { + + string ImmOpStr = !cast<string>(ImmOp); + let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, + !if (!eq(ImmOpStr, "u16_2Imm"), 18, + !if (!eq(ImmOpStr, "u16_1Imm"), 17, + /* u16_0Imm */ 16))); + + let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3, + !if (!eq(ImmOpStr, "u16_2Imm"), 2, + !if (!eq(ImmOpStr, "u16_1Imm"), 1, + /* u16_0Imm */ 0))); } - let accessSize = HalfWordAccess in { - defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel; - defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel; + +//===----------------------------------------------------------------------===// +// Template class for predicated load instructions with +// absolute addressing mode. +//===----------------------------------------------------------------------===// +let isPredicated = 1, opExtentBits = 6, opExtendable = 2 in +class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp, + bit isPredNot, bit isPredNew> + : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr), + !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel { + bits<5> dst; + bits<2> src1; + bits<6> absaddr; + + let isPredicatedNew = isPredNew; + let isPredicatedFalse = isPredNot; + let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1); + + let IClass = 0b1001; + + let Inst{27-24} = 0b1111; + let Inst{23-21} = MajOp; + let Inst{20-16} = absaddr{5-1}; + let Inst{13} = 0b1; + let Inst{12} = isPredNew; + let Inst{11} = isPredNot; + let Inst{10-9} = src1; + let Inst{8} = absaddr{0}; + let Inst{7} = 0b1; + let Inst{4-0} = dst; } - let accessSize = WordAccess in - defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel; - let accessSize = DoubleWordAccess in - defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel; +//===----------------------------------------------------------------------===// +// Multiclass for the load instructions with absolute addressing mode. 
+//===----------------------------------------------------------------------===// +multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bits<3> MajOp, + bit PredNot> { + def _abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 0>; + // Predicate new + def new_abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 1>; } -let Predicates = [HasV4T], AddedComplexity = 30 in { -def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))), - (LDriw_abs_V4 tglobaladdr: $absaddr)>; - -def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), - (LDrib_abs_V4 tglobaladdr:$absaddr)>; +let addrMode = Absolute, isExtended = 1 in +multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC, + Operand ImmOp, bits<3> MajOp> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 1, isPredicable = 1 in + def L4_#NAME#_abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>; -def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), - (LDriub_abs_V4 tglobaladdr:$absaddr)>; + // Predicated + defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>; + defm L4_p#NAME#f : LD_Abs_Pred<mnemonic, RC, MajOp, 1>; + } +} -def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), - (LDrih_abs_V4 tglobaladdr:$absaddr)>; +let accessSize = ByteAccess, hasNewValue = 1 in { + defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>; + defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>; +} -def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), - (LDriuh_abs_V4 tglobaladdr:$absaddr)>; +let accessSize = HalfWordAccess, hasNewValue = 1 in { + defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>; + defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>; } +let accessSize = WordAccess, hasNewValue = 1 in +defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; + +let accessSize = DoubleWordAccess in +defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; + //===----------------------------------------------------------------------===// // multiclass for load instructions with GP-relative addressing mode. // Rx=mem[bhwd](##global) +// Once predicated, these instructions map to absolute addressing mode. 
// if ([!]Pv[.new]) Rx=mem[bhwd](##global) //===----------------------------------------------------------------------===// -let neverHasSideEffects = 1, validSubTargets = HasV4SubT in -multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> { - let BaseOpcode = BaseOp in { - let isPredicable = 1 in - def NAME#_V4 : LDInst2<(outs RC:$dst), - (ins globaladdress:$global), - "$dst = "#mnemonic#"(#$global)", - []>; - - let isExtended = 1, opExtendable = 2, isPredicated = 1 in { - defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>; - defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>; - } - } -} -defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>, PredNewRel; -defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>, PredNewRel; -defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>, PredNewRel; -defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>, PredNewRel; -defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>, PredNewRel; -defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>, PredNewRel; +let isAsmParserOnly = 1 in +class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp, + bits<3> MajOp> + : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0>, PredNewRel { + let BaseOpcode = BaseOp#_abs; + } -def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), - (i64 (LDd_GP_V4 tglobaladdr:$global))>; +let accessSize = ByteAccess, hasNewValue = 1 in { + def L2_loadrbgp : T_LoadGP<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>; + def L2_loadrubgp : T_LoadGP<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>; +} -def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDw_GP_V4 tglobaladdr:$global))>; +let accessSize = HalfWordAccess, hasNewValue = 1 in { + def L2_loadrhgp : T_LoadGP<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>; + def L2_loadruhgp : T_LoadGP<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>; +} -def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDuh_GP_V4 tglobaladdr:$global))>; +let accessSize = WordAccess, hasNewValue = 1 in +def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; -def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDub_GP_V4 tglobaladdr:$global))>; +let accessSize = DoubleWordAccess in +def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; -// Map from load(globaladdress) -> memw(#foo + 0) -let AddedComplexity = 100 in -def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i64 (LDd_GP_V4 tglobaladdr:$global))>; +def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; +def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; +def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; +def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; // Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>; +def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>; +def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; + +def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>; +def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; + +// Map from load(globaladdress) -> mem[u][bhwd](#foo) +class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> + : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), + (VT (MI tglobaladdr:$global))>; + +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi8, L2_loadrbgp>; + def: LoadGP_pats <sextloadi8, L2_loadrbgp>; + def: 
LoadGP_pats <zextloadi8, L2_loadrubgp>; + def: LoadGP_pats <extloadi16, L2_loadrhgp>; + def: LoadGP_pats <sextloadi16, L2_loadrhgp>; + def: LoadGP_pats <zextloadi16, L2_loadruhgp>; + def: LoadGP_pats <load, L2_loadrigp>; + def: LoadGP_pats <load, L2_loadrdgp, i64>; +} // When the Interprocedural Global Variable optimizer realizes that a certain // global variable takes only two constant values, it shrinks the global to // a boolean. Catch those loads here in the following 3 patterns. -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>; +let AddedComplexity = 100 in { + def: LoadGP_pats <extloadi1, L2_loadrubgp>; + def: LoadGP_pats <zextloadi1, L2_loadrubgp>; +} -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>; +// Transfer global address into a register +def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; +def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>; +def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>; -// Map from load(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>; +def: Pat<(i64 (ctlz I64:$src1)), (Zext64 (S2_cl0p I64:$src1))>; +def: Pat<(i64 (cttz I64:$src1)), (Zext64 (S2_ct0p I64:$src1))>; -// Map from load(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>; +let AddedComplexity = 30 in { + def: Storea_pat<truncstorei8, I32, u0AlwaysExtPred, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, u0AlwaysExtPred, S2_storerhabs>; + def: Storea_pat<store, I32, u0AlwaysExtPred, S2_storeriabs>; +} -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP_V4 tglobaladdr:$global))>; +let AddedComplexity = 30 in { + def: Loada_pat<load, i32, u0AlwaysExtPred, L4_loadri_abs>; + def: Loada_pat<sextloadi8, i32, u0AlwaysExtPred, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, u0AlwaysExtPred, L4_loadrub_abs>; + def: Loada_pat<sextloadi16, i32, u0AlwaysExtPred, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, u0AlwaysExtPred, L4_loadruh_abs>; +} -// Map from load(globaladdress) -> memub(#foo) +// Indexed store word - global address. +// memw(Rs+#u6:2)=#S8 let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP_V4 tglobaladdr:$global))>; +def: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>; -// Map from load(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDh_GP_V4 tglobaladdr:$global))>; +// Load from a global address that has only one use in the current basic block. 
+let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>; + def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>; -// Map from load(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDh_GP_V4 tglobaladdr:$global))>; + def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>; + def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>; -// Map from load(globaladdress) -> memuh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDuh_GP_V4 tglobaladdr:$global))>; + def: Loada_pat<load, i32, addrga, L4_loadri_abs>; + def: Loada_pat<load, i64, addrga, L4_loadrd_abs>; +} -// Map from load(globaladdress) -> memw(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDw_GP_V4 tglobaladdr:$global))>; +// Store to a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>; + def: Storea_pat<store, I32, addrga, S2_storeriabs>; + def: Storea_pat<store, I64, addrga, S2_storerdabs>; + def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>; +} + +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd +let AddedComplexity = 100 in +def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>; // Transfer global address into a register let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1, -isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in +isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), "$dst = #$src1", - [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>, - Requires<[HasV4T]>; + [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>; // Transfer a block address into a register def : Pat<(HexagonCONST32_GP tblockaddress:$src1), - (TFRI_V4 tblockaddress:$src1)>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, AddedComplexity=50, -neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in -def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s16Ext:$src2), - "if($src1) $dst = #$src2", - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1, -neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in -def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s16Ext:$src2), - "if(!$src1) $dst = #$src2", - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, AddedComplexity=50, -neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in -def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s16Ext:$src2), - "if($src1.new) $dst = #$src2", - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, AddedComplexity=50, isPredicatedFalse = 1, -neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in -def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s16Ext:$src2), - "if(!$src1.new) $dst = #$src2", - []>, - Requires<[HasV4T]>; - -let 
AddedComplexity = 50, Predicates = [HasV4T] in -def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), - (TFRI_V4 tglobaladdr:$src1)>, - Requires<[HasV4T]>; - - -// Load - Indirect with long offset: These instructions take global address -// as an operand -let isExtended = 1, opExtendable = 3, AddedComplexity = 40, -validSubTargets = HasV4SubT in -def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), - "$dst=memd($src1<<#$src2+##$offset)", - [(set (i64 DoubleRegs:$dst), - (load (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$offset))))]>, - Requires<[HasV4T]>; + (TFRI_V4 tblockaddress:$src1)>; -let AddedComplexity = 40 in -multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> { -let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in - def _lo_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), - !strconcat("$dst = ", - !strconcat(OpcStr, "($src1<<#$src2+##$offset)")), - [(set IntRegs:$dst, - (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$offset)))))]>, - Requires<[HasV4T]>; -} - -defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>; -defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>; -defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>; -defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>; -defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>; -defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>; -defm LDriw_ind : LD_indirect_lo<"memw", load>; - -let AddedComplexity = 40 in -def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, - (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), - (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, - Requires<[HasV4T]>; - -let AddedComplexity = 40 in -def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, - (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), - (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, - Requires<[HasV4T]>; +let AddedComplexity = 50 in +def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), + (TFRI_V4 tglobaladdr:$src1)>; -let Predicates = [HasV4T], AddedComplexity = 30 in { -def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STrib_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; +// i8/i16/i32 -> i64 loads +// We need a complexity of 120 here to override preceding handling of +// zextload. 
+let AddedComplexity = 120 in { + def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>; + def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>; + def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>; -def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STrih_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; + def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>; + def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>; + def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>; -def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STriw_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; + def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>; + def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>; + def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>; } -let Predicates = [HasV4T], AddedComplexity = 30 in { -def : Pat<(i32 (load u0AlwaysExtPred:$src)), - (LDriw_abs_V4 u0AlwaysExtPred:$src)>; +let AddedComplexity = 100 in { + def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>; + def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>; -def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)), - (LDrib_abs_V4 u0AlwaysExtPred:$src)>; + def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>; + def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>; -def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)), - (LDriub_abs_V4 u0AlwaysExtPred:$src)>; - -def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)), - (LDrih_abs_V4 u0AlwaysExtPred:$src)>; - -def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)), - (LDriuh_abs_V4 u0AlwaysExtPred:$src)>; + def: Loada_pat<load, i32, addrgp, L4_loadri_abs>; + def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>; } -// Indexed store word - global address. -// memw(Rs+#u6:2)=#S8 -let AddedComplexity = 10 in -def STriw_offset_ext_V4 : STInst<(outs), - (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3), - "memw($src1+#$src2) = ##$src3", - [(store (HexagonCONST32 tglobaladdr:$src3), - (add IntRegs:$src1, u6_2ImmPred:$src2))]>, - Requires<[HasV4T]>; - -def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))), - (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>, - Requires<[HasV4T]>; - -def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))), - (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>, - Requires<[HasV4T]>; +let AddedComplexity = 100 in { + def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>; + def: Storea_pat<store, I32, addrgp, S2_storeriabs>; + def: Storea_pat<store, I64, addrgp, S2_storerdabs>; +} +def: Loada_pat<atomic_load_8, i32, addrgp, L4_loadrub_abs>; +def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>; +def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>; +def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>; -// i8 -> i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextloadi8. 
-let Predicates = [HasV4T], AddedComplexity = 120 in { -def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 tglobaladdr:$addr)))>; +def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbabs>; +def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>; +def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>; +def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>; -def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 tglobaladdr:$addr)))>; +//===----------------------------------------------------------------------===// +// :raw for of boundscheck:hi:lo insns +//===----------------------------------------------------------------------===// -def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (SXTW (LDrib_abs_V4 tglobaladdr:$addr)))>; +// A4_boundscheck_lo: Detect if a register is within bounds. +let hasSideEffects = 0 in +def A4_boundscheck_lo: ALU64Inst < + (outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = boundscheck($Rss, $Rtt):raw:lo"> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = 0b1; + let Inst{7-5} = 0b100; + let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } -def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)), - (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; +// A4_boundscheck_hi: Detect if a register is within bounds. +let hasSideEffects = 0 in +def A4_boundscheck_hi: ALU64Inst < + (outs PredRegs:$Pd), + (ins DoubleRegs:$Rss, DoubleRegs:$Rtt), + "$Pd = boundscheck($Rss, $Rtt):raw:hi"> { + bits<2> Pd; + bits<5> Rss; + bits<5> Rtt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b00100; + let Inst{13} = 0b1; + let Inst{7-5} = 0b101; + let Inst{1-0} = Pd; + let Inst{20-16} = Rss; + let Inst{12-8} = Rtt; + } -def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)), - (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 FoldGlobalAddr:$addr)))>; +let hasSideEffects = 0, isAsmParserOnly = 1 in +def A4_boundscheck : MInst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt), + "$Pd=boundscheck($Rs,$Rtt)">; + +// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry. +let isPredicateLate = 1, hasSideEffects = 0 in +def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd), + (ins DoubleRegs:$Rs, IntRegs:$Rt), + "$Pd = tlbmatch($Rs, $Rt)", + [], "", ALU64_tc_2early_SLOT23> { + bits<2> Pd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + let Inst{27-23} = 0b00100; + let Inst{20-16} = Rs; + let Inst{13} = 0b1; + let Inst{12-8} = Rt; + let Inst{7-5} = 0b011; + let Inst{1-0} = Pd; + } -def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)), - (i64 (SXTW (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; +// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH +// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. +// We don't really want either one here. +def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; +def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, + [SDNPHasChain]>; + +// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't +// really do a load. 
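The comment above motivates HexagonISD::DCFETCH. The following is a hedged sketch, not part of this patch, of what a custom-lowering hook for ISD::PREFETCH could look like, assuming one is registered via setOperationAction(ISD::PREFETCH, MVT::Other, Custom); the hook name and its placement in HexagonTargetLowering are assumptions. Its result selects onto the Y2_dcfetchbo definition that follows.

// Sketch only -- not part of this patch.  Rewrites ISD::PREFETCH into
// HexagonISD::DCFETCH, which carries neither MayLoad nor MayStore and so
// can match the dcfetch pattern below.
SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);            // incoming chain
  SDValue Addr  = Op.getOperand(1);            // address to prefetch
  SDLoc DL(Op);
  // The rw/locality/cache-type hints of ISD::PREFETCH are dropped; dcfetch
  // only takes a base address plus an immediate offset (#u11:3), 0 here.
  return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr,
                     DAG.getConstant(0, MVT::i32));
}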
+let hasSideEffects = 1, mayLoad = 0 in +def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), + "dcfetch($Rs + #$u11_3)", + [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)], + "", LD_tc_ld_SLOT0> { + bits<5> Rs; + bits<14> u11_3; + + let IClass = 0b1001; + let Inst{27-21} = 0b0100000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{10-0} = u11_3{13-3}; } -// i16 -> i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextloadi16. -let AddedComplexity = 120 in { -def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 tglobaladdr:$addr)))>, - Requires<[HasV4T]>; -def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 tglobaladdr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (SXTW (LDrih_abs_V4 tglobaladdr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)), - (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)), - (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// Compound instructions +//===----------------------------------------------------------------------===// -def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)), - (i64 (SXTW (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, + opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, + isTerminator = 1 in +class CJInst_tstbit_R0<string px, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), + ""#px#" = tstbit($Rs, #0); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND> { + bits<4> Rs; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{24-23} = 0b11; + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{9-8} = 0b11; + let Inst{7-1} = r9_2{8-2}; } -// i32->i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextloadi32. 
-let AddedComplexity = 120 in { -def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), - (i64 (SXTW (LDriw_abs_V4 tglobaladdr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)), - (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; -def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)), - (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; - -def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)), - (i64 (SXTW (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, - Requires<[HasV4T]>; +let Defs = [PC, P0], Uses = [P0] in { + def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">; + def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">; + def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">; + def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">; } -// Indexed store double word - global address. -// memw(Rs+#u6:2)=#S8 -let AddedComplexity = 10 in -def STrih_offset_ext_V4 : STInst<(outs), - (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3), - "memh($src1+#$src2) = ##$src3", - [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3), - (add IntRegs:$src1, u6_1ImmPred:$src2))]>, - Requires<[HasV4T]>; -// Map from store(globaladdress + x) -> memd(#foo + x) -let AddedComplexity = 100 in -def : Pat<(store (i64 DoubleRegs:$src1), - FoldGlobalAddrGP:$addr), - (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr, - (i64 DoubleRegs:$src1)), - (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), - (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), - (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memh(#foo + x) -let AddedComplexity = 100 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), - (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), - (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memw(#foo + x) -let AddedComplexity = 100 in -def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), - (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), - (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memd(#foo + x) -let AddedComplexity = 100 in -def : Pat<(i64 (load FoldGlobalAddrGP:$addr)), - (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr), - (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) 
-> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)), - (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)), - (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; - -//let AddedComplexity = 100 in -let AddedComplexity = 100 in -def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)), - (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memh(#foo + x) -let AddedComplexity = 100 in -def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)), - (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memuh(#foo + x) -let AddedComplexity = 100 in -def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)), - (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; +let Defs = [PC, P1], Uses = [P1] in { + def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">; + def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">; + def J4_tstbit0_fp1_jump_nt : CJInst_tstbit_R0<"p1", 1, "nt">; + def J4_tstbit0_fp1_jump_t : CJInst_tstbit_R0<"p1", 1, "t">; +} -def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr), - (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; -// Map from load(globaladdress + x) -> memub(#foo + x) -let AddedComplexity = 100 in -def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)), - (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; +let isBranch = 1, hasSideEffects = 0, + isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1, + isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, + opExtendable = 2, isTerminator = 1 in +class CJInst_RR<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs, $Rt); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND> { + bits<4> Rs; + bits<4> Rt; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-23} = !if (!eq(op, "eq"), 0b01000, + !if (!eq(op, "gt"), 0b01001, + !if (!eq(op, "gtu"), 0b01010, 0))); + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + // px: Predicate reg 0/1 + let Inst{12} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{11-8} = Rt; + let Inst{7-1} = r9_2{8-2}; +} -def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr), - (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; +// P[10] taken/not taken. 
+multiclass T_tnt_CJInst_RR<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_RR<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_RR<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_RR<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_RR<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_RR<string op>{ + defm J4_cmp#NAME#_t : T_tnt_CJInst_RR<op, 0>; + defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>; +} +// TypeCJ Instructions compare RR and jump +defm eq : T_pnp_CJInst_RR<"eq">; +defm gt : T_pnp_CJInst_RR<"gt">; +defm gtu : T_pnp_CJInst_RR<"gtu">; + +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in +class CJInst_RU5<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs, #$U5); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND> { + bits<4> Rs; + bits<5> U5; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + // px: Predicate reg 0/1 + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + let Inst{24-23} = !if (!eq(op, "eq"), 0b00, + !if (!eq(op, "gt"), 0b01, + !if (!eq(op, "gtu"), 0b10, 0))); + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{12-8} = U5; + let Inst{7-1} = r9_2{8-2}; +} +// P[10] taken/not taken. 
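For reference, these compound compare-and-jump multiclasses expand purely by name concatenation: the instantiation defm eq : T_pnp_CJInst_RR<"eq"> above first forms the prefixes J4_cmpeq_t and J4_cmpeq_f (true/false sense of the predicate), and the inner multiclass then appends the predicate register and branch hint, yielding J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t, J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t, J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t, J4_cmpeq_fp1_jump_nt and J4_cmpeq_fp1_jump_t. The register-immediate (RU5) and compare-against-minus-one multiclasses that follow repeat the same scheme, adding an "i" or "n1" infix to the generated names.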
+multiclass T_tnt_CJInst_RU5<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_RU5<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_RU5<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_RU5<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_RU5<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_RU5<string op>{ + defm J4_cmp#NAME#i_t : T_tnt_CJInst_RU5<op, 0>; + defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>; +} +// TypeCJ Instructions compare RI and jump +defm eq : T_pnp_CJInst_RU5<"eq">; +defm gt : T_pnp_CJInst_RU5<"gt">; +defm gtu : T_pnp_CJInst_RU5<"gtu">; + +let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1, + isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, + isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1, + isTerminator = 1 in +class CJInst_Rn1<string px, string op, bit np, string tnt> + : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), + ""#px#" = cmp."#op#"($Rs,#-1); if (" + #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", + [], "", COMPOUND, TypeCOMPOUND> { + bits<4> Rs; + bits<11> r9_2; + + // np: !p[01] + let isPredicatedFalse = np; + // tnt: Taken/Not Taken + let isBrTaken = !if (!eq(tnt, "t"), "true", "false"); + let isTaken = !if (!eq(tnt, "t"), 1, 0); + + let IClass = 0b0001; + let Inst{27-26} = 0b00; + let Inst{25} = !if (!eq(px, "!p1"), 1, + !if (!eq(px, "p1"), 1, 0)); + + let Inst{24-23} = 0b11; + let Inst{22} = np; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rs; + let Inst{13} = !if (!eq(tnt, "t"), 1, 0); + let Inst{9-8} = !if (!eq(op, "eq"), 0b00, + !if (!eq(op, "gt"), 0b01, 0)); + let Inst{7-1} = r9_2{8-2}; +} -// Map from load(globaladdress + x) -> memw(#foo + x) -let AddedComplexity = 100 in -def : Pat<(i32 (load FoldGlobalAddrGP:$addr)), - (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; +// P[10] taken/not taken. +multiclass T_tnt_CJInst_Rn1<string op, bit np> { + let Defs = [PC, P0], Uses = [P0] in { + def NAME#p0_jump_nt : CJInst_Rn1<"p0", op, np, "nt">; + def NAME#p0_jump_t : CJInst_Rn1<"p0", op, np, "t">; + } + let Defs = [PC, P1], Uses = [P1] in { + def NAME#p1_jump_nt : CJInst_Rn1<"p1", op, np, "nt">; + def NAME#p1_jump_t : CJInst_Rn1<"p1", op, np, "t">; + } +} +// Predicate / !Predicate +multiclass T_pnp_CJInst_Rn1<string op>{ + defm J4_cmp#NAME#n1_t : T_tnt_CJInst_Rn1<op, 0>; + defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>; +} +// TypeCJ Instructions compare -1 and jump +defm eq : T_pnp_CJInst_Rn1<"eq">; +defm gt : T_pnp_CJInst_Rn1<"gt">; + +// J4_jumpseti: Direct unconditional jump and set register to immediate. +let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, + isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2 in +def J4_jumpseti: CJInst < + (outs IntRegs:$Rd), + (ins u6Imm:$U6, brtarget:$r9_2), + "$Rd = #$U6 ; jump $r9_2"> { + bits<4> Rd; + bits<6> U6; + bits<11> r9_2; + + let IClass = 0b0001; + let Inst{27-24} = 0b0110; + let Inst{21-20} = r9_2{10-9}; + let Inst{19-16} = Rd; + let Inst{13-8} = U6; + let Inst{7-1} = r9_2{8-2}; + } -def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr), - (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>, - Requires<[HasV4T]>; +// J4_jumpsetr: Direct unconditional jump and transfer register. 
+let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, + isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, + opExtentAlign = 2, opExtendable = 2 in +def J4_jumpsetr: CJInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, brtarget:$r9_2), + "$Rd = $Rs ; jump $r9_2"> { + bits<4> Rd; + bits<4> Rs; + bits<11> r9_2; + + let IClass = 0b0001; + let Inst{27-24} = 0b0111; + let Inst{21-20} = r9_2{10-9}; + let Inst{11-8} = Rd; + let Inst{19-16} = Rs; + let Inst{7-1} = r9_2{8-2}; + } diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td index 9da6074..19b0935 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -1,26 +1,94 @@ -def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [ - SDTCisVT<0, f32>, - SDTCisPtrTy<1>]>; -def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>; +//=- HexagonInstrInfoV5.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V5 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// XTYPE/MPY +//===----------------------------------------------------------------------===// + + //Rdd[+]=vrmpybsu(Rss,Rtt) +let Predicates = [HasV5T] in { + def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>; + def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>; + + //Rdd[+]=vrmpybu(Rss,Rtt) + def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>; + def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>; + + def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>; + def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>; +} + +// Vector multiply bytes +// Rdd=vmpyb[s]u(Rs,Rt) +let Predicates = [HasV5T] in { + def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>; + def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>; + + // Rxx+=vmpyb[s]u(Rs,Rt) + def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>; + def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>; + + // Rd=vaddhub(Rss,Rtt):sat + let hasNewValue = 1, opNewValue = 0 in + def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>; +} + +def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm, + [(set I64:$dst, + (sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)), + (i32 1)))], 1>, + Requires<[HasV5T]> { + bits<6> src2; + let Inst{13-8} = src2; +} + +let isAsmParserOnly = 1 in +def S2_asr_i_p_rnd_goodsyntax + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + "$dst = asrrnd($src1, #$src2)">; + +def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>, + Requires<[HasV5T]> { + let Inst{13,7,4} = 0b111; +} + +def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>, + Requires<[HasV5T]> { + let Inst{20,13,7,4} = 0b1111; +} -let isReMaterializable = 1, isMoveImm = 1 in +def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, + SDTCisPtrTy<1>]>; +def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>; + +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def 
FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst = CONST32(#$global)", - [(set (f32 IntRegs:$dst), - (HexagonFCONST32 tglobaladdr:$global))]>, - Requires<[HasV5T]>; + "$dst = CONST32(#$global)", + [(set F32:$dst, + (HexagonFCONST32 tglobaladdr:$global))]>, + Requires<[HasV5T]>; -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1), - "$dst = CONST64(#$src1)", - [(set DoubleRegs:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; + "$dst = CONST64(#$src1)", + [(set F64:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; -let isReMaterializable = 1, isMoveImm = 1 in +let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), - "$dst = CONST32(#$src1)", - [(set IntRegs:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; + "$dst = CONST32(#$src1)", + [(set F32:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; // Transfer immediate float. // Only works with single precision fp value. @@ -29,605 +97,841 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), // Make sure that complexity is more than the CONST32 pattern in // HexagonInstrInfo.td patterns. let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, -isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, -isCodeGenOnly = 1 in + isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, + isCodeGenOnly = 1 in def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), - "$dst = #$src1", - [(set IntRegs:$dst, fpimm:$src1)]>, - Requires<[HasV5T]>; + "$dst = #$src1", + [(set F32:$dst, fpimm:$src1)]>, + Requires<[HasV5T]>; let isExtended = 1, opExtendable = 2, isPredicated = 1, -neverHasSideEffects = 1, validSubTargets = HasV5SubT in + hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), - "if ($src1) $dst = #$src2", - []>, - Requires<[HasV5T]>; + "if ($src1) $dst = #$src2", []>, + Requires<[HasV5T]>; -let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, -neverHasSideEffects = 1, validSubTargets = HasV5SubT in +let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1, + isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), - "if (!$src1) $dst =#$src2", - []>, - Requires<[HasV5T]>; + "if (!$src1) $dst = #$src2", []>, + Requires<[HasV5T]>; + +def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i64>]>; + +def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; + +let hasNewValue = 1, validSubTargets = HasV5SubT in +def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), + "$Rd = popcount($Rss)", + [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>, + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1000011; + let Inst{7-5} = 0b011; + let Inst{4-0} = Rd; + let Inst{20-16} = Rss; + } + +defm: Loadx_pat<load, f32, s11_2ExtPred, L2_loadri_io>; +defm: Loadx_pat<load, f64, s11_3ExtPred, L2_loadrd_io>; + +defm: Storex_pat<store, F32, s11_2ExtPred, S2_storeri_io>; +defm: Storex_pat<store, F64, s11_3ExtPred, S2_storerd_io>; +def: Storex_simple_pat<store, F32, S2_storeri_io>; +def: Storex_simple_pat<store, F64, 
S2_storerd_io>; + +let isFP = 1, hasNewValue = 1, opNewValue = 0 in +class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp> + : MInst<(outs IntRegs:$Rd), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd = "#mnemonic#"($Rs, $Rt)", [], + "" , M_tc_3or4x_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } + +let isCommutable = 1 in { + def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>; + def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>; +} -// Convert single precision to double precision and vice-versa. -def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2df($src)", - [(set DoubleRegs:$dst, (fextend IntRegs:$src))]>, - Requires<[HasV5T]>; +def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>; -def CONVERT_df2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2sf($src)", - [(set IntRegs:$dst, (fround DoubleRegs:$src))]>, - Requires<[HasV5T]>; +def: Pat<(f32 (fadd F32:$src1, F32:$src2)), + (F2_sfadd F32:$src1, F32:$src2)>; +def: Pat<(f32 (fsub F32:$src1, F32:$src2)), + (F2_sfsub F32:$src1, F32:$src2)>; -// Load. -def LDrid_f : LDInst<(outs DoubleRegs:$dst), - (ins MEMri:$addr), - "$dst = memd($addr)", - [(set DoubleRegs:$dst, (f64 (load ADDRriS11_3:$addr)))]>, - Requires<[HasV5T]>; +def: Pat<(f32 (fmul F32:$src1, F32:$src2)), + (F2_sfmpy F32:$src1, F32:$src2)>; +let Itinerary = M_tc_3x_SLOT23 in { + def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>; + def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>; +} -let AddedComplexity = 20 in -def LDrid_indexed_f : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, s11_3Imm:$offset), - "$dst = memd($src1+#$offset)", - [(set DoubleRegs:$dst, (f64 (load (add IntRegs:$src1, - s11_3ImmPred:$offset))))]>, - Requires<[HasV5T]>; +let AddedComplexity = 100, Predicates = [HasV5T] in { + def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), + F32:$src1, F32:$src2)), + (F2_sfmin F32:$src1, F32:$src2)>; -def LDriw_f : LDInst<(outs IntRegs:$dst), - (ins MEMri:$addr), "$dst = memw($addr)", - [(set IntRegs:$dst, (f32 (load ADDRriS11_2:$addr)))]>, - Requires<[HasV5T]>; + def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), + F32:$src2, F32:$src1)), + (F2_sfmin F32:$src1, F32:$src2)>; + def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)), + F32:$src1, F32:$src2)), + (F2_sfmax F32:$src1, F32:$src2)>; -let AddedComplexity = 20 in -def LDriw_indexed_f : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s11_2Imm:$offset), - "$dst = memw($src1+#$offset)", - [(set IntRegs:$dst, (f32 (load (add IntRegs:$src1, - s11_2ImmPred:$offset))))]>, - Requires<[HasV5T]>; + def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)), + F32:$src2, F32:$src1)), + (F2_sfmax F32:$src1, F32:$src2)>; +} -// Store. -def STriw_f : STInst<(outs), - (ins MEMri:$addr, IntRegs:$src1), - "memw($addr) = $src1", - [(store (f32 IntRegs:$src1), ADDRriS11_2:$addr)]>, - Requires<[HasV5T]>; +def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>; +def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>; + +// F2_sfrecipa: Reciprocal approximation for division. 
+let isPredicateLate = 1, isFP = 1, +hasSideEffects = 0, hasNewValue = 1 in +def F2_sfrecipa: MInst < + (outs IntRegs:$Rd, PredRegs:$Pe), + (ins IntRegs:$Rs, IntRegs:$Rt), + "$Rd, $Pe = sfrecipa($Rs, $Rt)">, + Requires<[HasV5T]> { + bits<5> Rd; + bits<2> Pe; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + let Inst{27-21} = 0b1011111; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6-5} = Pe; + let Inst{4-0} = Rd; + } + +// F2_dfcmpeq: Floating point compare for equal. +let isCompare = 1, isFP = 1 in +class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp, + list<dag> pattern = [] > + : ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2), + "$dst = "#mnemonic#"($src1, $src2)", pattern, + "" , ALU64_tc_2early_SLOT23 > , + Requires<[HasV5T]> { + bits<2> dst; + bits<5> src1; + bits<5> src2; + + let IClass = 0b1101; + + let Inst{27-21} = 0b0010111; + let Inst{20-16} = src1; + let Inst{12-8} = src2; + let Inst{7-5} = MinOp; + let Inst{1-0} = dst; + } + +class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp> + : T_fcmp <mnemonic, DoubleRegs, MinOp, + [(set I1:$dst, (OpNode F64:$src1, F64:$src2))]> { + let IClass = 0b1101; + let Inst{27-21} = 0b0010111; +} -let AddedComplexity = 10 in -def STriw_indexed_f : STInst<(outs), - (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3), - "memw($src1+#$src2) = $src3", - [(store (f32 IntRegs:$src3), - (add IntRegs:$src1, s11_2ImmPred:$src2))]>, - Requires<[HasV5T]>; +class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp> + : T_fcmp <mnemonic, IntRegs, MinOp, + [(set I1:$dst, (OpNode F32:$src1, F32:$src2))]> { + let IClass = 0b1100; + let Inst{27-21} = 0b0111111; +} -def STrid_f : STInst<(outs), - (ins MEMri:$addr, DoubleRegs:$src1), - "memd($addr) = $src1", - [(store (f64 DoubleRegs:$src1), ADDRriS11_2:$addr)]>, - Requires<[HasV5T]>; +def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>; +def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>; +def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>; +def F2_dfcmpuo : T_fcmp64<"dfcmp.uo", setuo, 0b011>; + +def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>; +def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>; +def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>; +def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations. +//===----------------------------------------------------------------------===// + +let Predicates = [HasV5T] in +multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (IntMI F32:$src1, F32:$src2)>; + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (DoubleMI F64:$src1, F64:$src2)>; +} -// Indexed store double word. -let AddedComplexity = 10 in -def STrid_indexed_f : STInst<(outs), - (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3), - "memd($src1+#$src2) = $src3", - [(store (f64 DoubleRegs:$src3), - (add IntRegs:$src1, s11_3ImmPred:$src2))]>, - Requires<[HasV5T]>; +defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>; +defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>; +defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. 
+//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (DoubleMI F64:$src1, F64:$src2))>; +} +defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>; +defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>; +defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) +// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; +} -// Add -let isCommutable = 1 in -def fADD_rr : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfadd($src1, $src2)", - [(set IntRegs:$dst, (fadd IntRegs:$src1, IntRegs:$src2))]>, - Requires<[HasV5T]>; +defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>; +defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>; +defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) +// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) +// setne(setolt(op1, op2), 0) -> setogt(op2, op1) +// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI, + InstHexagon DoubleMI> { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + + // DoubleRegs + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; + def: Pat<(i1 (seteq (i1 
(cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; +} -let isCommutable = 1 in -def fADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = dfadd($src1, $src2)", - [(set DoubleRegs:$dst, (fadd DoubleRegs:$src1, - DoubleRegs:$src2))]>, - Requires<[HasV5T]>; +defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>; +defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>; -def fSUB_rr : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfsub($src1, $src2)", - [(set IntRegs:$dst, (fsub IntRegs:$src1, IntRegs:$src2))]>, - Requires<[HasV5T]>; -def fSUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = dfsub($src1, $src2)", - [(set DoubleRegs:$dst, (fsub DoubleRegs:$src1, - DoubleRegs:$src2))]>, - Requires<[HasV5T]>; - -let isCommutable = 1 in -def fMUL_rr : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfmpy($src1, $src2)", - [(set IntRegs:$dst, (fmul IntRegs:$src1, IntRegs:$src2))]>, - Requires<[HasV5T]>; - -let isCommutable = 1 in -def fMUL64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2), - "$dst = dfmpy($src1, $src2)", - [(set DoubleRegs:$dst, (fmul DoubleRegs:$src1, - DoubleRegs:$src2))]>, - Requires<[HasV5T]>; - -// Compare. -let isCompare = 1 in { -multiclass FCMP64_rr<string OpcStr, PatFrag OpNode> { - def _rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set PredRegs:$dst, - (OpNode (f64 DoubleRegs:$b), (f64 DoubleRegs:$c)))]>, - Requires<[HasV5T]>; +// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp +let Predicates = [HasV5T] in { + def: Pat<(i1 (seto F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>; + def: Pat<(i1 (seto F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (seto F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>; + def: Pat<(i1 (seto F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>; +} + +// Ordered lt. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setolt F32:$src1, F32:$src2)), + (F2_sfcmpgt F32:$src2, F32:$src1)>; + def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)), + (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>; + def: Pat<(i1 (setolt F64:$src1, F64:$src2)), + (F2_dfcmpgt F64:$src2, F64:$src1)>; + def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>; } -multiclass FCMP32_rr<string OpcStr, PatFrag OpNode> { - def _rr : ALU64_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set PredRegs:$dst, - (OpNode (f32 IntRegs:$b), (f32 IntRegs:$c)))]>, - Requires<[HasV5T]>; +// Unordered lt. 
+let Predicates = [HasV5T] in { + def: Pat<(i1 (setult F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (F2_sfcmpgt F32:$src2, F32:$src1))>; + def: Pat<(i1 (setult F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (setult F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (F2_dfcmpgt F64:$src2, F64:$src1))>; + def: Pat<(i1 (setult F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>; } + +// Ordered le. +let Predicates = [HasV5T] in { + // rs <= rt -> rt >= rs. + def: Pat<(i1 (setole F32:$src1, F32:$src2)), + (F2_sfcmpge F32:$src2, F32:$src1)>; + def: Pat<(i1 (setole F32:$src1, fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>; + + // Rss <= Rtt -> Rtt >= Rss. + def: Pat<(i1 (setole F64:$src1, F64:$src2)), + (F2_dfcmpge F64:$src2, F64:$src1)>; + def: Pat<(i1 (setole F64:$src1, fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>; } -defm FCMPOEQ64 : FCMP64_rr<"dfcmp.eq", setoeq>; -defm FCMPUEQ64 : FCMP64_rr<"dfcmp.eq", setueq>; -defm FCMPOGT64 : FCMP64_rr<"dfcmp.gt", setogt>; -defm FCMPUGT64 : FCMP64_rr<"dfcmp.gt", setugt>; -defm FCMPOGE64 : FCMP64_rr<"dfcmp.ge", setoge>; -defm FCMPUGE64 : FCMP64_rr<"dfcmp.ge", setuge>; - -defm FCMPOEQ32 : FCMP32_rr<"sfcmp.eq", setoeq>; -defm FCMPUEQ32 : FCMP32_rr<"sfcmp.eq", setueq>; -defm FCMPOGT32 : FCMP32_rr<"sfcmp.gt", setogt>; -defm FCMPUGT32 : FCMP32_rr<"sfcmp.gt", setugt>; -defm FCMPOGE32 : FCMP32_rr<"sfcmp.ge", setoge>; -defm FCMPUGE32 : FCMP32_rr<"sfcmp.ge", setuge>; - -// olt. -def : Pat <(i1 (setolt (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (FCMPOGT32_rr IntRegs:$src2, IntRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setolt (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPOGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (FCMPOGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPOGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)), - (f64 DoubleRegs:$src1)))>, - Requires<[HasV5T]>; - -// gt. -def : Pat <(i1 (setugt (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGT64_rr (f64 DoubleRegs:$src1), - (f64 (CONST64_Float_Real fpimm:$src2))))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setugt (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGT32_rr (f32 IntRegs:$src1), (f32 (TFRI_f fpimm:$src2))))>, - Requires<[HasV5T]>; - -// ult. -def : Pat <(i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setult (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)), - (f64 DoubleRegs:$src1)))>, - Requires<[HasV5T]>; - -// le. +// Unordered le. +let Predicates = [HasV5T] in { // rs <= rt -> rt >= rs. 
-def : Pat<(i1 (setole (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (FCMPOGE32_rr IntRegs:$src2, IntRegs:$src1))>, - Requires<[HasV5T]>; + def: Pat<(i1 (setule F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (F2_sfcmpge F32:$src2, F32:$src1))>; + def: Pat<(i1 (setule F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>; + def: Pat<(i1 (setule F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (F2_dfcmpge F64:$src2, F64:$src1))>; + def: Pat<(i1 (setule F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>; +} + +// Ordered ne. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setone F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; + def: Pat<(i1 (setone F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; + def: Pat<(i1 (setone F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>; + def: Pat<(i1 (setone F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>; +} -def : Pat<(i1 (setole (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPOGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>, - Requires<[HasV5T]>; +// Unordered ne. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setune F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>; + def: Pat<(i1 (setune F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>; + def: Pat<(i1 (setune F32:$src1, fpimm:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>; + def: Pat<(i1 (setune F64:$src1, fpimm:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, + (CONST64_Float_Real fpimm:$src2))))>; +} +// Besides set[o|u][comparions], we also need set[comparisons]. +let Predicates = [HasV5T] in { + // lt. + def: Pat<(i1 (setlt F32:$src1, F32:$src2)), + (F2_sfcmpgt F32:$src2, F32:$src1)>; + def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)), + (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>; + def: Pat<(i1 (setlt F64:$src1, F64:$src2)), + (F2_dfcmpgt F64:$src2, F64:$src1)>; + def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)), + (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>; + + // le. + // rs <= rt -> rt >= rs. + def: Pat<(i1 (setle F32:$src1, F32:$src2)), + (F2_sfcmpge F32:$src2, F32:$src1)>; + def: Pat<(i1 (setle F32:$src1, fpimm:$src2)), + (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>; + + // Rss <= Rtt -> Rtt >= Rss. + def: Pat<(i1 (setle F64:$src1, F64:$src2)), + (F2_dfcmpge F64:$src2, F64:$src1)>; + def: Pat<(i1 (setle F64:$src1, fpimm:$src2)), + (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>; + + // ne. + def: Pat<(i1 (setne F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; + def: Pat<(i1 (setne F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; + def: Pat<(i1 (setne F32:$src1, fpimm:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>; + def: Pat<(i1 (setne F64:$src1, fpimm:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>; +} -// Rss <= Rtt -> Rtt >= Rss. 
-def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (FCMPOGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, - Requires<[HasV5T]>; +// F2 convert template classes: +let isFP = 1 in +class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), + "$Rdd = "#mnemonic#"($Rss)"#chop, + [(set RCOut:$Rdd, (Op RCIn:$Rss))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rdd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0000111; + let Inst{20-16} = Rss; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +let isFP = 1 in +class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs), + "$Rdd = "#mnemonic#"($Rs)"#chop, + [(set RCOut:$Rdd, (Op RCIn:$Rs))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rdd; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0100100; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rdd; + } + +let isFP = 1, hasNewValue = 1 in +class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), + "$Rd = "#mnemonic#"($Rss)"#chop, + [(set RCOut:$Rd, (Op RCIn:$Rss))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rd; + bits<5> Rss; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1000; + let Inst{23-21} = MinOp; + let Inst{20-16} = Rss; + let Inst{7-5} = 0b001; + let Inst{4-0} = Rd; + } + +let isFP = 1, hasNewValue = 1 in +class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp, + SDNode Op, PatLeaf RCOut, PatLeaf RCIn, + string chop =""> + : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = "#mnemonic#"($Rs)"#chop, + [(set RCOut:$Rd, (Op RCIn:$Rs))], "", + S_2op_tc_3or4x_SLOT23> { + bits<5> Rd; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-24} = 0b1011; + let Inst{23-21} = MajOp; + let Inst{20-16} = Rs; + let Inst{7-5} = MinOp; + let Inst{4-0} = Rd; + } -def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPOGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)), - DoubleRegs:$src1))>, - Requires<[HasV5T]>; +// Convert single precision to double precision and vice-versa. +def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000, + fextend, F64, F32>; -// rs <= rt -> rt >= rs. -def : Pat<(i1 (setule (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (FCMPUGE32_rr IntRegs:$src2, IntRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setule (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>, - Requires<[HasV5T]>; - -// Rss <= Rtt -> Rtt >= Rss. -def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (FCMPUGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (FCMPUGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)), - DoubleRegs:$src1))>, - Requires<[HasV5T]>; - -// ne. 
-def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, - (f64 (CONST64_Float_Real fpimm:$src2)))))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))), - (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>, - Requires<[HasV5T]>; - -def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))), - (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, - (f64 (CONST64_Float_Real fpimm:$src2)))))>, - Requires<[HasV5T]>; +def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000, + fround, F32, F64>; // Convert Integer to Floating Point. -def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_d2sf($src)", - [(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_ud2sf($src)", - [(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_uw2sf($src)", - [(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_w2sf($src)", - [(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_d2df($src)", - [(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_ud2df($src)", - [(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_uw2df($src)", - [(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_w2df($src)", - [(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>, - Requires<[HasV5T]>; +def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010, + sint_to_fp, F32, I64>; +def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001, + uint_to_fp, F32, I64>; +def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000, + uint_to_fp, F32, I32>; +def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000, + sint_to_fp, F32, I32>; +def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011, + sint_to_fp, F64, I64>; +def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010, + uint_to_fp, F64, I64>; +def 
F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001, + uint_to_fp, F64, I32>; +def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010, + sint_to_fp, F64, I32>; // Convert Floating Point to Integer - default. -def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2uw($src):chop", - [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2w($src):chop", - [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2uw($src):chop", - [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2w($src):chop", - [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2d($src):chop", - [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2ud($src):chop", - [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2d($src):chop", - [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, - Requires<[HasV5T]>; - -def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2ud($src):chop", - [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, - Requires<[HasV5T]>; +def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, + fp_to_uint, I32, F64, ":chop">; +def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, + fp_to_sint, I32, F64, ":chop">; +def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001, + fp_to_uint, I32, F32, ":chop">; +def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001, + fp_to_sint, I32, F32, ":chop">; +def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, + fp_to_sint, I64, F64, ":chop">; +def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, + fp_to_uint, I64, F64, ":chop">; +def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, + fp_to_sint, I64, F32, ":chop">; +def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, + fp_to_uint, I64, F32, ":chop">; // Convert Floating Point to Integer: non-chopped. 
-let AddedComplexity = 20 in -def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2uw($src)", - [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2w($src)", - [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2uw($src)", - [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2w($src)", - [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2d($src)", - [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - "$dst = convert_df2ud($src)", - [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2d($src)", - [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - -let AddedComplexity = 20 in -def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), - "$dst = convert_sf2ud($src)", - [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, - Requires<[HasV5T, IEEERndNearV5T]>; - +let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in { + def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000, + fp_to_sint, I64, F64>; + def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001, + fp_to_uint, I64, F64>; + def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011, + fp_to_uint, I64, F32>; + def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100, + fp_to_sint, I64, F32>; + def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011, + fp_to_uint, I32, F64>; + def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100, + fp_to_sint, I32, F64>; + def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000, + fp_to_uint, I32, F32>; + def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000, + fp_to_sint, I32, F32>; +} +// Fix up radicand. +let isFP = 1, hasNewValue = 1 in +def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs), + "$Rd = sffixupr($Rs)", + [], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rs; -// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. 
-def : Pat <(i32 (bitconvert (f32 IntRegs:$src))), - (i32 (TFR IntRegs:$src))>, - Requires<[HasV5T]>; + let IClass = 0b1000; -def : Pat <(f32 (bitconvert (i32 IntRegs:$src))), - (f32 (TFR IntRegs:$src))>, - Requires<[HasV5T]>; + let Inst{27-21} = 0b1011101; + let Inst{20-16} = Rs; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rd; + } -def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))), - (i64 (TFR64 DoubleRegs:$src))>, - Requires<[HasV5T]>; - -def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))), - (f64 (TFR64 DoubleRegs:$src))>, - Requires<[HasV5T]>; +// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. +let Predicates = [HasV5T] in { + def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; + def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; + def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; + def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; +} -// Floating point fused multiply-add. -def FMADD_dp : ALU64_acc<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), - "$dst += dfmpy($src2, $src3)", - [(set (f64 DoubleRegs:$dst), - (fma DoubleRegs:$src2, DoubleRegs:$src3, DoubleRegs:$src1))], - "$src1 = $dst">, - Requires<[HasV5T]>; - -def FMADD_sp : ALU64_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst += sfmpy($src2, $src3)", - [(set (f32 IntRegs:$dst), - (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))], - "$src1 = $dst">, - Requires<[HasV5T]>; - - -// Floating point max/min. -let AddedComplexity = 100 in -def FMAX_dp : ALU64_rr<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2), - "$dst = dfmax($src1, $src2)", - [(set DoubleRegs:$dst, (f64 (select (i1 (setolt DoubleRegs:$src2, - DoubleRegs:$src1)), - DoubleRegs:$src1, - DoubleRegs:$src2)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100 in -def FMAX_sp : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfmax($src1, $src2)", - [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2, - IntRegs:$src1)), - IntRegs:$src1, - IntRegs:$src2)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100 in -def FMIN_dp : ALU64_rr<(outs DoubleRegs:$dst), - (ins DoubleRegs:$src1, DoubleRegs:$src2), - "$dst = dfmin($src1, $src2)", - [(set DoubleRegs:$dst, (f64 (select (i1 (setogt DoubleRegs:$src2, - DoubleRegs:$src1)), - DoubleRegs:$src1, - DoubleRegs:$src2)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100 in -def FMIN_sp : ALU64_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sfmin($src1, $src2)", - [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2, - IntRegs:$src1)), - IntRegs:$src1, - IntRegs:$src2)))]>, - Requires<[HasV5T]>; - -// Pseudo instruction to encode a set of conditional transfers. -// This instruction is used instead of a mux and trades-off codesize -// for performance. We conduct this transformation optimistically in -// the hope that these instructions get promoted to dot-new transfers. 
-let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, - IntRegs:$src2, - IntRegs:$src3), - "Error; should not emit", - [(set IntRegs:$dst, (f32 (select PredRegs:$src1, - IntRegs:$src2, - IntRegs:$src3)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, - DoubleRegs:$src2, - DoubleRegs:$src3), - "Error; should not emit", - [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1, - DoubleRegs:$src2, - DoubleRegs:$src3)))]>, - Requires<[HasV5T]>; - - - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3), - "Error; should not emit", - [(set IntRegs:$dst, - (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3), - "Error; should not emit", - [(set IntRegs:$dst, - (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>, - Requires<[HasV5T]>; - -let AddedComplexity = 100, isPredicated = 1 in -def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3), - "Error; should not emit", - [(set IntRegs:$dst, (f32 (select PredRegs:$src1, - fpimm:$src2, - fpimm:$src3)))]>, - Requires<[HasV5T]>; - - -def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))), - (f32 IntRegs:$src3), - (f32 IntRegs:$src4)), - (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4, - IntRegs:$src3)>, Requires<[HasV5T]>; - -def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), - (f64 DoubleRegs:$src3), - (f64 DoubleRegs:$src4)), - (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1), - DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). -def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3), - (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>; +// F2_sffma: Floating-point fused multiply add. +let isFP = 1, hasNewValue = 1 in +class T_sfmpy_acc <bit isSub, bit isLib> + : MInst<(outs IntRegs:$Rx), + (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt), + "$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""), + [], "$dst2 = $Rx" , M_tc_3_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1110; + + let Inst{27-21} = 0b1111000; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6} = isLib; + let Inst{5} = isSub; + let Inst{4-0} = Rx; + } + +def F2_sffma: T_sfmpy_acc <0, 0>; +def F2_sffms: T_sfmpy_acc <1, 0>; +def F2_sffma_lib: T_sfmpy_acc <0, 1>; +def F2_sffms_lib: T_sfmpy_acc <1, 1>; + +def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)), + (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; + +// Floating-point fused multiply add w/ additional scaling (2**pu). 
+let isFP = 1, hasNewValue = 1 in +def F2_sffma_sc: MInst < + (outs IntRegs:$Rx), + (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu), + "$Rx += sfmpy($Rs, $Rt, $Pu):scale" , + [], "$dst2 = $Rx" , M_tc_3_SLOT23 > , + Requires<[HasV5T]> { + bits<5> Rx; + bits<5> Rs; + bits<5> Rt; + bits<2> Pu; + + let IClass = 0b1110; + + let Inst{27-21} = 0b1111011; + let Inst{20-16} = Rs; + let Inst{13} = 0b0; + let Inst{12-8} = Rt; + let Inst{7} = 0b1; + let Inst{6-5} = Pu; + let Inst{4-0} = Rx; + } + +let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3, + isPseudo = 1, InputType = "imm" in +def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3), + "$dst = mux($src1, $src2, #$src3)", + [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>, + Requires<[HasV5T]>; + +let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2, + isPseudo = 1, InputType = "imm" in +def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3), + "$dst = mux($src1, #$src2, $src3)", + [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F32:$src2, F32:$src3), + (C2_mux I1:$src1, F32:$src2, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), + (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F64:$src2, F64:$src3), + (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), + (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, + Requires<[HasV5T]>; // Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = TFR_condset_ri(p0, r1, #i) -def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3), - (TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>; +// => r0 = MUX_ir_f(p0, #i, r1) +def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3), + (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>, + Requires<[HasV5T]>; // Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = TFR_condset_ir(p0, #i, r1) -def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3), - (TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>; +// => r0 = MUX_ri_f(p0, r1, #i) +def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3), + (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>, + Requires<[HasV5T]>; + +def: Pat<(i32 (fp_to_sint F64:$src1)), + (LoReg (F2_conv_df2d_chop F64:$src1))>, + Requires<[HasV5T]>; + +//===----------------------------------------------------------------------===// +// :natural forms of vasrh and vasrhub insns +//===----------------------------------------------------------------------===// +// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round, +// saturate, and pack. 
+let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_ASRHUB<bit isSat> + : SInst <(outs IntRegs:$Rd), + (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"), + [], "", S_2op_tc_2_SLOT23>, + Requires<[HasV5T]> { + bits<5> Rd; + bits<5> Rss; + bits<4> u4; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1000011; + let Inst{20-16} = Rss; + let Inst{13-12} = 0b00; + let Inst{11-8} = u4; + let Inst{7-6} = 0b10; + let Inst{5} = isSat; + let Inst{4-0} = Rd; + } + +def S5_asrhub_rnd_sat : T_ASRHUB <0>; +def S5_asrhub_sat : T_ASRHUB <1>; + +let isAsmParserOnly = 1 in +def S5_asrhub_rnd_sat_goodsyntax + : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>; + +// S5_vasrhrnd: Vector arithmetic shift right by immediate with round. +let hasSideEffects = 0 in +def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd), + (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rdd = vasrh($Rss, #$u4):raw">, + Requires<[HasV5T]> { + bits<5> Rdd; + bits<5> Rss; + bits<4> u4; + + let IClass = 0b1000; + + let Inst{27-21} = 0b0000001; + let Inst{20-16} = Rss; + let Inst{13-12} = 0b00; + let Inst{11-8} = u4; + let Inst{7-5} = 0b000; + let Inst{4-0} = Rdd; + } + +let isAsmParserOnly = 1 in +def S5_vasrhrnd_goodsyntax + : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4), + "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>; + +// Floating point reciprocal square root approximation +let Uses = [USR], isPredicateLate = 1, isFP = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, + validSubTargets = HasV5SubT in +def F2_sfinvsqrta: SInst < + (outs IntRegs:$Rd, PredRegs:$Pe), + (ins IntRegs:$Rs), + "$Rd, $Pe = sfinvsqrta($Rs)" > , + Requires<[HasV5T]> { + bits<5> Rd; + bits<2> Pe; + bits<5> Rs; + + let IClass = 0b1000; + + let Inst{27-21} = 0b1011111; + let Inst{20-16} = Rs; + let Inst{7} = 0b0; + let Inst{6-5} = Pe; + let Inst{4-0} = Rd; + } + +// Complex multiply 32x16 +let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { + def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>; + def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>; +} -def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))), - (i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>, - Requires<[HasV5T]>; +// Classify floating-point value +let isFP = 1 in + def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>; + +let isFP = 1 in +def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5), + "$Pd = dfclass($Rss, #$u5)", + [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> { + bits<2> Pd; + bits<5> Rss; + bits<5> u5; + + let IClass = 0b1101; + let Inst{27-21} = 0b1100100; + let Inst{20-16} = Rss; + let Inst{12-10} = 0b000; + let Inst{9-5} = u5; + let Inst{4-3} = 0b10; + let Inst{1-0} = Pd; + } + +// Instructions to create floating point constant +class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg> + : ALU64Inst<(outs RC:$dst), (ins u10Imm:$src), + "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"), + [], "", ALU64_tc_3x_SLOT23>, Requires<[HasV5T]> { + bits<5> dst; + bits<10> src; + + let IClass = 0b1101; + let Inst{27-24} = RegType; + let Inst{23} = 0b0; + let Inst{22} = isNeg; + let Inst{21} = src{9}; + let Inst{13-5} = src{8-0}; + let Inst{4-0} = dst; + } + +let hasNewValue = 1, opNewValue = 0 in { +def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>; +def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>; +} + +def 
F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>; +def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>; def : Pat <(fabs (f32 IntRegs:$src1)), - (CLRBIT_31 (f32 IntRegs:$src1), 31)>, + (S2_clrbit_i (f32 IntRegs:$src1), 31)>, Requires<[HasV5T]>; def : Pat <(fneg (f32 IntRegs:$src1)), - (TOGBIT_31 (f32 IntRegs:$src1), 31)>, - Requires<[HasV5T]>; - -/* -def : Pat <(fabs (f64 DoubleRegs:$src1)), - (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>, - Requires<[HasV5T]>; - -def : Pat <(fabs (f64 DoubleRegs:$src1)), - (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>, + (S2_togglebit_i (f32 IntRegs:$src1), 31)>, Requires<[HasV5T]>; - */ diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td new file mode 100644 index 0000000..6e67b6e --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -0,0 +1,65 @@ +//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon Vector instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; +def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; +def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; +def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; +def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; +def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; +def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; +def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + +// Vector shift support. Vector shifting in Hexagon is rather different +// from internal representation of LLVM. +// LLVM assumes all shifts (in vector case) will have the form +// <VT> = SHL/SRA/SRL <VT> by <VT> +// while Hexagon has the following format: +// <VT> = SHL/SRA/SRL <VT> by <IT/i32> +// As a result, special care is needed to guarantee correctness and +// performance. 
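// For example (an illustrative sketch, not part of the patch): IR such as
//   %r = shl <2 x i32> %v, <i32 5, i32 5>
// reaches selection with a v2i32 shift amount and has to be rewritten so the
// amount becomes a single i32 (the constant 5 here) before the patterns
// below, which expect the form (shl (v2i32 DoubleRegs:$src1), u5ImmPred:$src2),
// can select S2_asl_i_vw.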
+class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> + : S_2OpInstImm<Str, MajOp, MinOp, u4Imm, + [(set (v4i16 DoubleRegs:$dst), + (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> { + bits<4> src2; + let Inst{11-8} = src2; +} + +class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> + : S_2OpInstImm<Str, MajOp, MinOp, u5Imm, + [(set (v2i32 DoubleRegs:$dst), + (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> { + bits<5> src2; + let Inst{12-8} = src2; +} + +def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>; +def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>; +def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>; + +def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>; +def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>; +def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>; + +// Vector shift words by register +def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>; +def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>; +def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>; +def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>; + +// Vector shift halfwords by register +def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>; +def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>; +def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>; +def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>; diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index b3385d8..c0551e8 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -13,3495 +13,1250 @@ // March 4, 2008 //===----------------------------------------------------------------------===// -// -// ALU 32 types. 
-// +class T_I_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$Is), + (MI imm:$Is)>; -class qi_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_ALU32_sis10<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_ALU32_sis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_ALU32_siu8<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_ALU32_siu9<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_qisisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_ALU32_qis8si<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, - IntRegs:$src3))]>; - -class si_ALU32_qisis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - s8Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - imm:$src3))]>; - -class si_ALU32_qis8s8<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; - -class si_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU32_sisi_sat<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU32_sisi_rnd<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU32_sis16<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_sis10<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, 
s10Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_s10si<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), - [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; - -class si_lo_ALU32_siu16<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), - !strconcat("$dst.l = ", !strconcat(opc , "#$src2")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_hi_ALU32_siu16<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), - !strconcat("$dst.h = ", !strconcat(opc , "#$src2")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_s16<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1")), - [(set IntRegs:$dst, (IntID imm:$src1))]>; - -class di_ALU32_s8<string opc, Intrinsic IntID> - : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1")), - [(set DoubleRegs:$dst, (IntID imm:$src1))]>; - -class di_ALU64_di<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; - -class si_ALU32_si<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class si_ALU32_si_tfr<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; +class T_R_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs), + (MI I32:$Rs)>; -// -// ALU 64 types. 
-// +class T_P_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs), + (MI DoubleRegs:$Rs)>; + +class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2> + : Pat<(IntID Imm1:$Is, Imm2:$It), + (MI Imm1:$Is, Imm2:$It)>; + +class T_RI_pat <InstHexagon MI, Intrinsic IntID, PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID I32:$Rs, ImmPred:$It), + (MI I32:$Rs, ImmPred:$It)>; + +class T_IR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID ImmPred:$Is, I32:$Rt), + (MI ImmPred:$Is, I32:$Rt)>; + +class T_PI_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID I64:$Rs, imm:$It), + (MI DoubleRegs:$Rs, imm:$It)>; + +class T_RP_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID I32:$Rs, I64:$Rt), + (MI I32:$Rs, DoubleRegs:$Rt)>; + +class T_RR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt), + (MI I32:$Rs, I32:$Rt)>; + +class T_PP_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt)>; -class si_ALU64_si_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class si_ALU64_didi<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class di_ALU64_sidi<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; - -class di_ALU64_didi<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_qididi<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$src3))]>; - -class di_ALU64_sisi<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_ALU64_didi_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_didi_rnd<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_didi_crnd<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), 
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class qi_ALU64_didi<string opc, Intrinsic IntID> - : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class si_ALU64_sisi<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):sat:<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; 
- -class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.H):sat:<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.L):sat:<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.L):sat:<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_lh<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_ll<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_ALU64_sisi_sat<string opc, Intrinsic IntID> - : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; +class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2> + : Pat <(IntID (i32 PredRegs:$Ps), Imm1:$Is, Imm2:$It), + (MI PredRegs:$Ps, Imm1:$Is, Imm2:$It)>; -// -// SInst classes. 
-// +class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred> + : Pat <(IntID (i32 PredRegs:$Ps), I32:$Rs, ImmPred:$Is), + (MI PredRegs:$Ps, I32:$Rs, ImmPred:$Is)>; -class qi_SInst_qi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set PredRegs:$dst, (IntID IntRegs:$src))]>; - -class qi_SInst_qi_pxfer<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set PredRegs:$dst, (IntID IntRegs:$src))]>; - -class qi_SInst_qiqi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_qiqi_neg<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_SInst_di<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; - -class di_SInst_di_sat<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; - -class si_SInst_di<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; - -class si_SInst_di_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src):sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; - -class di_SInst_disi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class di_SInst_didi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class di_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; - -class si_SInst_sisiu3<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - imm:$src3))]>; - -class si_SInst_diu5<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class si_SInst_disi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class si_SInst_sidi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), 
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; - -class di_SInst_disisi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class di_SInst_sisi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_siu5<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_SInst_siu6<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class qi_SInst_sisi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class si_SInst_si_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class di_SInst_qi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "($src)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>; - -class si_SInst_qi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set IntRegs:$dst, (IntID IntRegs:$src))]>; - -class si_SInst_qiqi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), - !strconcat("$dst = ", !strconcat(opc , "$src")), - [(set PredRegs:$dst, (IntID IntRegs:$src))]>; - -class si_SInst_sisi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_SInst_diu6<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class si_SInst_siu5<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_SInst_siu5_rnd<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , 
"($src1, #$src2):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_SInst_siu5u5<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; - -class si_SInst_sisisi_acc<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisisi_nac<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didisi_acc<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didisi_nac<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, IntRegs:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5u5<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2, u5Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, #$src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2, imm:$src3))], - "$dst2 = $dst">; - -class si_SInst_sisidi<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6u6<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u6Imm:$src2, u6Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, #$src2, #$src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2, imm:$src3))], - "$dst2 = $dst">; - -class di_SInst_dididi<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_diu6u6<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2, - u6Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2, - imm:$src3))]>; - -class di_SInst_didiqi<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, - IntRegs:$src3))]>; - -class 
di_SInst_didiu3<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - u3Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, - imm:$src3))]>; - -class di_SInst_didisi_or<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didisi_and<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_and<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u6Imm:$src2), - !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_or<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u6Imm:$src2), - !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_xor<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u6Imm:$src2), - !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisisi_and<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisisi_or<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - - -class si_SInst_sisiu5_and<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5_or<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5_xor<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5_acc<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - 
!strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class si_SInst_sisiu5_nac<string opc, Intrinsic IntID> - : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u5Imm:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_acc<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u5Imm:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, imm:$src2))], - "$dst2 = $dst">; - -class di_SInst_didiu6_nac<string opc, Intrinsic IntID> - : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - u5Imm:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - imm:$src2))], - "$dst2 = $dst">; +class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred> + : Pat <(IntID (i32 PredRegs:$Ps), ImmPred:$Is, I32:$Rs), + (MI PredRegs:$Ps, ImmPred:$Is, I32:$Rs)>; +class T_RRI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu), + (MI I32:$Rs, I32:$Rt, imm:$Iu)>; -// -// MInst classes. -// +class T_RII_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu), + (MI I32:$Rs, imm:$It, imm:$Iu)>; -class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):<<1:rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.L):<<1:rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.L):rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.H):<<1:rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.H):rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.L):<<1:rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, 
IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.L):rnd")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_disisi_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2*):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2*):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_s8s8<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>; - -class si_MInst_sis9<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set 
IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_MInst_sisi<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_hh<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_lh<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_hl<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_ll<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - - -class si_MInst_sisi_hh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_lh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_hl<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , 
"($src1.H, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_ll<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_up<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_didi<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_MInst_didi_conj<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2*):<<1:sat")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):<<1:rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_MInst_didi_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class si_SInst_sisi_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_SInst_didi_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class si_SInst_disi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class 
si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2.L):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2.H):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2*):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2*):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisisi_xacc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - IntRegs:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisis8_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - s8Imm:$src3), - !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisis8_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - s8Imm:$src3), - !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisiu4u5<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - u4Imm:$src2, u5Imm:$src3), - !strconcat("$dst = 
", !strconcat(opc , - "($src1, #$src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - imm:$src2, imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisiu8_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - u8Imm:$src3), - !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisiu8_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, - u8Imm:$src3), - !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, - imm:$src3))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, 
$src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, 
$src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , - "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set 
IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):<<1")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.H, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1.L, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class 
di_MInst_sisi_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_didi_s1_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2))]>; - -class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1, $src2):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.H):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, 
IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.L, $src2.L):<<1:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , - "($src1.H, $src2.H):<<1:rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , - "($src1.H, $src2.H):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.H, $src2.L):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.H, $src2.L):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.H):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.H):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, 
$src2.L):rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.L):rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, - DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - - -class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set DoubleRegs:$dst, (IntID 
DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1.H, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1.H, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1.L, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1.L, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", !strconcat(opc , 
"($src1.L, $src2.L)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", - !strconcat(opc , "($src1.H, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", - !strconcat(opc , "($src1.H, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", - !strconcat(opc , "($src1.L, $src2.H):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst -= ", - !strconcat(opc , "($src1.L, $src2.L):<<1")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class di_MInst_disi_s1_sat<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - IntRegs:$src2), - !strconcat("$dst += ", - !strconcat(opc , "($src1, $src2):<<1:sat")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, - DoubleRegs:$src1, - IntRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", - !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; - -class si_MInst_didi<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - - -class T_RI_pat <InstHexagon MI, Intrinsic IntID> - : Pat<(IntID (i32 IntRegs:$Rs), imm:$It), - (MI IntRegs:$Rs, imm:$It)>; +class T_IRI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu), + (MI imm:$It, I32:$Rs, imm:$Iu)>; -// -// LDInst classes. 
-// -let mayLoad = 1, neverHasSideEffects = 1 in -class di_LDInstPI_diu4<string opc, Intrinsic IntID> - : LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2), - (ins IntRegs:$src1, IntRegs:$src2, CRRegs:$src3, s4Imm:$offset), - "$dst2 = memd($src1++#$offset:circ($src3))", - [], - "$src1 = $dst">; +class T_IRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt), + (MI imm:$Is, I32:$Rs, I32:$Rt)>; -/******************************************************************** -* ALU32/ALU * -*********************************************************************/ +class T_RIR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt), + (MI I32:$Rs, imm:$Is, I32:$Rt)>; -// ALU32 / ALU / Add. -def HEXAGON_A2_add: - si_ALU32_sisi <"add", int_hexagon_A2_add>; -def HEXAGON_A2_addi: - si_ALU32_sis16 <"add", int_hexagon_A2_addi>; - -// ALU32 / ALU / Logical operations. -def HEXAGON_A2_and: - si_ALU32_sisi <"and", int_hexagon_A2_and>; -def HEXAGON_A2_andir: - si_ALU32_sis10 <"and", int_hexagon_A2_andir>; -def HEXAGON_A2_not: - si_ALU32_si <"not", int_hexagon_A2_not>; -def HEXAGON_A2_or: - si_ALU32_sisi <"or", int_hexagon_A2_or>; -def HEXAGON_A2_orir: - si_ALU32_sis10 <"or", int_hexagon_A2_orir>; -def HEXAGON_A2_xor: - si_ALU32_sisi <"xor", int_hexagon_A2_xor>; - -// ALU32 / ALU / Negate. -def HEXAGON_A2_neg: - si_ALU32_si <"neg", int_hexagon_A2_neg>; - -// ALU32 / ALU / Subtract. -def HEXAGON_A2_sub: - si_ALU32_sisi <"sub", int_hexagon_A2_sub>; -def HEXAGON_A2_subri: - si_ALU32_s10si <"sub", int_hexagon_A2_subri>; - -// ALU32 / ALU / Transfer Immediate. -def HEXAGON_A2_tfril: - si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>; -def HEXAGON_A2_tfrih: - si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>; -def HEXAGON_A2_tfrsi: - si_ALU32_s16 <"", int_hexagon_A2_tfrsi>; -def HEXAGON_A2_tfrpi: - di_ALU32_s8 <"", int_hexagon_A2_tfrpi>; - -// ALU32 / ALU / Transfer Register. -def HEXAGON_A2_tfr: - si_ALU32_si_tfr <"", int_hexagon_A2_tfr>; +class T_RRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru), + (MI I32:$Rs, I32:$Rt, I32:$Ru)>; -/******************************************************************** -* ALU32/PERM * -*********************************************************************/ +class T_PPI_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, imm:$Iu)>; -// ALU32 / PERM / Combine. -def HEXAGON_A2_combinew: - di_ALU32_sisi <"combine", int_hexagon_A2_combinew>; -def HEXAGON_A2_combine_hh: - si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>; -def HEXAGON_A2_combine_lh: - si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>; -def HEXAGON_A2_combine_hl: - si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>; -def HEXAGON_A2_combine_ll: - si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>; -def HEXAGON_A2_combineii: - di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>; - -// ALU32 / PERM / Mux. -def HEXAGON_C2_mux: - si_ALU32_qisisi <"mux", int_hexagon_C2_mux>; -def HEXAGON_C2_muxri: - si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>; -def HEXAGON_C2_muxir: - si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>; -def HEXAGON_C2_muxii: - si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>; - -// ALU32 / PERM / Shift halfword. -def HEXAGON_A2_aslh: - si_ALU32_si <"aslh", int_hexagon_A2_aslh>; -def HEXAGON_A2_asrh: - si_ALU32_si <"asrh", int_hexagon_A2_asrh>; -def SI_to_SXTHI_asrh: - si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>; - -// ALU32 / PERM / Sign/zero extend. 
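With T_RRR_pat defined above, the accumulating 32-bit multiply patterns added later in this hunk each reduce to a single line. As a sketch, the instantiation for M2_mpy_acc_ll_s0 (which appears further down) is shorthand for spelling the pattern out by hand:

  def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>;

  // ...expands to:
  def : Pat <(int_hexagon_M2_mpy_acc_ll_s0 I32:$Rs, I32:$Rt, I32:$Ru),
             (M2_mpy_acc_ll_s0 I32:$Rs, I32:$Rt, I32:$Ru)>;

The first register operand carries the accumulator value; the tied-operand constraint itself lives on the instruction definition, not on these patterns.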
-def HEXAGON_A2_sxth: - si_ALU32_si <"sxth", int_hexagon_A2_sxth>; -def HEXAGON_A2_sxtb: - si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>; -def HEXAGON_A2_zxth: - si_ALU32_si <"zxth", int_hexagon_A2_zxth>; -def HEXAGON_A2_zxtb: - si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>; +class T_PII_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu), + (MI DoubleRegs:$Rs, imm:$It, imm:$Iu)>; -/******************************************************************** -* ALU32/PRED * -*********************************************************************/ +class T_PPP_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>; -// ALU32 / PRED / Compare. -def HEXAGON_C2_cmpeq: - qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>; -def HEXAGON_C2_cmpeqi: - qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>; -def HEXAGON_C2_cmpgei: - qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>; -def HEXAGON_C2_cmpgeui: - qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>; -def HEXAGON_C2_cmpgt: - qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>; -def HEXAGON_C2_cmpgti: - qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>; -def HEXAGON_C2_cmpgtu: - qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>; -def HEXAGON_C2_cmpgtui: - qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>; -def HEXAGON_C2_cmplt: - qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>; -def HEXAGON_C2_cmpltu: - qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>; +class T_PPR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, I32:$Ru)>; -/******************************************************************** -* ALU32/VH * -*********************************************************************/ +class T_PRR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru), + (MI DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>; -// ALU32 / VH / Vector add halfwords. -// Rd32=vadd[u]h(Rs32,Rt32:sat] -def HEXAGON_A2_svaddh: - si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>; -def HEXAGON_A2_svaddhs: - si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>; -def HEXAGON_A2_svadduhs: - si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>; - -// ALU32 / VH / Vector average halfwords. -def HEXAGON_A2_svavgh: - si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>; -def HEXAGON_A2_svavghs: - si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>; -def HEXAGON_A2_svnavgh: - si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>; - -// ALU32 / VH / Vector subtract halfwords. -def HEXAGON_A2_svsubh: - si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>; -def HEXAGON_A2_svsubhs: - si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>; -def HEXAGON_A2_svsubuhs: - si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>; +class T_PPQ_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I64:$Rt, (i32 PredRegs:$Ru)), + (MI DoubleRegs:$Rs, DoubleRegs:$Rt, PredRegs:$Ru)>; -/******************************************************************** -* ALU64/ALU * -*********************************************************************/ +class T_PR_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID I64:$Rs, I32:$Rt), + (MI DoubleRegs:$Rs, I32:$Rt)>; -// ALU64 / ALU / Add. -def HEXAGON_A2_addp: - di_ALU64_didi <"add", int_hexagon_A2_addp>; -def HEXAGON_A2_addsat: - si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>; - -// ALU64 / ALU / Add halfword. 
-// Even though the definition says hl, it should be lh - -//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits. -def HEXAGON_A2_addh_l16_hl: - si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>; -def HEXAGON_A2_addh_l16_ll: - si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>; - -def HEXAGON_A2_addh_l16_sat_hl: - si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>; -def HEXAGON_A2_addh_l16_sat_ll: - si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>; - -def HEXAGON_A2_addh_h16_hh: - si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>; -def HEXAGON_A2_addh_h16_hl: - si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>; -def HEXAGON_A2_addh_h16_lh: - si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>; -def HEXAGON_A2_addh_h16_ll: - si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>; - -def HEXAGON_A2_addh_h16_sat_hh: - si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>; -def HEXAGON_A2_addh_h16_sat_hl: - si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>; -def HEXAGON_A2_addh_h16_sat_lh: - si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>; -def HEXAGON_A2_addh_h16_sat_ll: - si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>; - -// ALU64 / ALU / Compare. -def HEXAGON_C2_cmpeqp: - qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>; -def HEXAGON_C2_cmpgtp: - qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>; -def HEXAGON_C2_cmpgtup: - qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>; - -// ALU64 / ALU / Logical operations. -def HEXAGON_A2_andp: - di_ALU64_didi <"and", int_hexagon_A2_andp>; -def HEXAGON_A2_orp: - di_ALU64_didi <"or", int_hexagon_A2_orp>; -def HEXAGON_A2_xorp: - di_ALU64_didi <"xor", int_hexagon_A2_xorp>; - -// ALU64 / ALU / Maximum. -def HEXAGON_A2_max: - si_ALU64_sisi <"max", int_hexagon_A2_max>; -def HEXAGON_A2_maxu: - si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>; - -// ALU64 / ALU / Minimum. -def HEXAGON_A2_min: - si_ALU64_sisi <"min", int_hexagon_A2_min>; -def HEXAGON_A2_minu: - si_ALU64_sisi <"minu", int_hexagon_A2_minu>; - -// ALU64 / ALU / Subtract. -def HEXAGON_A2_subp: - di_ALU64_didi <"sub", int_hexagon_A2_subp>; -def HEXAGON_A2_subsat: - si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>; - -// ALU64 / ALU / Subtract halfword. -// Even though the definition says hl, it should be lh - -//so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits. 
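The removed HEXAGON_A2_* halfword defs above and below each created a dedicated wrapper instruction carrying its own assembly string, which is why the hl/lh warning comment was needed here. In the new scheme the same intrinsics are routed to the already-defined A2_* instructions with plain patterns, so the mnemonic quirks stay with the instruction definitions. As a sketch, the replacement for the halfword-add wrapper removed in this region appears near the end of this hunk:

  // Old style: a per-intrinsic wrapper instruction with its own asm string.
  def HEXAGON_A2_addh_l16_hl:
    si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;

  // New style: a one-line pattern onto the existing instruction.
  def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>;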
-def HEXAGON_A2_subh_l16_hl: - si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>; -def HEXAGON_A2_subh_l16_ll: - si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>; - -def HEXAGON_A2_subh_l16_sat_hl: - si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>; -def HEXAGON_A2_subh_l16_sat_ll: - si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>; - -def HEXAGON_A2_subh_h16_hh: - si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>; -def HEXAGON_A2_subh_h16_hl: - si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>; -def HEXAGON_A2_subh_h16_lh: - si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>; -def HEXAGON_A2_subh_h16_ll: - si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>; - -def HEXAGON_A2_subh_h16_sat_hh: - si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>; -def HEXAGON_A2_subh_h16_sat_hl: - si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>; -def HEXAGON_A2_subh_h16_sat_lh: - si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>; -def HEXAGON_A2_subh_h16_sat_ll: - si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>; - -// ALU64 / ALU / Transfer register. -def HEXAGON_A2_tfrp: - di_ALU64_di <"", int_hexagon_A2_tfrp>; +class T_D_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID (F64:$Rs)), + (MI (F64:$Rs))>; -/******************************************************************** -* ALU64/BIT * -*********************************************************************/ +class T_DI_pat <InstHexagon MI, Intrinsic IntID, + PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID F64:$Rs, ImmPred:$It), + (MI F64:$Rs, ImmPred:$It)>; -// ALU64 / BIT / Masked parity. -def HEXAGON_S2_parityp: - si_ALU64_didi <"parity", int_hexagon_S2_parityp>; +class T_F_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs), + (MI F32:$Rs)>; -/******************************************************************** -* ALU64/PERM * -*********************************************************************/ +class T_FI_pat <InstHexagon MI, Intrinsic IntID, + PatLeaf ImmPred = PatLeaf<(i32 imm)>> + : Pat<(IntID F32:$Rs, ImmPred:$It), + (MI F32:$Rs, ImmPred:$It)>; -// ALU64 / PERM / Vector pack high and low halfwords. -def HEXAGON_S2_packhl: - di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>; +class T_FF_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs, F32:$Rt), + (MI F32:$Rs, F32:$Rt)>; -/******************************************************************** -* ALU64/VB * -*********************************************************************/ +class T_DD_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F64:$Rs, F64:$Rt), + (MI F64:$Rs, F64:$Rt)>; -// ALU64 / VB / Vector add unsigned bytes. -def HEXAGON_A2_vaddub: - di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>; -def HEXAGON_A2_vaddubs: - di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>; - -// ALU64 / VB / Vector average unsigned bytes. -def HEXAGON_A2_vavgub: - di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>; -def HEXAGON_A2_vavgubr: - di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>; - -// ALU64 / VB / Vector compare unsigned bytes. -def HEXAGON_A2_vcmpbeq: - qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>; -def HEXAGON_A2_vcmpbgtu: - qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>; - -// ALU64 / VB / Vector maximum/minimum unsigned bytes. 
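The floating-point helpers above (T_DI_pat, T_FI_pat) take an optional third template argument so a caller can narrow which immediates the pattern accepts; by default any i32 immediate matches. A minimal sketch of both forms, where SOME_FP_INST, int_hexagon_some_fp_op, and u5ImmPred are assumed names used only for illustration:

  // Default: matches any i32 immediate operand.
  def : T_FI_pat <SOME_FP_INST, int_hexagon_some_fp_op>;

  // Narrowed: only immediates accepted by the supplied PatLeaf (assumed here
  // to be an unsigned 5-bit predicate) will select this instruction.
  def : T_FI_pat <SOME_FP_INST, int_hexagon_some_fp_op, u5ImmPred>;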
-def HEXAGON_A2_vmaxub: - di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>; -def HEXAGON_A2_vminub: - di_ALU64_didi <"vminub", int_hexagon_A2_vminub>; - -// ALU64 / VB / Vector subtract unsigned bytes. -def HEXAGON_A2_vsubub: - di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>; -def HEXAGON_A2_vsububs: - di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>; +class T_FFF_pat <InstHexagon MI, Intrinsic IntID> + : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru), + (MI F32:$Rs, F32:$Rt, F32:$Ru)>; -// ALU64 / VB / Vector mux. -def HEXAGON_C2_vmux: - di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>; +class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID> + : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, (i32 PredRegs:$Rx)), + (MI F32:$Rs, F32:$Rt, F32:$Ru, PredRegs:$Rx)>; +//===----------------------------------------------------------------------===// +// MPYS / Multipy signed/unsigned halfwords +//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat] +//===----------------------------------------------------------------------===// -/******************************************************************** -* ALU64/VH * -*********************************************************************/ +def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>; +def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>; +def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>; +def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>; +def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>; +def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>; +def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>; +def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>; + +def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>; +def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>; +def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>; +def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>; +def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>; +def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>; +def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>; +def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>; + +def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>; +def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>; +def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>; +def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>; +def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>; +def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>; +def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>; +def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>; + +def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>; +def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>; +def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>; +def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>; +def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>; +def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>; +def : T_RR_pat <M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>; +def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>; + +def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>; +def : 
T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>; +def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>; +def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>; -// ALU64 / VH / Vector add halfwords. -// Rdd64=vadd[u]h(Rss64,Rtt64:sat] -def HEXAGON_A2_vaddh: - di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>; -def HEXAGON_A2_vaddhs: - di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>; -def HEXAGON_A2_vadduhs: - di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>; - -// ALU64 / VH / Vector average halfwords. -// Rdd64=v[n]avg[u]h(Rss64,Rtt64:rnd/:crnd][:sat] -def HEXAGON_A2_vavgh: - di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>; -def HEXAGON_A2_vavghcr: - di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>; -def HEXAGON_A2_vavghr: - di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>; -def HEXAGON_A2_vavguh: - di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>; -def HEXAGON_A2_vavguhr: - di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>; -def HEXAGON_A2_vnavgh: - di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>; -def HEXAGON_A2_vnavghcr: - di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>; -def HEXAGON_A2_vnavghr: - di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>; - -// ALU64 / VH / Vector compare halfwords. -def HEXAGON_A2_vcmpheq: - qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>; -def HEXAGON_A2_vcmphgt: - qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>; -def HEXAGON_A2_vcmphgtu: - qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>; - -// ALU64 / VH / Vector maximum halfwords. -def HEXAGON_A2_vmaxh: - di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>; -def HEXAGON_A2_vmaxuh: - di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>; - -// ALU64 / VH / Vector minimum halfwords. -def HEXAGON_A2_vminh: - di_ALU64_didi <"vminh", int_hexagon_A2_vminh>; -def HEXAGON_A2_vminuh: - di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>; - -// ALU64 / VH / Vector subtract halfwords. -def HEXAGON_A2_vsubh: - di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>; -def HEXAGON_A2_vsubhs: - di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>; -def HEXAGON_A2_vsubuhs: - di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>; +//===----------------------------------------------------------------------===// +// MPYS / Multipy signed/unsigned halfwords and add/subtract the +// result from the accumulator. 
+//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// -/******************************************************************** -* ALU64/VW * -*********************************************************************/ +def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>; +def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>; +def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>; +def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>; +def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>; +def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>; +def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>; +def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>; + +def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>; +def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>; +def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>; +def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>; +def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>; +def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>; +def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>; +def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>; + +def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>; +def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>; +def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>; +def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>; +def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>; +def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>; +def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>; +def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>; + +def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>; +def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>; +def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>; +def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>; +def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>; +def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>; +def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>; +def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>; + +def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, int_hexagon_M2_mpy_acc_sat_hl_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>; +def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>; +def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>; + +def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>; +def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>; +def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>; +def : T_RRR_pat 
<M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def : T_RR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+
+//===----------------------------------------------------------------------===//
+// Multiply signed/unsigned halfwords with and without saturation and rounding
+// into a 64-bit destination register.
+//===----------------------------------------------------------------------===//
+
+def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>;
+def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>;
+def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>;
+def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>;
+def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>;
+def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>;
+def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>;
+def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>;
+
+def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>;
+def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>;
+def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>;
+def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>;
+def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>;
+def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>;
+def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>;
+def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>;
+
+def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>;
+def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>;
+def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>;
+def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>;
+def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>;
+def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>;
+def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>;
+def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>;
+
+//===----------------------------------------------------------------------===//
+// MPYS / Multiply signed/unsigned halfwords and add/subtract the
+// result from the 64-bit destination register.
+//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat] +//===----------------------------------------------------------------------===// + +def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>; +def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>; +def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>; +def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>; + +def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>; +def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>; +def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>; +def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>; + +def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>; +def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>; +def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>; +def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>; + +def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>; +def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>; +def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>; +def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>; + +def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>; +def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>; +def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>; +def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>; + +def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>; +def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>; +def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>; +def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>; + +def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>; +def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>; +def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>; +def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>; + +def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>; +def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>; +def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>; +def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>; + +// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>; +def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>; + +// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>; +def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>; + +// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>; +def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>; + +// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat +def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>; +def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>; + +//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>; +def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>; +def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>; +def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>; + +//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>; +def : T_PP_pat 
<M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>; +def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>; +def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>; + +//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>; +def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>; +def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>; +def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>; + +//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>; +def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>; +def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>; +def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>; + +// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32) +def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>; +def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>; + +// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt) +def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>; +def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>; + +// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss) +def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>; + +// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss) +def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>; + +// Vector reduce complex multiply real or imaginary: +// Rdd[+]=vrcmpy[ir](Rss,Rtt[*]) +def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>; +def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>; +def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>; +def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>; + +def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>; +def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>; +def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>; +def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>; + +// Vector reduce halfwords +// Rdd[+]=vrmpyh(Rss,Rtt) +def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>; +def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>; + +//===----------------------------------------------------------------------===// +// Vector Multipy with accumulation +//===----------------------------------------------------------------------===// -// ALU64 / VW / Vector add words. -// Rdd32=vaddw(Rss32,Rtt32)[:sat] -def HEXAGON_A2_vaddw: - di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>; -def HEXAGON_A2_vaddws: - di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>; - -// ALU64 / VW / Vector average words. -def HEXAGON_A2_vavguw: - di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>; -def HEXAGON_A2_vavguwr: - di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>; -def HEXAGON_A2_vavgw: - di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>; -def HEXAGON_A2_vavgwcr: - di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>; -def HEXAGON_A2_vavgwr: - di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>; -def HEXAGON_A2_vnavgw: - di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>; -def HEXAGON_A2_vnavgwcr: - di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>; -def HEXAGON_A2_vnavgwr: - di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>; - -// ALU64 / VW / Vector compare words. -def HEXAGON_A2_vcmpweq: - qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>; -def HEXAGON_A2_vcmpwgt: - qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>; -def HEXAGON_A2_vcmpwgtu: - qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>; - -// ALU64 / VW / Vector maximum words. 
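One convention worth noting in the vector section above: each plain operation uses T_PP_pat, while its accumulating "_acc" variant uses T_PPP_pat, whose extra leading I64 operand carries the 64-bit accumulator. As a sketch, the vector reduce-add pair above expands to roughly the following hand-written patterns:

  // Plain form: two 64-bit register-pair sources.
  def : Pat <(int_hexagon_A2_vraddub I64:$Rs, I64:$Rt),
             (A2_vraddub DoubleRegs:$Rs, DoubleRegs:$Rt)>;

  // Accumulating form: the extra leading operand is the accumulator input.
  def : Pat <(int_hexagon_A2_vraddub_acc I64:$Rs, I64:$Rt, I64:$Ru),
             (A2_vraddub_acc DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>;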
-def HEXAGON_A2_vmaxw: - di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>; -def HEXAGON_A2_vmaxuw: - di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>; - -// ALU64 / VW / Vector minimum words. -def HEXAGON_A2_vminw: - di_ALU64_didi <"vminw", int_hexagon_A2_vminw>; -def HEXAGON_A2_vminuw: - di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>; - -// ALU64 / VW / Vector subtract words. -def HEXAGON_A2_vsubw: - di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>; -def HEXAGON_A2_vsubws: - di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>; +// Vector multiply word by signed half with accumulation +// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>; +def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>; +def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>; +def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>; +def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>; +def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>; +def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>; +def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>; + +// Vector multiply word by unsigned half with accumulation +// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat +def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>; +def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>; +def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>; +def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>; +def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>; +def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>; +def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>; +def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>; + +// Vector multiply even halfwords with accumulation +// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat] +def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>; +def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>; +def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>; + +// Vector dual multiply with accumulation +// Rxx+=vdmpy(Rss,Rtt)[:sat] +def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>; +def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>; + +// Vector complex multiply real or imaginary with accumulation +// Rxx+=vcmpy[ir](Rss,Rtt):sat +def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>; +def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>; +//===----------------------------------------------------------------------===// +// Add/Subtract halfword +// Rd=add(Rt.L,Rs.[HL])[:sat] +// Rd=sub(Rt.L,Rs.[HL])[:sat] +// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16] +// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16] +//===----------------------------------------------------------------------===// + +//Rd=add(Rt.L,Rs.[LH]) +def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>; +def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>; + +//Rd=add(Rt.L,Rs.[LH]):sat +def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>; +def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>; + +//Rd=sub(Rt.L,Rs.[LH]) +def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>; +def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>; + +//Rd=sub(Rt.L,Rs.[LH]):sat +def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>; +def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>; + +//Rd=add(Rt.[LH],Rs.[LH]):<<16 +def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>; +def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>; +def : 
T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>; +def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>; + +//Rd=sub(Rt.[LH],Rs.[LH]):<<16 +def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>; +def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>; +def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>; +def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>; + +//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16 +def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>; +def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>; +def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>; +def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>; + +//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16 +def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>; +def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>; +def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>; +def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>; + +// ALU64 / ALU / min max +def : T_RR_pat<A2_max, int_hexagon_A2_max>; +def : T_RR_pat<A2_min, int_hexagon_A2_min>; +def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>; +def : T_RR_pat<A2_minu, int_hexagon_A2_minu>; + +// Shift and accumulate +def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>; +def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>; +def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>; +def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>; +def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>; +def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>; + +def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>; +def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>; +def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>; +def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>; +def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>; +def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>; + +def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>; +def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>; +def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>; +def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>; +def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>; +def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>; + +def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>; +def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>; +def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>; +def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>; +def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>; +def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>; + +def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>; +def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>; +def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>; +def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>; +def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>; +def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>; +def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>; +def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>; + +def : 
T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>; +def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>; +def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>; +def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>; +def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>; +def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>; +def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>; +def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>; + +def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>; +def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>; +def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>; +def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>; +def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>; +def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>; +def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>; +def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>; + +def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>; +def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>; +def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>; +def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>; +def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>; +def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>; +def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>; +def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>; + +def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>; +def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>; +def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>; +def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>; +def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>; +def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>; + +def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>; +def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>; +def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>; +def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>; +def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>; +def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>; +def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>; + +def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>; +def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>; +def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>; +def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>; +def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>; +def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>; + +def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>; +def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>; +def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>; +def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>; +def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>; +def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>; +def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>; + +def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>; +def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>; +def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>; +def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>; +def : T_RRR_pat <S2_asr_r_r_acc, 
int_hexagon_S2_asr_r_r_acc>; +def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>; +def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>; +def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>; + +def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>; +def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>; +def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>; +def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>; +def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>; +def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>; +def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>; +def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>; + +def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>; +def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>; +def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>; +def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>; +def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>; +def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>; +def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>; +def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>; + +def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>; +def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>; +def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>; +def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>; +def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>; +def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>; +def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>; +def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>; /******************************************************************** -* CR * +* ALU32/ALU * *********************************************************************/ +def : T_RR_pat<A2_add, int_hexagon_A2_add>; +def : T_RI_pat<A2_addi, int_hexagon_A2_addi>; +def : T_RR_pat<A2_sub, int_hexagon_A2_sub>; +def : T_IR_pat<A2_subri, int_hexagon_A2_subri>; +def : T_RR_pat<A2_and, int_hexagon_A2_and>; +def : T_RI_pat<A2_andir, int_hexagon_A2_andir>; +def : T_RR_pat<A2_or, int_hexagon_A2_or>; +def : T_RI_pat<A2_orir, int_hexagon_A2_orir>; +def : T_RR_pat<A2_xor, int_hexagon_A2_xor>; +def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>; + +// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32) +def : Pat <(int_hexagon_A2_not (I32:$Rs)), + (A2_subri -1, IntRegs:$Rs)>; + +// Assembler mapped from Rd32=neg(Rs32) to Rd32=sub(#0,Rs32) +def : Pat <(int_hexagon_A2_neg IntRegs:$Rs), + (A2_subri 0, IntRegs:$Rs)>; + +// Transfer immediate +def : Pat <(int_hexagon_A2_tfril (I32:$Rs), u16_0ImmPred:$Is), + (A2_tfril IntRegs:$Rs, u16_0ImmPred:$Is)>; +def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is), + (A2_tfrih IntRegs:$Rs, u16_0ImmPred:$Is)>; + +// Transfer Register/immediate. +def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>; +def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>; + +// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32) +def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src), + (A2_combinew (HiReg DoubleRegs:$src), (LoReg DoubleRegs:$src))>; -// CR / Logical reductions on predicates. -def HEXAGON_C2_all8: - qi_SInst_qi <"all8", int_hexagon_C2_all8>; -def HEXAGON_C2_any8: - qi_SInst_qi <"any8", int_hexagon_C2_any8>; - -// CR / Logical operations on predicates. 
-def HEXAGON_C2_pxfer_map: - qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>; -def HEXAGON_C2_and: - qi_SInst_qiqi <"and", int_hexagon_C2_and>; -def HEXAGON_C2_andn: - qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>; -def HEXAGON_C2_not: - qi_SInst_qi <"not", int_hexagon_C2_not>; -def HEXAGON_C2_or: - qi_SInst_qiqi <"or", int_hexagon_C2_or>; -def HEXAGON_C2_orn: - qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>; -def HEXAGON_C2_xor: - qi_SInst_qiqi <"xor", int_hexagon_C2_xor>; - +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ +// Combine +def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>; +def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>; +def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>; +def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>; + +def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>; + +def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), + (I32:$Rt))), + (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; + +// Mux +def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>; +def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>; +def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>; + +// Shift halfword +def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; +def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>; +def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>; + +// Sign/zero extend +def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>; +def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>; +def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>; +def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>; /******************************************************************** -* MTYPE/ALU * +* ALU32/PRED * *********************************************************************/ +// Compare +def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>; +def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>; +def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>; + +def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>; +def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>; +def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>; -// MTYPE / ALU / Add and accumulate. -def HEXAGON_M2_acci: - si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>; -def HEXAGON_M2_accii: - si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>; -def HEXAGON_M2_nacci: - si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>; -def HEXAGON_M2_naccii: - si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>; +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)), + (i32 (C2_cmpgti (I32:$src1), + (DEC_CONST_SIGNED s8ExtPred:$src2)))>; -// MTYPE / ALU / Subtract and accumulate. -def HEXAGON_M2_subacc: - si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>; +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)), + (i32 (C2_cmpgtui (I32:$src1), + (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; -// MTYPE / ALU / Vector absolute difference. -def HEXAGON_M2_vabsdiffh: - di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>; -def HEXAGON_M2_vabsdiffw: - di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>; +// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), + (i32 (C2_cmpeq (I32:$src1), (I32:$src1)))>; -// MTYPE / ALU / XOR and xor with destination. 
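The C2_cmpgei and C2_cmpgeui mappings above fold a greater-or-equal intrinsic into the existing greater-than compares by decrementing the immediate, with the #u8 == 0 case caught by the more specific cmp.eq pattern. DEC_CONST_SIGNED and DEC_CONST_UNSIGNED are not defined in this hunk; a minimal sketch of what they presumably look like, assuming they are plain SDNodeXForms over the matched constant (the real definitions live elsewhere in the target and may differ in detail):

// Sketch only: assumed shape of the immediate-decrement transforms referenced
// by the C2_cmpgei/C2_cmpgeui patterns above.
def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
  // cmp.ge(Rs,#s8) is selected as cmp.gt(Rs,#s8-1).
  return CurDAG->getTargetConstant(N->getSExtValue() - 1, MVT::i32);
}]>;

def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
  // cmp.geu(Rs,#u8) is selected as cmp.gtu(Rs,#u8-1); the #u8 == 0 case is
  // intended to be matched by the dedicated cmp.eq pattern instead.
  return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i32);
}]>;

With that reading, int_hexagon_C2_cmpgei(Rs, 5) lowers to C2_cmpgti(Rs, 4).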
-def HEXAGON_M2_xor_xacc: - si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>; +def : Pat <(i32 (int_hexagon_C2_cmplt (I32:$src1), + (I32:$src2))), + (i32 (C2_cmpgt (I32:$src2), (I32:$src1)))>; +def : Pat <(i32 (int_hexagon_C2_cmpltu (I32:$src1), + (I32:$src2))), + (i32 (C2_cmpgtu (I32:$src2), (I32:$src1)))>; /******************************************************************** -* MTYPE/COMPLEX * +* ALU32/VH * *********************************************************************/ +// Vector add, subtract, average halfwords +def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>; +def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>; +def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>; -// MTYPE / COMPLEX / Complex multiply. -// Rdd[-+]=cmpy(Rs, Rt:<<1]:sat -def HEXAGON_M2_cmpys_s1: - di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>; -def HEXAGON_M2_cmpys_s0: - di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>; -def HEXAGON_M2_cmpysc_s1: - di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>; -def HEXAGON_M2_cmpysc_s0: - di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>; - -def HEXAGON_M2_cmacs_s1: - di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>; -def HEXAGON_M2_cmacs_s0: - di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>; -def HEXAGON_M2_cmacsc_s1: - di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>; -def HEXAGON_M2_cmacsc_s0: - di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>; - -def HEXAGON_M2_cnacs_s1: - di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>; -def HEXAGON_M2_cnacs_s0: - di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>; -def HEXAGON_M2_cnacsc_s1: - di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>; -def HEXAGON_M2_cnacsc_s0: - di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>; - -// MTYPE / COMPLEX / Complex multiply real or imaginary. -def HEXAGON_M2_cmpyr_s0: - di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>; -def HEXAGON_M2_cmacr_s0: - di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>; - -def HEXAGON_M2_cmpyi_s0: - di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>; -def HEXAGON_M2_cmaci_s0: - di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>; - -// MTYPE / COMPLEX / Complex multiply with round and pack. -// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat -def HEXAGON_M2_cmpyrs_s0: - si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>; -def HEXAGON_M2_cmpyrs_s1: - si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>; - -def HEXAGON_M2_cmpyrsc_s0: - si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>; -def HEXAGON_M2_cmpyrsc_s1: - si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>; - -//MTYPE / COMPLEX / Vector complex multiply real or imaginary. -def HEXAGON_M2_vcmpy_s0_sat_i: - di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>; -def HEXAGON_M2_vcmpy_s1_sat_i: - di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>; - -def HEXAGON_M2_vcmpy_s0_sat_r: - di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>; -def HEXAGON_M2_vcmpy_s1_sat_r: - di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>; - -def HEXAGON_M2_vcmac_s0_sat_i: - di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>; -def HEXAGON_M2_vcmac_s0_sat_r: - di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>; - -//MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary. 
-def HEXAGON_M2_vrcmpyi_s0: - di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>; -def HEXAGON_M2_vrcmpyr_s0: - di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>; - -def HEXAGON_M2_vrcmpyi_s0c: - di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>; -def HEXAGON_M2_vrcmpyr_s0c: - di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>; - -def HEXAGON_M2_vrcmaci_s0: - di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>; -def HEXAGON_M2_vrcmacr_s0: - di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>; - -def HEXAGON_M2_vrcmaci_s0c: - di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>; -def HEXAGON_M2_vrcmacr_s0c: - di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>; +def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>; +def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>; +def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>; +def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>; +def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>; +def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>; /******************************************************************** -* MTYPE/MPYH * +* ALU64/ALU * *********************************************************************/ +def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>; +def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>; +def: T_PP_pat<A2_addp, int_hexagon_A2_addp>; +def: T_PP_pat<A2_subp, int_hexagon_A2_subp>; + +def: T_PP_pat<A2_andp, int_hexagon_A2_andp>; +def: T_PP_pat<A2_orp, int_hexagon_A2_orp>; +def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>; -// MTYPE / MPYH / Multiply and use lower result. -//def HEXAGON_M2_mpysmi: -//FIXME: Hexagon_M2_mpysmi should really by of the type si_MInst_sim9, -// not si_MInst_sis9 - but for now, we will use s9. -// def Hexagon_M2_mpysmi: -// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>; -def Hexagon_M2_mpysmi: - si_MInst_sis9 <"mpyi", int_hexagon_M2_mpysmi>; -def HEXAGON_M2_mpyi: - si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>; -def HEXAGON_M2_mpyui: - si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>; -def HEXAGON_M2_macsip: - si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>; -def HEXAGON_M2_maci: - si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>; -def HEXAGON_M2_macsin: - si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>; - -// MTYPE / MPYH / Multiply word by half (32x16). 
-//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat] -//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat] -def HEXAGON_M2_mmpyl_rs1: - di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>; -def HEXAGON_M2_mmpyl_s1: - di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>; -def HEXAGON_M2_mmpyl_rs0: - di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>; -def HEXAGON_M2_mmpyl_s0: - di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>; -def HEXAGON_M2_mmpyh_rs1: - di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>; -def HEXAGON_M2_mmpyh_s1: - di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>; -def HEXAGON_M2_mmpyh_rs0: - di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>; -def HEXAGON_M2_mmpyh_s0: - di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>; -def HEXAGON_M2_mmacls_rs1: - di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>; -def HEXAGON_M2_mmacls_s1: - di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>; -def HEXAGON_M2_mmacls_rs0: - di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>; -def HEXAGON_M2_mmacls_s0: - di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>; -def HEXAGON_M2_mmachs_rs1: - di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>; -def HEXAGON_M2_mmachs_s1: - di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>; -def HEXAGON_M2_mmachs_rs0: - di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>; -def HEXAGON_M2_mmachs_s0: - di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>; - -// MTYPE / MPYH / Multiply word by unsigned half (32x16). -//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat] -//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat] -def HEXAGON_M2_mmpyul_rs1: - di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>; -def HEXAGON_M2_mmpyul_s1: - di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>; -def HEXAGON_M2_mmpyul_rs0: - di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>; -def HEXAGON_M2_mmpyul_s0: - di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>; -def HEXAGON_M2_mmpyuh_rs1: - di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>; -def HEXAGON_M2_mmpyuh_s1: - di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>; -def HEXAGON_M2_mmpyuh_rs0: - di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>; -def HEXAGON_M2_mmpyuh_s0: - di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>; -def HEXAGON_M2_mmaculs_rs1: - di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>; -def HEXAGON_M2_mmaculs_s1: - di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>; -def HEXAGON_M2_mmaculs_rs0: - di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>; -def HEXAGON_M2_mmaculs_s0: - di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>; -def HEXAGON_M2_mmacuhs_rs1: - di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>; -def HEXAGON_M2_mmacuhs_s1: - di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>; -def HEXAGON_M2_mmacuhs_rs0: - di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>; -def HEXAGON_M2_mmacuhs_s0: - di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>; - -// MTYPE / MPYH / Multiply and use upper result. 
-def HEXAGON_M2_hmmpyh_rs1: - si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>; -def HEXAGON_M2_hmmpyl_rs1: - si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>; -def HEXAGON_M2_mpy_up: - si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>; -def HEXAGON_M2_dpmpyss_rnd_s0: - si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>; -def HEXAGON_M2_mpyu_up: - si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>; - -// MTYPE / MPYH / Multiply and use full result. -def HEXAGON_M2_dpmpyuu_s0: - di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>; -def HEXAGON_M2_dpmpyuu_acc_s0: - di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>; -def HEXAGON_M2_dpmpyuu_nac_s0: - di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>; -def HEXAGON_M2_dpmpyss_s0: - di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>; -def HEXAGON_M2_dpmpyss_acc_s0: - di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>; -def HEXAGON_M2_dpmpyss_nac_s0: - di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>; +def: T_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>; +def: T_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>; +def: T_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>; +def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>; +def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>; /******************************************************************** -* MTYPE/MPYS * +* ALU64/VB * *********************************************************************/ +// ALU64 - Vector add +def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>; +def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>; +def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>; +def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>; +def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>; +def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>; +def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>; + +// ALU64 - Vector average +def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>; +def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>; +def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>; +def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>; +def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>; +def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>; +def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>; + +def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>; +def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>; +def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>; +def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>; +def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>; + +// ALU64 - Vector negative average +def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>; +def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>; +def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>; +def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>; +def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>; +def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>; + +// ALU64 - Vector max +def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>; +def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>; +def : T_PP_pat <A2_vmaxub, int_hexagon_A2_vmaxub>; +def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>; +def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>; + +// ALU64 - Vector min +def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>; +def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>; +def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>; +def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>; +def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>; + +// ALU64 - Vector sub +def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>; +def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>; +def : 
T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>; +def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>; +def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>; +def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>; +def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>; + +// ALU64 - Vector compare bytes +def : T_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>; +def : T_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>; +def : T_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>; + +// ALU64 - Vector compare halfwords +def : T_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>; +def : T_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>; +def : T_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>; + +// ALU64 - Vector compare words +def : T_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>; +def : T_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>; +def : T_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>; -// MTYPE / MPYS / Scalar 16x16 multiply signed. -//Rd=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]| -// [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]] -def HEXAGON_M2_mpy_hh_s0: - si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>; -def HEXAGON_M2_mpy_hh_s1: - si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>; -def HEXAGON_M2_mpy_rnd_hh_s1: - si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>; -def HEXAGON_M2_mpy_sat_rnd_hh_s1: - si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>; -def HEXAGON_M2_mpy_sat_hh_s1: - si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>; -def HEXAGON_M2_mpy_rnd_hh_s0: - si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>; -def HEXAGON_M2_mpy_sat_rnd_hh_s0: - si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>; -def HEXAGON_M2_mpy_sat_hh_s0: - si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>; - -def HEXAGON_M2_mpy_hl_s0: - si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>; -def HEXAGON_M2_mpy_hl_s1: - si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>; -def HEXAGON_M2_mpy_rnd_hl_s1: - si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>; -def HEXAGON_M2_mpy_sat_rnd_hl_s1: - si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>; -def HEXAGON_M2_mpy_sat_hl_s1: - si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>; -def HEXAGON_M2_mpy_rnd_hl_s0: - si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>; -def HEXAGON_M2_mpy_sat_rnd_hl_s0: - si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>; -def HEXAGON_M2_mpy_sat_hl_s0: - si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>; - -def HEXAGON_M2_mpy_lh_s0: - si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>; -def HEXAGON_M2_mpy_lh_s1: - si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>; -def HEXAGON_M2_mpy_rnd_lh_s1: - si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>; -def HEXAGON_M2_mpy_sat_rnd_lh_s1: - si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>; -def HEXAGON_M2_mpy_sat_lh_s1: - si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>; -def HEXAGON_M2_mpy_rnd_lh_s0: - si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>; -def HEXAGON_M2_mpy_sat_rnd_lh_s0: - si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>; -def HEXAGON_M2_mpy_sat_lh_s0: - si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>; - -def HEXAGON_M2_mpy_ll_s0: - si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>; -def HEXAGON_M2_mpy_ll_s1: - si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>; -def HEXAGON_M2_mpy_rnd_ll_s1: - si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>; -def 
HEXAGON_M2_mpy_sat_rnd_ll_s1: - si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>; -def HEXAGON_M2_mpy_sat_ll_s1: - si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>; -def HEXAGON_M2_mpy_rnd_ll_s0: - si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>; -def HEXAGON_M2_mpy_sat_rnd_ll_s0: - si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>; -def HEXAGON_M2_mpy_sat_ll_s0: - si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>; - -//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]] -def HEXAGON_M2_mpyd_hh_s0: - di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>; -def HEXAGON_M2_mpyd_hh_s1: - di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>; -def HEXAGON_M2_mpyd_rnd_hh_s1: - di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>; -def HEXAGON_M2_mpyd_rnd_hh_s0: - di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>; - -def HEXAGON_M2_mpyd_hl_s0: - di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>; -def HEXAGON_M2_mpyd_hl_s1: - di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>; -def HEXAGON_M2_mpyd_rnd_hl_s1: - di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>; -def HEXAGON_M2_mpyd_rnd_hl_s0: - di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>; - -def HEXAGON_M2_mpyd_lh_s0: - di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>; -def HEXAGON_M2_mpyd_lh_s1: - di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>; -def HEXAGON_M2_mpyd_rnd_lh_s1: - di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>; -def HEXAGON_M2_mpyd_rnd_lh_s0: - di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>; - -def HEXAGON_M2_mpyd_ll_s0: - di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>; -def HEXAGON_M2_mpyd_ll_s1: - di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>; -def HEXAGON_M2_mpyd_rnd_ll_s1: - di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>; -def HEXAGON_M2_mpyd_rnd_ll_s0: - di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>; - -//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] -def HEXAGON_M2_mpy_acc_hh_s0: - si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>; -def HEXAGON_M2_mpy_acc_hh_s1: - si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>; -def HEXAGON_M2_mpy_acc_sat_hh_s1: - si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>; -def HEXAGON_M2_mpy_acc_sat_hh_s0: - si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>; - -def HEXAGON_M2_mpy_acc_hl_s0: - si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>; -def HEXAGON_M2_mpy_acc_hl_s1: - si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>; -def HEXAGON_M2_mpy_acc_sat_hl_s1: - si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>; -def HEXAGON_M2_mpy_acc_sat_hl_s0: - si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>; - -def HEXAGON_M2_mpy_acc_lh_s0: - si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>; -def HEXAGON_M2_mpy_acc_lh_s1: - si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>; -def HEXAGON_M2_mpy_acc_sat_lh_s1: - si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>; -def HEXAGON_M2_mpy_acc_sat_lh_s0: - si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>; - -def HEXAGON_M2_mpy_acc_ll_s0: - si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>; -def HEXAGON_M2_mpy_acc_ll_s1: - si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>; -def HEXAGON_M2_mpy_acc_sat_ll_s1: - 
si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>; -def HEXAGON_M2_mpy_acc_sat_ll_s0: - si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>; - -//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] -def HEXAGON_M2_mpy_nac_hh_s0: - si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>; -def HEXAGON_M2_mpy_nac_hh_s1: - si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>; -def HEXAGON_M2_mpy_nac_sat_hh_s1: - si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>; -def HEXAGON_M2_mpy_nac_sat_hh_s0: - si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>; - -def HEXAGON_M2_mpy_nac_hl_s0: - si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>; -def HEXAGON_M2_mpy_nac_hl_s1: - si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>; -def HEXAGON_M2_mpy_nac_sat_hl_s1: - si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>; -def HEXAGON_M2_mpy_nac_sat_hl_s0: - si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>; - -def HEXAGON_M2_mpy_nac_lh_s0: - si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>; -def HEXAGON_M2_mpy_nac_lh_s1: - si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>; -def HEXAGON_M2_mpy_nac_sat_lh_s1: - si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>; -def HEXAGON_M2_mpy_nac_sat_lh_s0: - si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>; - -def HEXAGON_M2_mpy_nac_ll_s0: - si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>; -def HEXAGON_M2_mpy_nac_ll_s1: - si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>; -def HEXAGON_M2_mpy_nac_sat_ll_s1: - si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>; -def HEXAGON_M2_mpy_nac_sat_ll_s0: - si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>; - -//Rx+=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1] -def HEXAGON_M2_mpyd_acc_hh_s0: - di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>; -def HEXAGON_M2_mpyd_acc_hh_s1: - di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>; - -def HEXAGON_M2_mpyd_acc_hl_s0: - di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>; -def HEXAGON_M2_mpyd_acc_hl_s1: - di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>; - -def HEXAGON_M2_mpyd_acc_lh_s0: - di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>; -def HEXAGON_M2_mpyd_acc_lh_s1: - di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>; - -def HEXAGON_M2_mpyd_acc_ll_s0: - di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>; -def HEXAGON_M2_mpyd_acc_ll_s1: - di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>; - -//Rx-=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1] -def HEXAGON_M2_mpyd_nac_hh_s0: - di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>; -def HEXAGON_M2_mpyd_nac_hh_s1: - di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>; - -def HEXAGON_M2_mpyd_nac_hl_s0: - di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>; -def HEXAGON_M2_mpyd_nac_hl_s1: - di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>; - -def HEXAGON_M2_mpyd_nac_lh_s0: - di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>; -def HEXAGON_M2_mpyd_nac_lh_s1: - di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>; - -def HEXAGON_M2_mpyd_nac_ll_s0: - di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>; -def HEXAGON_M2_mpyd_nac_ll_s1: - di_MInst_disisi_nac_ll_s1 <"mpy", 
int_hexagon_M2_mpyd_nac_ll_s1>; - -// MTYPE / MPYS / Scalar 16x16 multiply unsigned. -//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyu_hh_s0: - si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>; -def HEXAGON_M2_mpyu_hh_s1: - si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>; -def HEXAGON_M2_mpyu_hl_s0: - si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>; -def HEXAGON_M2_mpyu_hl_s1: - si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>; -def HEXAGON_M2_mpyu_lh_s0: - si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>; -def HEXAGON_M2_mpyu_lh_s1: - si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>; -def HEXAGON_M2_mpyu_ll_s0: - si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>; -def HEXAGON_M2_mpyu_ll_s1: - si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>; - -//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyud_hh_s0: - di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>; -def HEXAGON_M2_mpyud_hh_s1: - di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>; -def HEXAGON_M2_mpyud_hl_s0: - di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>; -def HEXAGON_M2_mpyud_hl_s1: - di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>; -def HEXAGON_M2_mpyud_lh_s0: - di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>; -def HEXAGON_M2_mpyud_lh_s1: - di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>; -def HEXAGON_M2_mpyud_ll_s0: - di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>; -def HEXAGON_M2_mpyud_ll_s1: - di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>; - -//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyu_acc_hh_s0: - si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>; -def HEXAGON_M2_mpyu_acc_hh_s1: - si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>; -def HEXAGON_M2_mpyu_acc_hl_s0: - si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>; -def HEXAGON_M2_mpyu_acc_hl_s1: - si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>; -def HEXAGON_M2_mpyu_acc_lh_s0: - si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>; -def HEXAGON_M2_mpyu_acc_lh_s1: - si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>; -def HEXAGON_M2_mpyu_acc_ll_s0: - si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>; -def HEXAGON_M2_mpyu_acc_ll_s1: - si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>; - -//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyu_nac_hh_s0: - si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>; -def HEXAGON_M2_mpyu_nac_hh_s1: - si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>; -def HEXAGON_M2_mpyu_nac_hl_s0: - si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>; -def HEXAGON_M2_mpyu_nac_hl_s1: - si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>; -def HEXAGON_M2_mpyu_nac_lh_s0: - si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>; -def HEXAGON_M2_mpyu_nac_lh_s1: - si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>; -def HEXAGON_M2_mpyu_nac_ll_s0: - si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>; -def HEXAGON_M2_mpyu_nac_ll_s1: - si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>; - -//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyud_acc_hh_s0: - di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>; -def HEXAGON_M2_mpyud_acc_hh_s1: - di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>; -def HEXAGON_M2_mpyud_acc_hl_s0: - di_MInst_disisi_acc_hl <"mpyu", 
int_hexagon_M2_mpyud_acc_hl_s0>; -def HEXAGON_M2_mpyud_acc_hl_s1: - di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>; -def HEXAGON_M2_mpyud_acc_lh_s0: - di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>; -def HEXAGON_M2_mpyud_acc_lh_s1: - di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>; -def HEXAGON_M2_mpyud_acc_ll_s0: - di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>; -def HEXAGON_M2_mpyud_acc_ll_s1: - di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>; - -//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def HEXAGON_M2_mpyud_nac_hh_s0: - di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>; -def HEXAGON_M2_mpyud_nac_hh_s1: - di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>; -def HEXAGON_M2_mpyud_nac_hl_s0: - di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>; -def HEXAGON_M2_mpyud_nac_hl_s1: - di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>; -def HEXAGON_M2_mpyud_nac_lh_s0: - di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>; -def HEXAGON_M2_mpyud_nac_lh_s1: - di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>; -def HEXAGON_M2_mpyud_nac_ll_s0: - di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>; -def HEXAGON_M2_mpyud_nac_ll_s1: - di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>; - +// ALU64 / VB / Vector mux. +def : Pat<(int_hexagon_C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + (C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt)>; + +// MPY - Multiply and use full result +// Rdd = mpy[u](Rs, Rt) +def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>; +def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>; + +// Complex multiply real or imaginary +def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>; +def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>; + +// Complex multiply +def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>; +def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>; +def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>; +def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>; + +// Vector multiply halfwords +// Rdd=vmpyh(Rs,Rt)[:<<1]:sat +def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>; +def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>; + +// Rxx[+-]= mpy[u](Rs,Rt) +def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>; +def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>; +def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>; +def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>; + +// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat +def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>; +def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>; +def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>; +def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>; + +// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat +def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>; +def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>; +def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>; +def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>; + +// Rxx+=cmpy[ir](Rs,Rt) +def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>; +def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>; + +// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat] +def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>; +def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>; +def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>; 
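All of the new-style mappings in this patch go through a small family of T_*_pat helper classes that are not defined in this hunk. The naming appears to encode the operand kinds: R for a 32-bit IntRegs operand, P for a 64-bit DoubleRegs operand, I for an immediate, Q for a predicate, with a leading letter naming the accumulator of read-modify-write instructions. A minimal sketch of the register-only helpers under that assumption (the real class definitions live elsewhere in the target):

// Sketch only: assumed shape of the register-only T_*_pat helpers.
class T_RR_pat <InstHexagon MI, Intrinsic IntID>
  : Pat <(IntID IntRegs:$Rs, IntRegs:$Rt),
         (MI IntRegs:$Rs, IntRegs:$Rt)>;

class T_PP_pat <InstHexagon MI, Intrinsic IntID>
  : Pat <(IntID DoubleRegs:$Rss, DoubleRegs:$Rtt),
         (MI DoubleRegs:$Rss, DoubleRegs:$Rtt)>;

// Accumulating forms pass the destination/accumulator as the first operand.
class T_PRR_pat <InstHexagon MI, Intrinsic IntID>
  : Pat <(IntID DoubleRegs:$Rxx, IntRegs:$Rs, IntRegs:$Rt),
         (MI DoubleRegs:$Rxx, IntRegs:$Rs, IntRegs:$Rt)>;

class T_PPP_pat <InstHexagon MI, Intrinsic IntID>
  : Pat <(IntID DoubleRegs:$Rxx, DoubleRegs:$Rss, DoubleRegs:$Rtt),
         (MI DoubleRegs:$Rxx, DoubleRegs:$Rss, DoubleRegs:$Rtt)>;

Read this way, def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>; above selects the accumulating 64-bit multiply directly from its intrinsic, with the running sum supplied as the first operand.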
/******************************************************************** -* MTYPE/VB * +* CR * *********************************************************************/ +class qi_CRInst_qi_pat<InstHexagon Inst, Intrinsic IntID> : + Pat<(i32 (IntID IntRegs:$Rs)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs))))>; -// MTYPE / VB / Vector reduce add unsigned bytes. -def HEXAGON_A2_vraddub: - di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>; -def HEXAGON_A2_vraddub_acc: - di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>; +class qi_CRInst_qiqi_pat<InstHexagon Inst, Intrinsic IntID> : + Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), (C2_tfrrp IntRegs:$Rt))))>; -// MTYPE / VB / Vector sum of absolute differences unsigned bytes. -def HEXAGON_A2_vrsadub: - di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>; -def HEXAGON_A2_vrsadub_acc: - di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>; +def: qi_CRInst_qi_pat<C2_not, int_hexagon_C2_not>; +def: qi_CRInst_qi_pat<C2_all8, int_hexagon_C2_all8>; +def: qi_CRInst_qi_pat<C2_any8, int_hexagon_C2_any8>; -/******************************************************************** -* MTYPE/VH * -*********************************************************************/ +def: qi_CRInst_qiqi_pat<C2_and, int_hexagon_C2_and>; +def: qi_CRInst_qiqi_pat<C2_andn, int_hexagon_C2_andn>; +def: qi_CRInst_qiqi_pat<C2_or, int_hexagon_C2_or>; +def: qi_CRInst_qiqi_pat<C2_orn, int_hexagon_C2_orn>; +def: qi_CRInst_qiqi_pat<C2_xor, int_hexagon_C2_xor>; -// MTYPE / VH / Vector dual multiply. -def HEXAGON_M2_vdmpys_s1: - di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>; -def HEXAGON_M2_vdmpys_s0: - di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>; -def HEXAGON_M2_vdmacs_s1: - di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>; -def HEXAGON_M2_vdmacs_s0: - di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>; - -// MTYPE / VH / Vector dual multiply with round and pack. -def HEXAGON_M2_vdmpyrs_s0: - si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>; -def HEXAGON_M2_vdmpyrs_s1: - si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>; - -// MTYPE / VH / Vector multiply even halfwords. -def HEXAGON_M2_vmpy2es_s1: - di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>; -def HEXAGON_M2_vmpy2es_s0: - di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>; -def HEXAGON_M2_vmac2es: - di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>; -def HEXAGON_M2_vmac2es_s1: - di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>; -def HEXAGON_M2_vmac2es_s0: - di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>; - -// MTYPE / VH / Vector multiply halfwords. -def HEXAGON_M2_vmpy2s_s0: - di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>; -def HEXAGON_M2_vmpy2s_s1: - di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>; -def HEXAGON_M2_vmac2: - di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>; -def HEXAGON_M2_vmac2s_s0: - di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>; -def HEXAGON_M2_vmac2s_s1: - di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>; - -// MTYPE / VH / Vector multiply halfwords with round and pack. -def HEXAGON_M2_vmpy2s_s0pack: - si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>; -def HEXAGON_M2_vmpy2s_s1pack: - si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>; - -// MTYPE / VH / Vector reduce multiply halfwords. 
-// Rxx32+=vrmpyh(Rss32,Rtt32) -def HEXAGON_M2_vrmpy_s0: - di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>; -def HEXAGON_M2_vrmac_s0: - di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>; +// Multiply 32x32 and use lower result +def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>; +def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>; +def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>; +// Subtract and accumulate +def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>; -/******************************************************************** -* STYPE/ALU * -*********************************************************************/ +// Add and accumulate +def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>; +def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>; +def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>; +def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>; -// STYPE / ALU / Absolute value. -def HEXAGON_A2_abs: - si_SInst_si <"abs", int_hexagon_A2_abs>; -def HEXAGON_A2_absp: - di_SInst_di <"abs", int_hexagon_A2_absp>; -def HEXAGON_A2_abssat: - si_SInst_si_sat <"abs", int_hexagon_A2_abssat>; +// XOR and XOR with destination +def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>; -// STYPE / ALU / Negate. -def HEXAGON_A2_negp: - di_SInst_di <"neg", int_hexagon_A2_negp>; -def HEXAGON_A2_negsat: - si_SInst_si_sat <"neg", int_hexagon_A2_negsat>; +class MType_R32_pat <Intrinsic IntID, InstHexagon OutputInst> : + Pat <(IntID IntRegs:$src1, IntRegs:$src2), + (OutputInst IntRegs:$src1, IntRegs:$src2)>; -// STYPE / ALU / Logical Not. -def HEXAGON_A2_notp: - di_SInst_di <"not", int_hexagon_A2_notp>; +// Vector dual multiply with round and pack -// STYPE / ALU / Sign extend word to doubleword. -def HEXAGON_A2_sxtw: - di_SInst_si <"sxtw", int_hexagon_A2_sxtw>; +def : Pat <(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>; +def : Pat <(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2), + (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>; + +// Vector multiply halfwords with round and pack + +def : MType_R32_pat <int_hexagon_M2_vmpy2s_s0pack, M2_vmpy2s_s0pack>; +def : MType_R32_pat <int_hexagon_M2_vmpy2s_s1pack, M2_vmpy2s_s1pack>; + +// Multiply and use lower result +def : MType_R32_pat <int_hexagon_M2_mpyi, M2_mpyi>; +def : T_RI_pat<M2_mpysmi, int_hexagon_M2_mpysmi>; + +// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32) +def : MType_R32_pat <int_hexagon_M2_mpyui, M2_mpyi>; + +// Multiply and use upper result +def : MType_R32_pat <int_hexagon_M2_mpy_up, M2_mpy_up>; +def : MType_R32_pat <int_hexagon_M2_mpyu_up, M2_mpyu_up>; +def : MType_R32_pat <int_hexagon_M2_hmmpyh_rs1, M2_hmmpyh_rs1>; +def : MType_R32_pat <int_hexagon_M2_hmmpyl_rs1, M2_hmmpyl_rs1>; +def : MType_R32_pat <int_hexagon_M2_dpmpyss_rnd_s0, M2_dpmpyss_rnd_s0>; + +// Complex multiply with round and pack +// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat +def : MType_R32_pat <int_hexagon_M2_cmpyrs_s0, M2_cmpyrs_s0>; +def : MType_R32_pat <int_hexagon_M2_cmpyrs_s1, M2_cmpyrs_s1>; +def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s0, M2_cmpyrsc_s0>; +def : MType_R32_pat <int_hexagon_M2_cmpyrsc_s1, M2_cmpyrsc_s1>; /******************************************************************** -* STYPE/BIT * +* STYPE/ALU * *********************************************************************/ +def : T_P_pat <A2_absp, int_hexagon_A2_absp>; +def : T_P_pat <A2_negp, int_hexagon_A2_negp>; +def : T_P_pat <A2_notp, int_hexagon_A2_notp>; -// STYPE / BIT / Count leading. 
-def HEXAGON_S2_cl0: - si_SInst_si <"cl0", int_hexagon_S2_cl0>; -def HEXAGON_S2_cl0p: - si_SInst_di <"cl0", int_hexagon_S2_cl0p>; -def HEXAGON_S2_cl1: - si_SInst_si <"cl1", int_hexagon_S2_cl1>; -def HEXAGON_S2_cl1p: - si_SInst_di <"cl1", int_hexagon_S2_cl1p>; -def HEXAGON_S2_clb: - si_SInst_si <"clb", int_hexagon_S2_clb>; -def HEXAGON_S2_clbp: - si_SInst_di <"clb", int_hexagon_S2_clbp>; -def HEXAGON_S2_clbnorm: - si_SInst_si <"normamt", int_hexagon_S2_clbnorm>; - -// STYPE / BIT / Count trailing. -def HEXAGON_S2_ct0: - si_SInst_si <"ct0", int_hexagon_S2_ct0>; -def HEXAGON_S2_ct1: - si_SInst_si <"ct1", int_hexagon_S2_ct1>; - -// STYPE / BIT / Compare bit mask. -def Hexagon_C2_bitsclr: - qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>; -def Hexagon_C2_bitsclri: - qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>; -def Hexagon_C2_bitsset: - qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>; - -// STYPE / BIT / Extract unsigned. -// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm]) -def HEXAGON_S2_extractu: - si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>; -def HEXAGON_S2_extractu_rp: - si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>; -def HEXAGON_S2_extractup: - di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>; -def HEXAGON_S2_extractup_rp: - di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>; - -// STYPE / BIT / Insert bitfield. -def Hexagon_S2_insert: - si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>; -def Hexagon_S2_insert_rp: - si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>; -def Hexagon_S2_insertp: - di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>; -def Hexagon_S2_insertp_rp: - di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>; - -// STYPE / BIT / Innterleave/deinterleave. -def Hexagon_S2_interleave: - di_SInst_di <"interleave", int_hexagon_S2_interleave>; -def Hexagon_S2_deinterleave: - di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>; - -// STYPE / BIT / Linear feedback-shift Iteration. -def Hexagon_S2_lfsp: - di_SInst_didi <"lfs", int_hexagon_S2_lfsp>; - -// STYPE / BIT / Bit reverse. -def Hexagon_S2_brev: - si_SInst_si <"brev", int_hexagon_S2_brev>; - -// STYPE / BIT / Set/Clear/Toggle Bit. -def HEXAGON_S2_setbit_i: - si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>; -def HEXAGON_S2_togglebit_i: - si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>; -def HEXAGON_S2_clrbit_i: - si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>; -def HEXAGON_S2_setbit_r: - si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>; -def HEXAGON_S2_togglebit_r: - si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>; -def HEXAGON_S2_clrbit_r: - si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>; - -// STYPE / BIT / Test Bit. 
-def HEXAGON_S2_tstbit_i:
-  qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
-def HEXAGON_S2_tstbit_r:
-  qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+// Count leading/trailing
+def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>;
+def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>;
+def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>;
+def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>;
+def: T_R_pat<S2_clb, int_hexagon_S2_clb>;
+def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>;
+def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>;
+def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>;
+def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>;
+
+// Compare bit mask
+def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>;
+def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>;
+def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>;
+
+// Vector shuffle
+def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>;
+def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>;
+def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>;
+def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>;
+
+// Vector truncate
+def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>;
+def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>;
+
+// Linear feedback-shift iteration.
+def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>;
+
+// Vector splice
+def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>;
+def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>;
+
+// Shift by immediate and add
+def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>;
+
+// Extract bitfield
+def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>;
+def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>;
+def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>;
+def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>;
+
+// Insert bitfield
+def : Pat <(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2,
+            DoubleRegs:$src3),
+           (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp_rp (I64:$src1),
+                (I64:$src2), (I64:$src3))),
+          (i64 (S2_insertp_rp (I64:$src1), (I64:$src2),
+                (I64:$src3)))>;
+
+def : Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2,
+           u5ImmPred:$src3, u5ImmPred:$src4),
+          (S2_insert IntRegs:$src1, IntRegs:$src2,
+           u5ImmPred:$src3, u5ImmPred:$src4)>;
+
+def : Pat<(i64 (int_hexagon_S2_insertp (I64:$src1),
+                (I64:$src2), u6ImmPred:$src3, u6ImmPred:$src4)),
+          (i64 (S2_insertp (I64:$src1), (I64:$src2),
+                u6ImmPred:$src3, u6ImmPred:$src4))>;
+
+
+// Interleave/deinterleave
+def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
+def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>;
+
+// Set/Clear/Toggle Bit
+def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>;
+def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>;
+def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>;
+
+def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>;
+def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>;
+def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>;
+
+// Test Bit
+def: T_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>;
+def: T_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>;
 /********************************************************************
 * STYPE/COMPLEX *
 *********************************************************************/
+// Vector Complex conjugate
+def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>;
-// STYPE / COMPLEX / Vector Complex conjugate.
-def HEXAGON_A2_vconj: - di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>; - -// STYPE / COMPLEX / Vector Complex rotate. -def HEXAGON_S2_vcrotate: - di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>; - +// Vector Complex rotate +def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>; /******************************************************************** * STYPE/PERM * *********************************************************************/ -// STYPE / PERM / Saturate. -def HEXAGON_A2_sat: - si_SInst_di <"sat", int_hexagon_A2_sat>; -def HEXAGON_A2_satb: - si_SInst_si <"satb", int_hexagon_A2_satb>; -def HEXAGON_A2_sath: - si_SInst_si <"sath", int_hexagon_A2_sath>; -def HEXAGON_A2_satub: - si_SInst_si <"satub", int_hexagon_A2_satub>; -def HEXAGON_A2_satuh: - si_SInst_si <"satuh", int_hexagon_A2_satuh>; - -// STYPE / PERM / Swizzle bytes. -def HEXAGON_A2_swiz: - si_SInst_si <"swiz", int_hexagon_A2_swiz>; - -// STYPE / PERM / Vector align. -// Need custom lowering -def HEXAGON_S2_valignib: - di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>; -def HEXAGON_S2_valignrb: - di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>; - -// STYPE / PERM / Vector round and pack. -def HEXAGON_S2_vrndpackwh: - si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>; -def HEXAGON_S2_vrndpackwhs: - si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>; - -// STYPE / PERM / Vector saturate and pack. -def HEXAGON_S2_svsathb: - si_SInst_si <"vsathb", int_hexagon_S2_svsathb>; -def HEXAGON_S2_vsathb: - si_SInst_di <"vsathb", int_hexagon_S2_vsathb>; -def HEXAGON_S2_svsathub: - si_SInst_si <"vsathub", int_hexagon_S2_svsathub>; -def HEXAGON_S2_vsathub: - si_SInst_di <"vsathub", int_hexagon_S2_vsathub>; -def HEXAGON_S2_vsatwh: - si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>; -def HEXAGON_S2_vsatwuh: - si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>; - -// STYPE / PERM / Vector saturate without pack. -def HEXAGON_S2_vsathb_nopack: - di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>; -def HEXAGON_S2_vsathub_nopack: - di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>; -def HEXAGON_S2_vsatwh_nopack: - di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>; -def HEXAGON_S2_vsatwuh_nopack: - di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>; - -// STYPE / PERM / Vector shuffle. -def HEXAGON_S2_shuffeb: - di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>; -def HEXAGON_S2_shuffeh: - di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>; -def HEXAGON_S2_shuffob: - di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>; -def HEXAGON_S2_shuffoh: - di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>; - -// STYPE / PERM / Vector splat bytes. -def HEXAGON_S2_vsplatrb: - si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>; - -// STYPE / PERM / Vector splat halfwords. -def HEXAGON_S2_vsplatrh: - di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>; - -// STYPE / PERM / Vector splice. -def Hexagon_S2_vsplicerb: - di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>; -def Hexagon_S2_vspliceib: - di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>; - -// STYPE / PERM / Sign extend. -def HEXAGON_S2_vsxtbh: - di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>; -def HEXAGON_S2_vsxthw: - di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>; - -// STYPE / PERM / Truncate. 
-def HEXAGON_S2_vtrunehb: - si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>; -def HEXAGON_S2_vtrunohb: - si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>; -def HEXAGON_S2_vtrunewh: - di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>; -def HEXAGON_S2_vtrunowh: - di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>; - -// STYPE / PERM / Zero extend. -def HEXAGON_S2_vzxtbh: - di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>; -def HEXAGON_S2_vzxthw: - di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>; - +// Vector saturate without pack +def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>; +def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>; +def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>; +def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>; /******************************************************************** * STYPE/PRED * *********************************************************************/ -// STYPE / PRED / Mask generate from predicate. -def HEXAGON_C2_mask: - di_SInst_qi <"mask", int_hexagon_C2_mask>; - -// STYPE / PRED / Predicate transfer. -def HEXAGON_C2_tfrpr: - si_SInst_qi <"", int_hexagon_C2_tfrpr>; -def HEXAGON_C2_tfrrp: - qi_SInst_si <"", int_hexagon_C2_tfrrp>; +// Predicate transfer +def: Pat<(i32 (int_hexagon_C2_tfrpr (I32:$Rs))), + (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; +def: Pat<(i32 (int_hexagon_C2_tfrrp (I32:$Rs))), + (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; -// STYPE / PRED / Viterbi pack even and odd predicate bits. -def HEXAGON_C2_vitpack: - si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>; +// Mask generate from predicate +def: Pat<(i64 (int_hexagon_C2_mask (I32:$Rs))), + (i64 (C2_mask (C2_tfrrp (I32:$Rs))))>; +// Viterbi pack even and odd predicate bits +def: Pat<(i32 (int_hexagon_C2_vitpack (I32:$Rs), (I32:$Rt))), + (i32 (C2_vitpack (C2_tfrrp (I32:$Rs)), + (C2_tfrrp (I32:$Rt))))>; /******************************************************************** * STYPE/SHIFT * *********************************************************************/ -// STYPE / SHIFT / Shift by immediate. -def HEXAGON_S2_asl_i_r: - si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>; -def HEXAGON_S2_asr_i_r: - si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>; -def HEXAGON_S2_lsr_i_r: - si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>; -def HEXAGON_S2_asl_i_p: - di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>; -def HEXAGON_S2_asr_i_p: - di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>; -def HEXAGON_S2_lsr_i_p: - di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>; - -// STYPE / SHIFT / Shift by immediate and accumulate. 
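The STYPE/PRED patterns above bridge a representation gap: the intrinsics expose predicates as ordinary 32-bit values, while the selected instructions read and write PredRegs, so each i32 predicate operand is moved in with C2_tfrrp and a predicate result is moved back out with C2_tfrpr. Purely as an illustration of that round-trip (this helper is hypothetical and does not exist in the patch), the unary case could be factored into a class:

// Hypothetical helper, shown only to make the i32 <-> predicate wrapping
// explicit; the patch spells these patterns out by hand instead.
class T_QR_wrap_pat <InstHexagon MI, Intrinsic IntID>
  : Pat <(i32 (IntID I32:$Rs)),
         (i32 (C2_tfrpr (MI (C2_tfrrp I32:$Rs))))>;
// e.g. the int_hexagon_C2_tfrrp pattern above would then read
//   def : T_QR_wrap_pat <C2_tfrrp, int_hexagon_C2_tfrrp>;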
-def HEXAGON_S2_asl_i_r_acc: - si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>; -def HEXAGON_S2_asr_i_r_acc: - si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>; -def HEXAGON_S2_lsr_i_r_acc: - si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>; -def HEXAGON_S2_asl_i_r_nac: - si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>; -def HEXAGON_S2_asr_i_r_nac: - si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>; -def HEXAGON_S2_lsr_i_r_nac: - si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>; -def HEXAGON_S2_asl_i_p_acc: - di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>; -def HEXAGON_S2_asr_i_p_acc: - di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>; -def HEXAGON_S2_lsr_i_p_acc: - di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>; -def HEXAGON_S2_asl_i_p_nac: - di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>; -def HEXAGON_S2_asr_i_p_nac: - di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>; -def HEXAGON_S2_lsr_i_p_nac: - di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>; - -// STYPE / SHIFT / Shift by immediate and add. -def HEXAGON_S2_addasl_rrri: - si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>; - -// STYPE / SHIFT / Shift by immediate and logical. -def HEXAGON_S2_asl_i_r_and: - si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>; -def HEXAGON_S2_asr_i_r_and: - si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>; -def HEXAGON_S2_lsr_i_r_and: - si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>; - -def HEXAGON_S2_asl_i_r_xacc: - si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>; -def HEXAGON_S2_lsr_i_r_xacc: - si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>; - -def HEXAGON_S2_asl_i_r_or: - si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>; -def HEXAGON_S2_asr_i_r_or: - si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>; -def HEXAGON_S2_lsr_i_r_or: - si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>; - -def HEXAGON_S2_asl_i_p_and: - di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>; -def HEXAGON_S2_asr_i_p_and: - di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>; -def HEXAGON_S2_lsr_i_p_and: - di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>; - -def HEXAGON_S2_asl_i_p_xacc: - di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>; -def HEXAGON_S2_lsr_i_p_xacc: - di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>; - -def HEXAGON_S2_asl_i_p_or: - di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>; -def HEXAGON_S2_asr_i_p_or: - di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>; -def HEXAGON_S2_lsr_i_p_or: - di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>; - -// STYPE / SHIFT / Shift right by immediate with rounding. -def HEXAGON_S2_asr_i_r_rnd: - si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>; -def HEXAGON_S2_asr_i_r_rnd_goodsyntax: - si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>; - -// STYPE / SHIFT / Shift left by immediate with saturation. -def HEXAGON_S2_asl_i_r_sat: - si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>; - -// STYPE / SHIFT / Shift by register. 
-def HEXAGON_S2_asl_r_r: - si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>; -def HEXAGON_S2_asr_r_r: - si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>; -def HEXAGON_S2_lsl_r_r: - si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>; -def HEXAGON_S2_lsr_r_r: - si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>; -def HEXAGON_S2_asl_r_p: - di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>; -def HEXAGON_S2_asr_r_p: - di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>; -def HEXAGON_S2_lsl_r_p: - di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>; -def HEXAGON_S2_lsr_r_p: - di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>; - -// STYPE / SHIFT / Shift by register and accumulate. -def HEXAGON_S2_asl_r_r_acc: - si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>; -def HEXAGON_S2_asr_r_r_acc: - si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>; -def HEXAGON_S2_lsl_r_r_acc: - si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>; -def HEXAGON_S2_lsr_r_r_acc: - si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>; -def HEXAGON_S2_asl_r_p_acc: - di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>; -def HEXAGON_S2_asr_r_p_acc: - di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>; -def HEXAGON_S2_lsl_r_p_acc: - di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>; -def HEXAGON_S2_lsr_r_p_acc: - di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>; - -def HEXAGON_S2_asl_r_r_nac: - si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>; -def HEXAGON_S2_asr_r_r_nac: - si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>; -def HEXAGON_S2_lsl_r_r_nac: - si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>; -def HEXAGON_S2_lsr_r_r_nac: - si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>; -def HEXAGON_S2_asl_r_p_nac: - di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>; -def HEXAGON_S2_asr_r_p_nac: - di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>; -def HEXAGON_S2_lsl_r_p_nac: - di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>; -def HEXAGON_S2_lsr_r_p_nac: - di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>; - -// STYPE / SHIFT / Shift by register and logical. 
-def HEXAGON_S2_asl_r_r_and: - si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>; -def HEXAGON_S2_asr_r_r_and: - si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>; -def HEXAGON_S2_lsl_r_r_and: - si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>; -def HEXAGON_S2_lsr_r_r_and: - si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>; - -def HEXAGON_S2_asl_r_r_or: - si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>; -def HEXAGON_S2_asr_r_r_or: - si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>; -def HEXAGON_S2_lsl_r_r_or: - si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>; -def HEXAGON_S2_lsr_r_r_or: - si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>; - -def HEXAGON_S2_asl_r_p_and: - di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>; -def HEXAGON_S2_asr_r_p_and: - di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>; -def HEXAGON_S2_lsl_r_p_and: - di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>; -def HEXAGON_S2_lsr_r_p_and: - di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>; - -def HEXAGON_S2_asl_r_p_or: - di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>; -def HEXAGON_S2_asr_r_p_or: - di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>; -def HEXAGON_S2_lsl_r_p_or: - di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>; -def HEXAGON_S2_lsr_r_p_or: - di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>; - -// STYPE / SHIFT / Shift by register with saturation. -def HEXAGON_S2_asl_r_r_sat: - si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>; -def HEXAGON_S2_asr_r_r_sat: - si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>; - -// STYPE / SHIFT / Table Index. -def Hexagon_S2_tableidxb_goodsyntax: - si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>; -def Hexagon_S2_tableidxd_goodsyntax: - si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>; -def Hexagon_S2_tableidxh_goodsyntax: - si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>; -def Hexagon_S2_tableidxw_goodsyntax: - si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>; +def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>; +def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>; +def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>; + +def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>; +def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>; +def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>; +def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>; + +def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>; +def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>; +def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>; +def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>; + +def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>; +def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>; + +def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>; +def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>; +def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>; +def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>; +def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>; +def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>; + +// Vector saturate and pack +def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>; +def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>; +def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>; +def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>; +def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>; +def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>; + +def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>; +def : T_P_pat <S2_vtrunehb, 
int_hexagon_S2_vtrunehb>; +def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>; +def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>; +def : T_R_pat <S2_brev, int_hexagon_S2_brev>; +def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>; + +def : T_R_pat <A2_abs, int_hexagon_A2_abs>; +def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>; +def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>; + +def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>; + +def : T_P_pat <A2_sat, int_hexagon_A2_sat>; +def : T_R_pat <A2_sath, int_hexagon_A2_sath>; +def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>; +def : T_R_pat <A2_satub, int_hexagon_A2_satub>; +def : T_R_pat <A2_satb, int_hexagon_A2_satb>; + +// Vector arithmetic shift right by immediate with truncate and pack. +def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>; + +def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>; +def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>; +def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>; +def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>; +def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax, + int_hexagon_S2_asr_i_r_rnd_goodsyntax>; + +// Shift left by immediate with saturation. +def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>; +//===----------------------------------------------------------------------===// +// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions. +//===----------------------------------------------------------------------===// +class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst, + SDNodeXForm XformImm> + : Pat <(IntID IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4), + (OutputInst IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, + (XformImm u5ImmPred:$src4))>; + + +// Table Index : Extract and insert bits. +// Map to the real hardware instructions after subtracting appropriate +// values from the 4th input operand. Please note that subtraction is not +// needed for int_hexagon_S2_tableidxb_goodsyntax. + +def : Pat <(int_hexagon_S2_tableidxb_goodsyntax IntRegs:$src1, IntRegs:$src2, + u4ImmPred:$src3, u5ImmPred:$src4), + (S2_tableidxb IntRegs:$src1, IntRegs:$src2, + u4ImmPred:$src3, u5ImmPred:$src4)>; + +def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh, + DEC_CONST_SIGNED>; +def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw, + DEC2_CONST_SIGNED>; +def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd, + DEC3_CONST_SIGNED>; /******************************************************************** * STYPE/VH * *********************************************************************/ -// STYPE / VH / Vector absolute value halfwords. -// Rdd64=vabsh(Rss64) -def HEXAGON_A2_vabsh: - di_SInst_di <"vabsh", int_hexagon_A2_vabsh>; -def HEXAGON_A2_vabshsat: - di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>; - -// STYPE / VH / Vector shift halfwords by immediate. -// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32) -def HEXAGON_S2_asl_i_vh: - di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>; -def HEXAGON_S2_asr_i_vh: - di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>; -def HEXAGON_S2_lsr_i_vh: - di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>; - -// STYPE / VH / Vector shift halfwords by register. 
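The S2op_tableidx_pat class above handles the "goodsyntax" table-index intrinsics by rewriting the fourth operand with the given SDNodeXForm (DEC_CONST_SIGNED, DEC2_CONST_SIGNED and DEC3_CONST_SIGNED for the halfword, word and doubleword forms, which presumably subtract 1, 2 and 3), while the byte form needs no adjustment. For clarity, the halfword instance is shorthand for the written-out pattern below; this is just the expansion of the class, not an extra definition in the patch:

// Expansion of:
//   def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax,
//                            S2_tableidxh, DEC_CONST_SIGNED>;
def : Pat <(int_hexagon_S2_tableidxh_goodsyntax IntRegs:$src1, IntRegs:$src2,
            u4ImmPred:$src3, u5ImmPred:$src4),
           (S2_tableidxh IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3,
            (DEC_CONST_SIGNED u5ImmPred:$src4))>;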
-// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
-def HEXAGON_S2_asl_r_vh:
-  di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
-def HEXAGON_S2_asr_r_vh:
-  di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
-def HEXAGON_S2_lsl_r_vh:
-  di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
-def HEXAGON_S2_lsr_r_vh:
-  di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+// Vector absolute value halfwords with and without saturation
+// Rdd64=vabsh(Rss64)[:sat]
+def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>;
+def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>;
+
+// Vector shift halfwords by immediate
+// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4)
+def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>;
+def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>;
+def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>;
+// Vector shift halfwords by register
+// Rdd64=[vaslh/vasrh/vlslh/vlsrh](Rss64,Rt32)
+def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>;
+def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>;
+def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>;
+def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>;
 /********************************************************************
 * STYPE/VW *
 *********************************************************************/
-// STYPE / VW / Vector absolute value words.
-def HEXAGON_A2_vabsw:
-  di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
-def HEXAGON_A2_vabswsat:
-  di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
-
-// STYPE / VW / Vector shift words by immediate.
-// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def HEXAGON_S2_asl_i_vw:
-  di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
-def HEXAGON_S2_asr_i_vw:
-  di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
-def HEXAGON_S2_lsr_i_vw:
-  di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
-
-// STYPE / VW / Vector shift words by register.
-// Rdd64=v[asl/vsl]w(Rss64,Rt32)
-def HEXAGON_S2_asl_r_vw:
-  di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
-def HEXAGON_S2_asr_r_vw:
-  di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
-def HEXAGON_S2_lsl_r_vw:
-  di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
-def HEXAGON_S2_lsr_r_vw:
-  di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
-
-// STYPE / VW / Vector shift words with truncate and pack.
-def HEXAGON_S2_asr_r_svw_trun:
-  si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
-def HEXAGON_S2_asr_i_svw_trun:
-  si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
-
-// LD / Circular loads.
-def HEXAGON_circ_ldd:
-  di_LDInstPI_diu4 <"circ_ldd", int_hexagon_circ_ldd>;
+// Vector absolute value words with and without saturation
+def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>;
+def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>;
+
+// Vector shift words by immediate.
+// Rdd64=[vasrw/vlsrw|vaslw](Rss64,u5)
+def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>;
+def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>;
+def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>;
+
+// Vector shift words by register.
+// Rdd64=[vasrw/vlsrw|vaslw|vlslw](Rss64,Rt32) +def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>; +def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>; +def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>; +def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>; + +// Vector shift words with truncate and pack + +def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>; + +def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>; +def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>; + +def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))), + (i32 (C2_tfrpr (S2_storew_locked (I32:$Rs), (I32:$Rt))))>; +def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))), + (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>; include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td index 2788101..4c28b28 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td @@ -13,13 +13,13 @@ // def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), (i64 - (COMBINE_rr - (HEXAGON_M2_maci - (HEXAGON_M2_maci + (A2_combinew + (M2_maci + (M2_maci (i32 (EXTRACT_SUBREG (i64 - (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), @@ -31,7 +31,8 @@ def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), (i32 (EXTRACT_SUBREG (i64 - (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (M2_dpmpyuu_s0 + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), subreg_loreg))))>; diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td index 2a54e62..6152cb0 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV3.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td @@ -11,40 +11,17 @@ // //===----------------------------------------------------------------------===// - - - -// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary. -def Hexagon_M2_vrcmpys_s1: - di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>; -def Hexagon_M2_vrcmpys_acc_s1: - di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>; -def Hexagon_M2_vrcmpys_s1rp: - si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>; - - - - -/******************************************************************** -* MTYPE/VB * -*********************************************************************/ - -// MTYPE / VB / Vector reduce add unsigned bytes. -def Hexagon_M2_vradduh: - si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>; - - -/******************************************************************** -* ALU64/ALU * -*********************************************************************/ - -// ALU64 / ALU / Add. 
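The HexagonIntrinsicsDerived.td hunk above only renames the instructions used by the generic 64x64-bit multiply lowering (COMBINE_rr, HEXAGON_M2_maci and MPYU64 become A2_combinew, M2_maci and M2_dpmpyuu_s0); the decomposition itself is unchanged. Writing lo(x) and hi(x) for the lower and upper 32-bit halves of a 64-bit value and combine(h, l) = h*2^32 + l, the identity the pattern relies on is:

  (a * b) mod 2^64
      = combine( (hi(lo(a)*lo(b)) + hi(a)*lo(b) + lo(a)*hi(b)) mod 2^32,
                 lo(lo(a)*lo(b)) )

M2_dpmpyuu_s0 produces the full 64-bit lo(a)*lo(b) product, the two nested M2_maci steps add the cross products into its upper word, and A2_combinew reassembles the 64-bit result; the hi(a)*hi(b) term is scaled by 2^64 and therefore drops out.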
-def Hexagon_A2_addsp:
-  di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
-def Hexagon_A2_addpsat:
-  di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
-
-def Hexagon_A2_maxp:
-  di_ALU64_didi <"max", int_hexagon_A2_maxp>;
-def Hexagon_A2_maxup:
-  di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
+// Vector reduce complex multiply real or imaginary
+def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index 77b148b..8d068eb 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -12,359 +12,307 @@
 // 80-V9418-12 Rev. A
 // June 15, 2010
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : MType_R32_pat <int_hexagon_M2_mpysu_up, M2_mpysu_up>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1, M2_mpy_up_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyh_s1, M2_hmmpyh_s1>;
+def : MType_R32_pat <int_hexagon_M2_hmmpyl_s1, M2_hmmpyl_s1>;
+def : MType_R32_pat <int_hexagon_M2_mpy_up_s1_sat, M2_mpy_up_s1_sat>;
+
+// Vector reduce add halfwords
+def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+           (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>;
+
+def : T_P_pat <S2_brevp,
int_hexagon_S2_brevp>; + +def: T_P_pat <S2_ct0p, int_hexagon_S2_ct0p>; +def: T_P_pat <S2_ct1p, int_hexagon_S2_ct1p>; +def: T_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>; +def: T_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>; +def: T_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>; + + +class vcmpImm_pat <InstHexagon MI, Intrinsic IntID, PatLeaf immPred> : + Pat <(IntID (i64 DoubleRegs:$src1), immPred:$src2), + (MI (i64 DoubleRegs:$src1), immPred:$src2)>; + +def : vcmpImm_pat <A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi, u8ImmPred>; +def : vcmpImm_pat <A4_vcmpbgti, int_hexagon_A4_vcmpbgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui, u7ImmPred>; + +def : vcmpImm_pat <A4_vcmpheqi, int_hexagon_A4_vcmpheqi, s8ImmPred>; +def : vcmpImm_pat <A4_vcmphgti, int_hexagon_A4_vcmphgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmphgtui, int_hexagon_A4_vcmphgtui, u7ImmPred>; + +def : vcmpImm_pat <A4_vcmpweqi, int_hexagon_A4_vcmpweqi, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpwgti, int_hexagon_A4_vcmpwgti, s8ImmPred>; +def : vcmpImm_pat <A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui, u7ImmPred>; + +def : T_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>; + +def : T_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>; +def : T_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>; +def : T_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>; +def : T_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>; +def : T_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>; +def : T_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>; + +def : T_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>; +def : T_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>; +def : T_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>; + +def : T_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>; +def : T_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>; +def : T_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>; + +def : T_RP_pat <A4_boundscheck, int_hexagon_A4_boundscheck>; + +def : T_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>; + +def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>; +def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>; +def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>; +def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>; +// Multiply 32x32 and use upper result +def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>; +def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>; + +// Complex multiply 32x16 +def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>; +def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>; + +def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>; +def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>; + +def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>; +def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>; + +// Complex add/sub halfwords/words +def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>; +def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>; +def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>; +def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>; + +def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>; +def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>; + +// Extract bitfield +def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>; +def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>; +def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>; +def : T_RII_pat <S4_extract, int_hexagon_S4_extract>; + +// Vector conditional negate +// 
Rdd=vcnegh(Rss,Rt) +def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>; + +// Shift an immediate left by register amount +def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>; + +// Vector reduce maximum halfwords +def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>; +def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>; -// -// ALU 32 types. -// - -class si_ALU32_sisi_not<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class di_ALU32_s8si<string opc, Intrinsic IntID> - : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), - [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; +// Vector reduce maximum words +def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>; +def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>; -class di_ALU32_sis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; +// Vector reduce minimum halfwords +def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>; +def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>; -class qi_neg_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; +// Vector reduce minimum words +def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>; +def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>; -class qi_neg_ALU32_sis10<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; +// Rotate and reduce bytes +def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, + u2ImmPred:$src3), + (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>; + +// Rotate and reduce bytes with accumulation +// Rxx+=vrcrotate(Rss,Rt,#u2) +def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3, u2ImmPred:$src4), + (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3, u2ImmPred:$src4)>; + +// Vector conditional negate +def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>; -class qi_neg_ALU32_siu9<string opc, Intrinsic IntID> - : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; +// Logical xor with xor accumulation +def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>; + +// ALU64 - Vector min/max byte +def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>; +def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>; + +// Shift and add/sub/and/or +def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>; +def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>; +def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>; +def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>; +def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>; +def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>; +def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>; +def : T_IRI_pat <S4_subi_lsr_ri, 
int_hexagon_S4_subi_lsr_ri>; -class si_neg_ALU32_sisi<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class si_neg_ALU32_sis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class si_ALU32_sis8<string opc, Intrinsic IntID> - : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - - -// -// SInst Classes. -// -class qi_neg_SInst_qiqi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, and($src2, !$src3)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, and($src2, $src3)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, or($src2, !$src3)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, or($src2, $src3)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_SInst_si_addsis6<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, add($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - imm:$src3))]>; - -class si_SInst_si_subs6si<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, sub(#$src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, - IntRegs:$src3))]>; - -class di_ALU64_didi_neg<string opc, Intrinsic IntID> - : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class di_MInst_dididi_xacc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2), - !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2))], - "$dst2 = $dst">; - -class si_MInst_sisisi_and<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - 
IntRegs:$src3), - !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_MInst_sisisi_andn<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_SInst_sisis10_andi<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3), - !strconcat("$dst = ", !strconcat(opc , - "($src1, and($src2, #$src3))")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, - imm:$src3))]>; - -class si_MInst_sisisi_xor<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_MInst_sisisi_xorn<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_SInst_sisis10_or<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3), - !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - imm:$src3))]>; - -class si_MInst_sisisi_or<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_MInst_sisisi_orn<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3), - !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")), - [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, - IntRegs:$src3))]>; - -class si_SInst_siu5_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; +// Split bitfield +def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>; +def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>; +def: T_RR_pat<S4_parity, int_hexagon_S4_parity>; + +def: T_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>; +def: T_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>; + +def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>; +def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>; +def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>; /******************************************************************** * ALU32/ALU * *********************************************************************/ // ALU32 / ALU / Logical Operations. -def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>; -def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>; - +def: T_RR_pat<A4_andn, int_hexagon_A4_andn>; +def: T_RR_pat<A4_orn, int_hexagon_A4_orn>; /******************************************************************** * ALU32/PERM * *********************************************************************/ -// ALU32 / PERM / Combine Words Into Doublewords. 
-def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>; -def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>; - +// Combine Words Into Doublewords. +def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s8ExtPred>; +def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s8ExtPred>; /******************************************************************** * ALU32/PRED * *********************************************************************/ -// ALU32 / PRED / Conditional Shift Halfword. -// ALU32 / PRED / Conditional Sign Extend. -// ALU32 / PRED / Conditional Zero Extend. -// ALU32 / PRED / Compare. -def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>; -def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>; -def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>; +// Compare +def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s10ExtPred>; +def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s10ExtPred>; +def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u9ExtPred>; -def: T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi>; -def: T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei>; -def: T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui>; - -// ALU32 / PRED / cmpare To General Register. -def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>; -def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>; -def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>; -def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>; +def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>; +def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>; +def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>; +def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>; /******************************************************************** * CR * *********************************************************************/ -// CR / Corner Detection Acceleration. -def Hexagon_C4_fastcorner9: - qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>; -def Hexagon_C4_fastcorner9_not: - qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>; - // CR / Logical Operations On Predicates. 
-def Hexagon_C4_and_andn: - qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>; -def Hexagon_C4_and_and: - qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>; -def Hexagon_C4_and_orn: - qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>; -def Hexagon_C4_and_or: - qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>; -def Hexagon_C4_or_andn: - qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>; -def Hexagon_C4_or_and: - qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>; -def Hexagon_C4_or_orn: - qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>; -def Hexagon_C4_or_or: - qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>; +class qi_CRInst_qiqiqi_pat<Intrinsic IntID, InstHexagon Inst> : + Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)), + (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), + (C2_tfrrp IntRegs:$Rt), + (C2_tfrrp IntRegs:$Ru))))>; + +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_and, C4_and_and>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_andn, C4_and_andn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_or, C4_and_or>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_orn, C4_and_orn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_and, C4_or_and>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_andn, C4_or_andn>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_or, C4_or_or>; +def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_orn, C4_or_orn>; /******************************************************************** * XTYPE/ALU * *********************************************************************/ -// XTYPE / ALU / Add And Accumulate. -def Hexagon_S4_addaddi: - si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>; -def Hexagon_S4_subaddi: - si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>; +// Add And Accumulate. -// XTYPE / ALU / Logical Doublewords. -def Hexagon_S4_andnp: - di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>; -def Hexagon_S4_ornp: - di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>; +def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>; +def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>; -// XTYPE / ALU / Logical-logical Doublewords. -def Hexagon_M4_xor_xacc: - di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>; // XTYPE / ALU / Logical-logical Words. -def HEXAGON_M4_and_and: - si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>; -def HEXAGON_M4_and_or: - si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>; -def HEXAGON_M4_and_xor: - si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>; -def HEXAGON_M4_and_andn: - si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>; -def HEXAGON_M4_xor_and: - si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>; -def HEXAGON_M4_xor_or: - si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>; -def HEXAGON_M4_xor_andn: - si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>; -def HEXAGON_M4_or_and: - si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>; -def HEXAGON_M4_or_or: - si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>; -def HEXAGON_M4_or_xor: - si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>; -def HEXAGON_M4_or_andn: - si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>; -def HEXAGON_S4_or_andix: - si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>; -def HEXAGON_S4_or_andi: - si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>; -def HEXAGON_S4_or_ori: - si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>; - -// XTYPE / ALU / Modulo wrap. -def HEXAGON_A4_modwrapu: - si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>; - -// XTYPE / ALU / Round. 
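qi_CRInst_qiqiqi_pat applies the same i32-to-predicate wrapping used by the STYPE/PRED patterns earlier, now over three operands: each source is transferred into a predicate register with C2_tfrrp and the predicate result of the CR instruction is transferred back with C2_tfrpr. For reference, the C4_and_and instance above is shorthand for the written-out pattern (this is the expansion of the class, not an extra definition):

def: Pat<(i32 (int_hexagon_C4_and_and IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)),
         (i32 (C2_tfrpr (C4_and_and (C2_tfrrp IntRegs:$Rs),
                                    (C2_tfrrp IntRegs:$Rt),
                                    (C2_tfrrp IntRegs:$Ru))))>;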
-def HEXAGON_A4_cround_ri: - si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>; -def HEXAGON_A4_cround_rr: - si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>; -def HEXAGON_A4_round_ri: - si_SInst_siu5 <"round", int_hexagon_A4_round_ri>; -def HEXAGON_A4_round_rr: - si_SInst_sisi <"round", int_hexagon_A4_round_rr>; -def HEXAGON_A4_round_ri_sat: - si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>; -def HEXAGON_A4_round_rr_sat: - si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>; - -// XTYPE / ALU / Vector reduce add unsigned halfwords. -// XTYPE / ALU / Vector add bytes. -// XTYPE / ALU / Vector conditional negate. -// XTYPE / ALU / Vector maximum bytes. -// XTYPE / ALU / Vector reduce maximum halfwords. -// XTYPE / ALU / Vector reduce maximum words. -// XTYPE / ALU / Vector minimum bytes. -// XTYPE / ALU / Vector reduce minimum halfwords. -// XTYPE / ALU / Vector reduce minimum words. -// XTYPE / ALU / Vector subtract bytes. - - -/******************************************************************** -* XTYPE/BIT * -*********************************************************************/ - -// XTYPE / BIT / Count leading. -// XTYPE / BIT / Count trailing. -// XTYPE / BIT / Extract bitfield. -// XTYPE / BIT / Masked parity. -// XTYPE / BIT / Bit reverse. -// XTYPE / BIT / Split bitfield. - - -/******************************************************************** -* XTYPE/COMPLEX * -*********************************************************************/ - -// XTYPE / COMPLEX / Complex add/sub halfwords. -// XTYPE / COMPLEX / Complex add/sub words. -// XTYPE / COMPLEX / Complex multiply 32x16. -// XTYPE / COMPLEX / Vector reduce complex rotate. - - -/******************************************************************** -* XTYPE/MPY * -*********************************************************************/ - -// XTYPE / COMPLEX / Complex add/sub halfwords. +def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>; +def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>; +def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>; +def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>; +def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>; +def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>; +def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>; +def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>; +def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>; +def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>; +def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>; + +def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>; +def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>; +def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>; + +// Modulo wrap. 
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>; + +// Arithmetic/Convergent round +// Rd=[cround|round](Rs,Rt)[:sat] +// Rd=[cround|round](Rs,#u5)[:sat] +def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>; +def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>; + +def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>; +def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>; + +def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>; +def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>; + +def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>; diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td index 1d44b52..60e6b1e 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV5.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td @@ -1,395 +1,111 @@ -class sf_SInst_sf<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; - -class si_SInst_sf<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; - -class sf_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; - -class sf_SInst_di<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class sf_SInst_df<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class si_SInst_df<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class df_SInst_sf<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; - -class di_SInst_sf<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; - -class df_SInst_si<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; - -class df_SInst_df<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class di_SInst_df<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; - - -class df_SInst_di<string opc, Intrinsic IntID> - : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), - !strconcat("$dst = ", !strconcat(opc , "($src1)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; - -class sf_MInst_sfsf<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set IntRegs:$dst, (IntID 
IntRegs:$src1, IntRegs:$src2))]>; - -class df_MInst_dfdf<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class qi_ALU64_dfdf<string opc, Intrinsic IntID> - : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; - -class qi_ALU64_dfu5<string opc, Intrinsic IntID> - : ALU64_ri<(outs PredRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - - -class sf_MInst_sfsfsf_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$dst2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, - IntRegs:$src2, IntRegs:$dst2))], - "$dst2 = $dst">; - -class sf_MInst_sfsfsf_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$dst2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2)")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, - IntRegs:$src2, IntRegs:$dst2))], - "$dst2 = $dst">; - - -class sf_MInst_sfsfsfsi_sc<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2, IntRegs:$src3), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2, $src3):scale")), - [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, - IntRegs:$src2, IntRegs:$src3))], - "$dst2 = $dst">; - -class sf_MInst_sfsfsf_acc_lib<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$dst2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2):lib")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, - IntRegs:$src2, IntRegs:$dst2))], - "$dst2 = $dst">; - -class sf_MInst_sfsfsf_nac_lib<string opc, Intrinsic IntID> - : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, - IntRegs:$dst2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2):lib")), - [(set IntRegs:$dst, (IntID IntRegs:$src1, - IntRegs:$src2, IntRegs:$dst2))], - "$dst2 = $dst">; - -class df_MInst_dfdfdf_acc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$dst2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2, DoubleRegs:$dst2))], - "$dst2 = $dst">; - -class df_MInst_dfdfdf_nac<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$dst2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2, DoubleRegs:$dst2))], - "$dst2 = $dst">; - - -class df_MInst_dfdfdfsi_sc<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2, IntRegs:$src3), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2, $src3):scale")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, - DoubleRegs:$src2, IntRegs:$src3))], - "$dst2 = $dst">; - -class df_MInst_dfdfdf_acc_lib<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins 
DoubleRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$dst2), - !strconcat("$dst += ", !strconcat(opc , - "($src1, $src2):lib")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2, DoubleRegs:$dst2))], - "$dst2 = $dst">; - -class df_MInst_dfdfdf_nac_lib<string opc, Intrinsic IntID> - : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, - DoubleRegs:$dst2), - !strconcat("$dst -= ", !strconcat(opc , - "($src1, $src2):lib")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, - DoubleRegs:$src2, DoubleRegs:$dst2))], - "$dst2 = $dst">; - -class qi_SInst_sfsf<string opc, Intrinsic IntID> - : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; - -class qi_SInst_sfu5<string opc, Intrinsic IntID> - : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; - -class sf_ALU64_u10_pos<string opc, Intrinsic IntID> - : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")), - [(set IntRegs:$dst, (IntID imm:$src1))]>; - -class sf_ALU64_u10_neg<string opc, Intrinsic IntID> - : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")), - [(set IntRegs:$dst, (IntID imm:$src1))]>; - -class df_ALU64_u10_pos<string opc, Intrinsic IntID> - : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")), - [(set DoubleRegs:$dst, (IntID imm:$src1))]>; - -class df_ALU64_u10_neg<string opc, Intrinsic IntID> - : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1), - !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")), - [(set DoubleRegs:$dst, (IntID imm:$src1))]>; - -class di_MInst_diu6<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class di_MInst_diu4_rnd<string opc, Intrinsic IntID> - : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")), - [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class si_MInst_diu4_rnd_sat<string opc, Intrinsic IntID> - : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd:sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - -class si_SInst_diu4_sat<string opc, Intrinsic IntID> - : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), - !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), - [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; - - -def HEXAGON_C4_fastcorner9: - qi_SInst_qiqi <"fastcorner9", int_hexagon_C4_fastcorner9>; -def HEXAGON_C4_fastcorner9_not: - qi_SInst_qiqi <"!fastcorner9", int_hexagon_C4_fastcorner9_not>; -def HEXAGON_M5_vrmpybuu: - di_MInst_didi <"vrmpybu", int_hexagon_M5_vrmpybuu>; -def HEXAGON_M5_vrmacbuu: - di_MInst_dididi_acc <"vrmpybu", int_hexagon_M5_vrmacbuu>; -def HEXAGON_M5_vrmpybsu: - di_MInst_didi <"vrmpybsu", int_hexagon_M5_vrmpybsu>; -def HEXAGON_M5_vrmacbsu: - di_MInst_dididi_acc <"vrmpybsu", int_hexagon_M5_vrmacbsu>; -def HEXAGON_M5_vmpybuu: - di_MInst_sisi <"vmpybu", int_hexagon_M5_vmpybuu>; -def 
HEXAGON_M5_vmpybsu: - di_MInst_sisi <"vmpybsu", int_hexagon_M5_vmpybsu>; -def HEXAGON_M5_vmacbuu: - di_MInst_disisi_acc <"vmpybu", int_hexagon_M5_vmacbuu>; -def HEXAGON_M5_vmacbsu: - di_MInst_disisi_acc <"vmpybsu", int_hexagon_M5_vmacbsu>; -def HEXAGON_M5_vdmpybsu: - di_MInst_didi_sat <"vdmpybsu", int_hexagon_M5_vdmpybsu>; -def HEXAGON_M5_vdmacbsu: - di_MInst_dididi_acc_sat <"vdmpybsu", int_hexagon_M5_vdmacbsu>; -def HEXAGON_A5_vaddhubs: - si_SInst_didi_sat <"vaddhub", int_hexagon_A5_vaddhubs>; -def HEXAGON_S5_popcountp: - si_SInst_di <"popcount", int_hexagon_S5_popcountp>; -def HEXAGON_S5_asrhub_rnd_sat_goodsyntax: - si_MInst_diu4_rnd_sat <"vasrhub", int_hexagon_S5_asrhub_rnd_sat_goodsyntax>; -def HEXAGON_S5_asrhub_sat: - si_SInst_diu4_sat <"vasrhub", int_hexagon_S5_asrhub_sat>; -def HEXAGON_S5_vasrhrnd_goodsyntax: - di_MInst_diu4_rnd <"vasrh", int_hexagon_S5_vasrhrnd_goodsyntax>; -def HEXAGON_S2_asr_i_p_rnd: - di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p_rnd>; -def HEXAGON_S2_asr_i_p_rnd_goodsyntax: - di_MInst_diu6 <"asrrnd", int_hexagon_S2_asr_i_p_rnd_goodsyntax>; -def HEXAGON_F2_sfadd: - sf_MInst_sfsf <"sfadd", int_hexagon_F2_sfadd>; -def HEXAGON_F2_sfsub: - sf_MInst_sfsf <"sfsub", int_hexagon_F2_sfsub>; -def HEXAGON_F2_sfmpy: - sf_MInst_sfsf <"sfmpy", int_hexagon_F2_sfmpy>; -def HEXAGON_F2_sffma: - sf_MInst_sfsfsf_acc <"sfmpy", int_hexagon_F2_sffma>; -def HEXAGON_F2_sffma_sc: - sf_MInst_sfsfsfsi_sc <"sfmpy", int_hexagon_F2_sffma_sc>; -def HEXAGON_F2_sffms: - sf_MInst_sfsfsf_nac <"sfmpy", int_hexagon_F2_sffms>; -def HEXAGON_F2_sffma_lib: - sf_MInst_sfsfsf_acc_lib <"sfmpy", int_hexagon_F2_sffma_lib>; -def HEXAGON_F2_sffms_lib: - sf_MInst_sfsfsf_nac_lib <"sfmpy", int_hexagon_F2_sffms_lib>; -def HEXAGON_F2_sfcmpeq: - qi_SInst_sfsf <"sfcmp.eq", int_hexagon_F2_sfcmpeq>; -def HEXAGON_F2_sfcmpgt: - qi_SInst_sfsf <"sfcmp.gt", int_hexagon_F2_sfcmpgt>; -def HEXAGON_F2_sfcmpge: - qi_SInst_sfsf <"sfcmp.ge", int_hexagon_F2_sfcmpge>; -def HEXAGON_F2_sfcmpuo: - qi_SInst_sfsf <"sfcmp.uo", int_hexagon_F2_sfcmpuo>; -def HEXAGON_F2_sfmax: - sf_MInst_sfsf <"sfmax", int_hexagon_F2_sfmax>; -def HEXAGON_F2_sfmin: - sf_MInst_sfsf <"sfmin", int_hexagon_F2_sfmin>; -def HEXAGON_F2_sfclass: - qi_SInst_sfu5 <"sfclass", int_hexagon_F2_sfclass>; -def HEXAGON_F2_sfimm_p: - sf_ALU64_u10_pos <"sfmake", int_hexagon_F2_sfimm_p>; -def HEXAGON_F2_sfimm_n: - sf_ALU64_u10_neg <"sfmake", int_hexagon_F2_sfimm_n>; -def HEXAGON_F2_sffixupn: - sf_MInst_sfsf <"sffixupn", int_hexagon_F2_sffixupn>; -def HEXAGON_F2_sffixupd: - sf_MInst_sfsf <"sffixupd", int_hexagon_F2_sffixupd>; -def HEXAGON_F2_sffixupr: - sf_SInst_sf <"sffixupr", int_hexagon_F2_sffixupr>; -def HEXAGON_F2_dfadd: - df_MInst_dfdf <"dfadd", int_hexagon_F2_dfadd>; -def HEXAGON_F2_dfsub: - df_MInst_dfdf <"dfsub", int_hexagon_F2_dfsub>; -def HEXAGON_F2_dfmpy: - df_MInst_dfdf <"dfmpy", int_hexagon_F2_dfmpy>; -def HEXAGON_F2_dffma: - df_MInst_dfdfdf_acc <"dfmpy", int_hexagon_F2_dffma>; -def HEXAGON_F2_dffms: - df_MInst_dfdfdf_nac <"dfmpy", int_hexagon_F2_dffms>; -def HEXAGON_F2_dffma_lib: - df_MInst_dfdfdf_acc_lib <"dfmpy", int_hexagon_F2_dffma_lib>; -def HEXAGON_F2_dffms_lib: - df_MInst_dfdfdf_nac_lib <"dfmpy", int_hexagon_F2_dffms_lib>; -def HEXAGON_F2_dffma_sc: - df_MInst_dfdfdfsi_sc <"dfmpy", int_hexagon_F2_dffma_sc>; -def HEXAGON_F2_dfmax: - df_MInst_dfdf <"dfmax", int_hexagon_F2_dfmax>; -def HEXAGON_F2_dfmin: - df_MInst_dfdf <"dfmin", int_hexagon_F2_dfmin>; -def HEXAGON_F2_dfcmpeq: - qi_ALU64_dfdf <"dfcmp.eq", int_hexagon_F2_dfcmpeq>; -def HEXAGON_F2_dfcmpgt: - 
qi_ALU64_dfdf <"dfcmp.gt", int_hexagon_F2_dfcmpgt>; -def HEXAGON_F2_dfcmpge: - qi_ALU64_dfdf <"dfcmp.ge", int_hexagon_F2_dfcmpge>; -def HEXAGON_F2_dfcmpuo: - qi_ALU64_dfdf <"dfcmp.uo", int_hexagon_F2_dfcmpuo>; -def HEXAGON_F2_dfclass: - qi_ALU64_dfu5 <"dfclass", int_hexagon_F2_dfclass>; -def HEXAGON_F2_dfimm_p: - df_ALU64_u10_pos <"dfmake", int_hexagon_F2_dfimm_p>; -def HEXAGON_F2_dfimm_n: - df_ALU64_u10_neg <"dfmake", int_hexagon_F2_dfimm_n>; -def HEXAGON_F2_dffixupn: - df_MInst_dfdf <"dffixupn", int_hexagon_F2_dffixupn>; -def HEXAGON_F2_dffixupd: - df_MInst_dfdf <"dffixupd", int_hexagon_F2_dffixupd>; -def HEXAGON_F2_dffixupr: - df_SInst_df <"dffixupr", int_hexagon_F2_dffixupr>; -def HEXAGON_F2_conv_sf2df: - df_SInst_sf <"convert_sf2df", int_hexagon_F2_conv_sf2df>; -def HEXAGON_F2_conv_df2sf: - sf_SInst_df <"convert_df2sf", int_hexagon_F2_conv_df2sf>; -def HEXAGON_F2_conv_uw2sf: - sf_SInst_si <"convert_uw2sf", int_hexagon_F2_conv_uw2sf>; -def HEXAGON_F2_conv_uw2df: - df_SInst_si <"convert_uw2df", int_hexagon_F2_conv_uw2df>; -def HEXAGON_F2_conv_w2sf: - sf_SInst_si <"convert_w2sf", int_hexagon_F2_conv_w2sf>; -def HEXAGON_F2_conv_w2df: - df_SInst_si <"convert_w2df", int_hexagon_F2_conv_w2df>; -def HEXAGON_F2_conv_ud2sf: - sf_SInst_di <"convert_ud2sf", int_hexagon_F2_conv_ud2sf>; -def HEXAGON_F2_conv_ud2df: - df_SInst_di <"convert_ud2df", int_hexagon_F2_conv_ud2df>; -def HEXAGON_F2_conv_d2sf: - sf_SInst_di <"convert_d2sf", int_hexagon_F2_conv_d2sf>; -def HEXAGON_F2_conv_d2df: - df_SInst_di <"convert_d2df", int_hexagon_F2_conv_d2df>; -def HEXAGON_F2_conv_sf2uw: - si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw>; -def HEXAGON_F2_conv_sf2w: - si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w>; -def HEXAGON_F2_conv_sf2ud: - di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud>; -def HEXAGON_F2_conv_sf2d: - di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d>; -def HEXAGON_F2_conv_df2uw: - si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw>; -def HEXAGON_F2_conv_df2w: - si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w>; -def HEXAGON_F2_conv_df2ud: - di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud>; -def HEXAGON_F2_conv_df2d: - di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d>; -def HEXAGON_F2_conv_sf2uw_chop: - si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw_chop>; -def HEXAGON_F2_conv_sf2w_chop: - si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w_chop>; -def HEXAGON_F2_conv_sf2ud_chop: - di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud_chop>; -def HEXAGON_F2_conv_sf2d_chop: - di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d_chop>; -def HEXAGON_F2_conv_df2uw_chop: - si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw_chop>; -def HEXAGON_F2_conv_df2w_chop: - si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w_chop>; -def HEXAGON_F2_conv_df2ud_chop: - di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud_chop>; -def HEXAGON_F2_conv_df2d_chop: - di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d_chop>; +//===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//Rdd[+]=vrmpybsu(Rss,Rtt) +//Rdd[+]=vrmpybuu(Rss,Rtt) +let Predicates = [HasV5T] in { +def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>; +def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>; + +def : T_PP_pat <M5_vdmpybsu, int_hexagon_M5_vdmpybsu>; + +def : T_PPP_pat <M5_vrmacbsu, int_hexagon_M5_vrmacbsu>; +def : T_PPP_pat <M5_vrmacbuu, int_hexagon_M5_vrmacbuu>; +//Rxx+=vdmpybsu(Rss,Rtt):sat +def : T_PPP_pat <M5_vdmacbsu, int_hexagon_M5_vdmacbsu>; + +// Vector multiply bytes +// Rdd=vmpyb[s]u(Rs,Rt) +def : T_RR_pat <M5_vmpybsu, int_hexagon_M5_vmpybsu>; +def : T_RR_pat <M5_vmpybuu, int_hexagon_M5_vmpybuu>; + +// Rxx+=vmpyb[s]u(Rs,Rt) +def : T_PRR_pat <M5_vmacbsu, int_hexagon_M5_vmacbsu>; +def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>; + +// Rd=vaddhub(Rss,Rtt):sat +def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>; +} + +def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>; +def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>; +def : T_FF_pat<F2_sfmpy, int_hexagon_F2_sfmpy>; +def : T_FF_pat<F2_sfmax, int_hexagon_F2_sfmax>; +def : T_FF_pat<F2_sfmin, int_hexagon_F2_sfmin>; + +def : T_FF_pat<F2_sffixupn, int_hexagon_F2_sffixupn>; +def : T_FF_pat<F2_sffixupd, int_hexagon_F2_sffixupd>; +def : T_F_pat <F2_sffixupr, int_hexagon_F2_sffixupr>; + +def: qi_CRInst_qiqi_pat<C4_fastcorner9, int_hexagon_C4_fastcorner9>; +def: qi_CRInst_qiqi_pat<C4_fastcorner9_not, int_hexagon_C4_fastcorner9_not>; + +def : T_P_pat <S5_popcountp, int_hexagon_S5_popcountp>; +def : T_PI_pat <S5_asrhub_sat, int_hexagon_S5_asrhub_sat>; + +def : T_PI_pat <S2_asr_i_p_rnd, int_hexagon_S2_asr_i_p_rnd>; +def : T_PI_pat <S2_asr_i_p_rnd_goodsyntax, + int_hexagon_S2_asr_i_p_rnd_goodsyntax>; + +def : T_PI_pat <S5_asrhub_rnd_sat_goodsyntax, + int_hexagon_S5_asrhub_rnd_sat_goodsyntax>; + +def : T_PI_pat <S5_vasrhrnd_goodsyntax, int_hexagon_S5_vasrhrnd_goodsyntax>; + +def : T_FFF_pat <F2_sffma, int_hexagon_F2_sffma>; +def : T_FFF_pat <F2_sffms, int_hexagon_F2_sffms>; +def : T_FFF_pat <F2_sffma_lib, int_hexagon_F2_sffma_lib>; +def : T_FFF_pat <F2_sffms_lib, int_hexagon_F2_sffms_lib>; +def : T_FFFQ_pat <F2_sffma_sc, int_hexagon_F2_sffma_sc>; + +// Compare floating-point value +def : T_FF_pat <F2_sfcmpge, int_hexagon_F2_sfcmpge>; +def : T_FF_pat <F2_sfcmpuo, int_hexagon_F2_sfcmpuo>; +def : T_FF_pat <F2_sfcmpeq, int_hexagon_F2_sfcmpeq>; +def : T_FF_pat <F2_sfcmpgt, int_hexagon_F2_sfcmpgt>; + +def : T_DD_pat <F2_dfcmpeq, int_hexagon_F2_dfcmpeq>; +def : T_DD_pat <F2_dfcmpgt, int_hexagon_F2_dfcmpgt>; +def : T_DD_pat <F2_dfcmpge, int_hexagon_F2_dfcmpge>; +def : T_DD_pat <F2_dfcmpuo, int_hexagon_F2_dfcmpuo>; + +// Create floating-point value +def : T_I_pat <F2_sfimm_p, int_hexagon_F2_sfimm_p>; +def : T_I_pat <F2_sfimm_n, int_hexagon_F2_sfimm_n>; +def : T_I_pat <F2_dfimm_p, int_hexagon_F2_dfimm_p>; +def : T_I_pat <F2_dfimm_n, int_hexagon_F2_dfimm_n>; + +def : T_DI_pat <F2_dfclass, int_hexagon_F2_dfclass>; +def : T_FI_pat <F2_sfclass, int_hexagon_F2_sfclass>; +def : T_F_pat <F2_conv_sf2df, int_hexagon_F2_conv_sf2df>; +def : T_D_pat <F2_conv_df2sf, int_hexagon_F2_conv_df2sf>; +def : T_R_pat <F2_conv_uw2sf, int_hexagon_F2_conv_uw2sf>; +def : T_R_pat <F2_conv_uw2df, int_hexagon_F2_conv_uw2df>; +def : T_R_pat <F2_conv_w2sf, int_hexagon_F2_conv_w2sf>; +def : T_R_pat <F2_conv_w2df, int_hexagon_F2_conv_w2df>; +def : T_P_pat <F2_conv_ud2sf, int_hexagon_F2_conv_ud2sf>; +def : T_P_pat <F2_conv_ud2df, int_hexagon_F2_conv_ud2df>; +def : T_P_pat <F2_conv_d2sf, 
int_hexagon_F2_conv_d2sf>; +def : T_P_pat <F2_conv_d2df, int_hexagon_F2_conv_d2df>; +def : T_F_pat <F2_conv_sf2uw, int_hexagon_F2_conv_sf2uw>; +def : T_F_pat <F2_conv_sf2w, int_hexagon_F2_conv_sf2w>; +def : T_F_pat <F2_conv_sf2ud, int_hexagon_F2_conv_sf2ud>; +def : T_F_pat <F2_conv_sf2d, int_hexagon_F2_conv_sf2d>; +def : T_D_pat <F2_conv_df2uw, int_hexagon_F2_conv_df2uw>; +def : T_D_pat <F2_conv_df2w, int_hexagon_F2_conv_df2w>; +def : T_D_pat <F2_conv_df2ud, int_hexagon_F2_conv_df2ud>; +def : T_D_pat <F2_conv_df2d, int_hexagon_F2_conv_df2d>; +def : T_F_pat <F2_conv_sf2uw_chop, int_hexagon_F2_conv_sf2uw_chop>; +def : T_F_pat <F2_conv_sf2w_chop, int_hexagon_F2_conv_sf2w_chop>; +def : T_F_pat <F2_conv_sf2ud_chop, int_hexagon_F2_conv_sf2ud_chop>; +def : T_F_pat <F2_conv_sf2d_chop, int_hexagon_F2_conv_sf2d_chop>; +def : T_D_pat <F2_conv_df2uw_chop, int_hexagon_F2_conv_df2uw_chop>; +def : T_D_pat <F2_conv_df2w_chop, int_hexagon_F2_conv_df2w_chop>; +def : T_D_pat <F2_conv_df2ud_chop, int_hexagon_F2_conv_df2ud_chop>; +def : T_D_pat <F2_conv_df2d_chop, int_hexagon_F2_conv_df2d_chop>; diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 5e4346d..9c9f3af 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -15,7 +15,6 @@ #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" -#include "MCTargetDesc/HexagonMCInst.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Mangler.h" @@ -39,10 +38,9 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI, +void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI, HexagonAsmPrinter& AP) { MCI.setOpcode(MI->getOpcode()); - MCI.setDesc(MI->getDesc()); for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 97c626f..35f732c 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -205,20 +205,17 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or // are disabled, then these HazardRecs will be disabled. 
const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); + const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); delete Top.HazardRec; delete Bot.HazardRec; - Top.HazardRec = - TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer( - Itin, DAG); - Bot.HazardRec = - TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer( - Itin, DAG); + Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); delete Top.ResourceModel; delete Bot.ResourceModel; - Top.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel()); - Bot.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel()); + Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); + Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && "-misched-topdown incompatible with -misched-bottomup"); diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index 1e023c3..6034344 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -54,11 +54,9 @@ class VLIWResourceModel { unsigned TotalPackets; public: -VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) : - SchedModel(SM), TotalPackets(0) { - ResourcesModel = - TM.getSubtargetImpl()->getInstrInfo()->CreateTargetScheduleState( - *TM.getSubtargetImpl()); + VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM) + : SchedModel(SM), TotalPackets(0) { + ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI); // This hard requirement could be relaxed, // but for now do not let it proceed. diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 782c979..806d448 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -176,7 +176,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, return false; // if call in path, bail out. - if (MII->getOpcode() == Hexagon::CALLv3) + if (MII->getOpcode() == Hexagon::J2_call) return false; // if NVJ is running prior to RA, do the following checks. @@ -199,8 +199,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // of registers by individual passes in the backend. At this time, // we don't know the scope of usage and definitions of these // instructions. - if (MII->getOpcode() == Hexagon::TFR_condset_rr || - MII->getOpcode() == Hexagon::TFR_condset_ii || + if (MII->getOpcode() == Hexagon::TFR_condset_ii || MII->getOpcode() == Hexagon::TFR_condset_ri || MII->getOpcode() == Hexagon::TFR_condset_ir || MII->getOpcode() == Hexagon::LDriw_pred || @@ -228,8 +227,8 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, int64_t v = MI->getOperand(2).getImm(); if (!(isUInt<5>(v) || - ((MI->getOpcode() == Hexagon::CMPEQri || - MI->getOpcode() == Hexagon::CMPGTri) && + ((MI->getOpcode() == Hexagon::C2_cmpeqi || + MI->getOpcode() == Hexagon::C2_cmpgti) && (v == -1)))) return false; } @@ -299,49 +298,49 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, taken = true; switch (MI->getOpcode()) { - case Hexagon::CMPEQrr: - return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4 - : Hexagon::CMPEQrr_t_Jumpnv_nt_V4; + case Hexagon::C2_cmpeq: + return taken ? 
Hexagon::J4_cmpeq_t_jumpnv_t + : Hexagon::J4_cmpeq_t_jumpnv_nt; - case Hexagon::CMPEQri: { + case Hexagon::C2_cmpeqi: { if (reg >= 0) - return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4 - : Hexagon::CMPEQri_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpeqi_t_jumpnv_t + : Hexagon::J4_cmpeqi_t_jumpnv_nt; else - return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4 - : Hexagon::CMPEQn1_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpeqn1_t_jumpnv_t + : Hexagon::J4_cmpeqn1_t_jumpnv_nt; } - case Hexagon::CMPGTrr: { + case Hexagon::C2_cmpgt: { if (secondRegNewified) - return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4 - : Hexagon::CMPLTrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmplt_t_jumpnv_t + : Hexagon::J4_cmplt_t_jumpnv_nt; else - return taken ? Hexagon::CMPGTrr_t_Jumpnv_t_V4 - : Hexagon::CMPGTrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgt_t_jumpnv_t + : Hexagon::J4_cmpgt_t_jumpnv_nt; } - case Hexagon::CMPGTri: { + case Hexagon::C2_cmpgti: { if (reg >= 0) - return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4 - : Hexagon::CMPGTri_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgti_t_jumpnv_t + : Hexagon::J4_cmpgti_t_jumpnv_nt; else - return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4 - : Hexagon::CMPGTn1_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgtn1_t_jumpnv_t + : Hexagon::J4_cmpgtn1_t_jumpnv_nt; } - case Hexagon::CMPGTUrr: { + case Hexagon::C2_cmpgtu: { if (secondRegNewified) - return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4 - : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpltu_t_jumpnv_t + : Hexagon::J4_cmpltu_t_jumpnv_nt; else - return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4 - : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4; + return taken ? Hexagon::J4_cmpgtu_t_jumpnv_t + : Hexagon::J4_cmpgtu_t_jumpnv_nt; } - case Hexagon::CMPGTUri: - return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4 - : Hexagon::CMPGTUri_t_Jumpnv_nt_V4; + case Hexagon::C2_cmpgtui: + return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t + : Hexagon::J4_cmpgtui_t_jumpnv_nt; default: llvm_unreachable("Could not find matching New Value Jump instruction."); @@ -356,19 +355,15 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { << "********** Function: " << MF.getName() << "\n"); -#if 0 - // for now disable this, if we move NewValueJump before register - // allocation we need this information. - LiveVariables &LVs = getAnalysis<LiveVariables>(); -#endif + // If we move NewValueJump before register allocation we'll need live variable + // analysis here too. 
QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); QRI = static_cast<const HexagonRegisterInfo *>( MF.getSubtarget().getRegisterInfo()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - if (!QRI->Subtarget.hasV4TOps() || - DisableNewValueJumps) { + if (DisableNewValueJumps) { return false; } @@ -413,12 +408,12 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); if (!foundJump && - (MI->getOpcode() == Hexagon::JMP_t || - MI->getOpcode() == Hexagon::JMP_f || - MI->getOpcode() == Hexagon::JMP_tnew_t || - MI->getOpcode() == Hexagon::JMP_tnew_nt || - MI->getOpcode() == Hexagon::JMP_fnew_t || - MI->getOpcode() == Hexagon::JMP_fnew_nt)) { + (MI->getOpcode() == Hexagon::J2_jumpt || + MI->getOpcode() == Hexagon::J2_jumpf || + MI->getOpcode() == Hexagon::J2_jumptnewpt || + MI->getOpcode() == Hexagon::J2_jumptnew || + MI->getOpcode() == Hexagon::J2_jumpfnewpt || + MI->getOpcode() == Hexagon::J2_jumpfnew)) { // This is where you would insert your compare and // instr that feeds compare jmpPos = MII; @@ -454,9 +449,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { jmpTarget = MI->getOperand(1).getMBB(); foundJump = true; - if (MI->getOpcode() == Hexagon::JMP_f || - MI->getOpcode() == Hexagon::JMP_fnew_t || - MI->getOpcode() == Hexagon::JMP_fnew_nt) { + if (MI->getOpcode() == Hexagon::J2_jumpf || + MI->getOpcode() == Hexagon::J2_jumpfnewpt || + MI->getOpcode() == Hexagon::J2_jumpfnew) { invertPredicate = true; } continue; @@ -545,7 +540,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (isSecondOpReg) { // In case of CMPLT, or CMPLTU, or EQ with the second register // to newify, swap the operands. - if (cmpInstr->getOpcode() == Hexagon::CMPEQrr && + if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq && feederReg == (unsigned) cmpOp2) { unsigned tmp = cmpReg1; bool tmpIsKill = MO1IsKill; @@ -612,8 +607,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { .addReg(cmpOp2, getKillRegState(MO2IsKill)) .addMBB(jmpTarget); - else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri || - cmpInstr->getOpcode() == Hexagon::CMPGTri) && + else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi || + cmpInstr->getOpcode() == Hexagon::C2_cmpgti) && cmpOp2 == -1 ) // Corresponding new-value compare jump instructions don't have the // operand for -1 immediate value. diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index c79d78f..318ca72 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -39,6 +39,7 @@ let PrintMethod = "printImmOperand" in { def u16_0Imm : Operand<i32>; def u16_1Imm : Operand<i32>; def u16_2Imm : Operand<i32>; + def u16_3Imm : Operand<i32>; def u11_3Imm : Operand<i32>; def u10Imm : Operand<i32>; def u9Imm : Operand<i32>; @@ -258,6 +259,19 @@ def u16_s8ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16,8>(v); }]>; +def u16_0ImmPred : PatLeaf<(i32 imm), [{ + // True if the immediate fits in a 16-bit unsigned field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<16>(v); +}]>; + +def u11_3ImmPred : PatLeaf<(i32 imm), [{ + // True if the immediate fits in a 14-bit unsigned field, and the lowest + // three bits are 0. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<11,3>(v); +}]>; + def u9ImmPred : PatLeaf<(i32 imm), [{ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned // field. 
@@ -329,6 +343,12 @@ def u5ImmPred : PatLeaf<(i32 imm), [{ return isUInt<5>(v); }]>; +def u4ImmPred : PatLeaf<(i32 imm), [{ + // u4ImmPred predicate - True if the immediate fits in a 4-bit unsigned + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<4>(v); +}]>; def u3ImmPred : PatLeaf<(i32 imm), [{ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned @@ -497,309 +517,218 @@ def u0AlwaysExt : Operand<i32>; // Predicates for constant extendable operands def s16ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 16-bit sign extended field. - return isInt<16>(v); - else { - if (isInt<16>(v)) - return true; + if (isInt<16>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit signed field. + return isConstExtProfitable(Node) && isInt<32>(v); }]>; def s10ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 10-bit sign extended field. - return isInt<10>(v); - else { - if (isInt<10>(v)) - return true; + if (isInt<10>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit signed field. + return isConstExtProfitable(Node) && isInt<32>(v); }]>; def s9ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 9-bit sign extended field. - return isInt<9>(v); - else { - if (isInt<9>(v)) - return true; + if (isInt<9>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isInt<32>(v); }]>; def s8ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 8-bit sign extended field. - return isInt<8>(v); - else { - if (isInt<8>(v)) - return true; + if (isInt<8>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit signed field. + return isConstExtProfitable(Node) && isInt<32>(v); }]>; def s8_16ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate fits in a 8-bit sign extended field. - return isInt<8>(v); - else { - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 16-bit signed field. This is required to avoid - // unnecessary constant extenders. 
- return isConstExtProfitable(Node) && !isInt<16>(v); - } + if (isInt<8>(v)) + return true; + + // Return true if extending this immediate is profitable and the value + // can't fit in a 16-bit signed field. This is required to avoid + // unnecessary constant extenders. + return isConstExtProfitable(Node) && !isInt<16>(v); }]>; def s6ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 6-bit sign extended field. - return isInt<6>(v); - else { - if (isInt<6>(v)) - return true; + if (isInt<6>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isInt<32>(v); }]>; def s6_16ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate fits in a 6-bit sign extended field. - return isInt<6>(v); - else { - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 16-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<16>(v); - } + if (isInt<6>(v)) + return true; + + // Return true if extending this immediate is profitable and the value + // can't fit in a 16-bit signed field. This is required to avoid + // unnecessary constant extenders. + return isConstExtProfitable(Node) && !isInt<16>(v); }]>; def s6_10ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 6-bit sign extended field. - return isInt<6>(v); - else { - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 10-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<10>(v); - } + if (isInt<6>(v)) + return true; + + // Return true if extending this immediate is profitable and the value + // can't fit in a 10-bit signed field. This is required to avoid + // unnecessary constant extenders. + return isConstExtProfitable(Node) && !isInt<10>(v); }]>; def s11_0ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 11-bit sign extended field. - return isShiftedInt<11,0>(v); - else { - if (isInt<11>(v)) - return true; + if (isInt<11>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit signed field. + return isConstExtProfitable(Node) && isInt<32>(v); }]>; def s11_1ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 12-bit sign extended field and - // is 2 byte aligned. + if (isInt<12>(v)) return isShiftedInt<11,1>(v); - else { - if (isInt<12>(v)) - return isShiftedInt<11,1>(v); - // Return true if extending this immediate is profitable and the low 1 bit - // is zero (2-byte aligned). 
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0); - } + // Return true if extending this immediate is profitable and the low 1 bit + // is zero (2-byte aligned). + return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0); }]>; def s11_2ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 13-bit sign extended field and - // is 4-byte aligned. + if (isInt<13>(v)) return isShiftedInt<11,2>(v); - else { - if (isInt<13>(v)) - return isShiftedInt<11,2>(v); - // Return true if extending this immediate is profitable and the low 2-bits - // are zero (4-byte aligned). - return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0); - } + // Return true if extending this immediate is profitable and the low 2-bits + // are zero (4-byte aligned). + return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0); }]>; def s11_3ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 14-bit sign extended field and - // is 8-byte aligned. - return isShiftedInt<11,3>(v); - else { - if (isInt<14>(v)) - return isShiftedInt<11,3>(v); - - // Return true if extending this immediate is profitable and the low 3-bits - // are zero (8-byte aligned). - return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0); - } + if (isInt<14>(v)) + return isShiftedInt<11,3>(v); + + // Return true if extending this immediate is profitable and the low 3-bits + // are zero (8-byte aligned). + return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0); }]>; def u0AlwaysExtPred : PatLeaf<(i32 imm), [{ // Predicate for an unsigned 32-bit value that always needs to be extended. - if (Subtarget.hasV4TOps()) { - if (isConstExtProfitable(Node)) { - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<32>(v); - } + if (isConstExtProfitable(Node)) { + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); } return false; }]>; def u6ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 6-bit unsigned field. - return isUInt<6>(v); - else { - if (isUInt<6>(v)) - return true; + if (isUInt<6>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v); }]>; def u7ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 7-bit unsigned field. - return isUInt<7>(v); - else { - if (isUInt<7>(v)) - return true; + if (isUInt<7>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v); }]>; def u8ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 8-bit unsigned field. 
- return isUInt<8>(v); - else { - if (isUInt<8>(v)) - return true; + if (isUInt<8>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v); }]>; def u9ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 9-bit unsigned field. - return isUInt<9>(v); - else { - if (isUInt<9>(v)) - return true; + if (isUInt<9>(v)) + return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v); }]>; def u6_1ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 7-bit unsigned field and - // is 2-byte aligned. + if (isUInt<7>(v)) return isShiftedUInt<6,1>(v); - else { - if (isUInt<7>(v)) - return isShiftedUInt<6,1>(v); - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0); }]>; def u6_2ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 8-bit unsigned field and - // is 4-byte aligned. + if (isUInt<8>(v)) return isShiftedUInt<6,2>(v); - else { - if (isUInt<8>(v)) - return isShiftedUInt<6,2>(v); - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0); }]>; def u6_3ExtPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (!Subtarget.hasV4TOps()) - // Return true if the immediate can fit in a 9-bit unsigned field and - // is 8-byte aligned. + if (isUInt<9>(v)) return isShiftedUInt<6,3>(v); - else { - if (isUInt<9>(v)) - return isShiftedUInt<6,3>(v); - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0); - } + // Return true if extending this immediate is profitable and the value + // can fit in a 32-bit unsigned field. + return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0); }]>; + +// This complex pattern exists only to create a machine instruction operand +// of type "frame index". There doesn't seem to be a way to do that directly +// in the patterns. +def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>; + +// These complex patterns are not strictly necessary, since global address +// folding will happen during DAG combining. 
For distinguishing between GA +// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used. +def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>; +def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>; + // Addressing modes. def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; @@ -856,3 +785,12 @@ def symbolHi32 : Operand<i32> { def symbolLo32 : Operand<i32> { let PrintMethod = "printSymbolLo"; } + +// Return true for a 32 to 64-bit sign-extended load. +def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{ + LoadSDNode *LD = dyn_cast<LoadSDNode>(N); + if (!LD) + return false; + return LD->getExtensionType() == ISD::SEXTLOAD && + LD->getMemoryVT().getScalarType() == MVT::i32; +}]>; diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 8912152..afd3a17 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -112,7 +112,7 @@ INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole", bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo()); - QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo(); + QRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); MRI = &MF.getRegInfo(); DenseMap<unsigned, unsigned> PeepholeMap; @@ -133,7 +133,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = MII; // Look for sign extends: // %vreg170<def> = SXTW %vreg166 - if (!DisableOptSZExt && MI->getOpcode() == Hexagon::SXTW) { + if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) { assert (MI->getNumOperands() == 2); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); @@ -152,7 +152,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169) // %vreg170:DoublRegs, %vreg169:IntRegs if (!DisableOptExtTo64 && - MI->getOpcode () == Hexagon::COMBINE_Ir_V4) { + MI->getOpcode () == Hexagon::A4_combineir) { assert (MI->getNumOperands() == 3); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src1 = MI->getOperand(1); @@ -169,7 +169,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. // and convert into // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. - if (MI->getOpcode() == Hexagon::LSRd_ri) { + if (MI->getOpcode() == Hexagon::S2_lsr_i_p) { assert(MI->getNumOperands() == 3); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src1 = MI->getOperand(1); @@ -184,7 +184,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Look for P=NOT(P). if (!DisablePNotP && - (MI->getOpcode() == Hexagon::NOT_p)) { + (MI->getOpcode() == Hexagon::C2_not)) { assert (MI->getNumOperands() == 2); MachineOperand &Dst = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); @@ -269,10 +269,9 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices. 
switch (Op) { - case Hexagon::TFR_condset_rr: + case Hexagon::C2_mux: + case Hexagon::C2_muxii: case Hexagon::TFR_condset_ii: - case Hexagon::MUX_ii: - case Hexagon::MUX_rr: NewOp = Op; break; case Hexagon::TFR_condset_ri: @@ -281,11 +280,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { case Hexagon::TFR_condset_ir: NewOp = Hexagon::TFR_condset_ri; break; - case Hexagon::MUX_ri: - NewOp = Hexagon::MUX_ir; + case Hexagon::C2_muxri: + NewOp = Hexagon::C2_muxir; break; - case Hexagon::MUX_ir: - NewOp = Hexagon::MUX_ri; + case Hexagon::C2_muxir: + NewOp = Hexagon::C2_muxri; break; } if (NewOp) { diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2b6741c..3df98d6 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -45,9 +45,6 @@ HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st) const MCPhysReg * HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - static const MCPhysReg CalleeSavedRegsV2[] = { - Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 - }; static const MCPhysReg CalleeSavedRegsV3[] = { Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, @@ -55,11 +52,6 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { }; switch(Subtarget.getHexagonArchVersion()) { - case HexagonSubtarget::V1: - break; - case HexagonSubtarget::V2: - return CalleeSavedRegsV2; - case HexagonSubtarget::V3: case HexagonSubtarget::V4: case HexagonSubtarget::V5: return CalleeSavedRegsV3; @@ -88,10 +80,6 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) const TargetRegisterClass* const* HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = { - &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, - &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, - }; static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = { &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, @@ -102,11 +90,6 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { }; switch(Subtarget.getHexagonArchVersion()) { - case HexagonSubtarget::V1: - break; - case HexagonSubtarget::V2: - return CalleeSavedRegClassesV2; - case HexagonSubtarget::V3: case HexagonSubtarget::V4: case HexagonSubtarget::V5: return CalleeSavedRegClassesV3; @@ -159,20 +142,18 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // // r0 = add(r30, #10000) // r0 = memw(r0) - if ( (MI.getOpcode() == Hexagon::LDriw) || - (MI.getOpcode() == Hexagon::LDrid) || - (MI.getOpcode() == Hexagon::LDrih) || - (MI.getOpcode() == Hexagon::LDriuh) || - (MI.getOpcode() == Hexagon::LDrib) || - (MI.getOpcode() == Hexagon::LDriub) || - (MI.getOpcode() == Hexagon::LDriw_f) || - (MI.getOpcode() == Hexagon::LDrid_f)) { - unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ? + if ( (MI.getOpcode() == Hexagon::L2_loadri_io) || + (MI.getOpcode() == Hexagon::L2_loadrd_io) || + (MI.getOpcode() == Hexagon::L2_loadrh_io) || + (MI.getOpcode() == Hexagon::L2_loadruh_io) || + (MI.getOpcode() == Hexagon::L2_loadrb_io) || + (MI.getOpcode() == Hexagon::L2_loadrub_io)) { + unsigned dstReg = (MI.getOpcode() == Hexagon::L2_loadrd_io) ? 
getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) : MI.getOperand(0).getReg(); // Check if offset can fit in addi. - if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + if (!TII.isValidOffset(Hexagon::A2_addi, Offset)) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset); BuildMI(*MI.getParent(), II, MI.getDebugLoc(), @@ -180,19 +161,16 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, dstReg).addReg(FrameReg).addReg(dstReg); } else { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::ADD_ri), + TII.get(Hexagon::A2_addi), dstReg).addReg(FrameReg).addImm(Offset); } MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true); MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); - } else if ((MI.getOpcode() == Hexagon::STriw_indexed) || - (MI.getOpcode() == Hexagon::STriw) || - (MI.getOpcode() == Hexagon::STrid) || - (MI.getOpcode() == Hexagon::STrih) || - (MI.getOpcode() == Hexagon::STrib) || - (MI.getOpcode() == Hexagon::STrid_f) || - (MI.getOpcode() == Hexagon::STriw_f)) { + } else if ((MI.getOpcode() == Hexagon::S2_storeri_io) || + (MI.getOpcode() == Hexagon::S2_storerd_io) || + (MI.getOpcode() == Hexagon::S2_storerh_io) || + (MI.getOpcode() == Hexagon::S2_storerb_io)) { // For stores, we need a reserved register. Change // memw(r30 + #10000) = r0 to: // @@ -201,7 +179,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned resReg = HEXAGON_RESERVED_REG_1; // Check if offset can fit in addi. - if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + if (!TII.isValidOffset(Hexagon::A2_addi, Offset)) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); BuildMI(*MI.getParent(), II, MI.getDebugLoc(), @@ -209,47 +187,19 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, resReg).addReg(FrameReg).addReg(resReg); } else { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::ADD_ri), + TII.get(Hexagon::A2_addi), resReg).addReg(FrameReg).addImm(Offset); } MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true); MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else if (TII.isMemOp(&MI)) { // use the constant extender if the instruction provides it - // and we are V4TOps. - if (Subtarget.hasV4TOps()) { - if (TII.isConstExtended(&MI)) { - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); - TII.immediateExtend(&MI); - } else { - llvm_unreachable("Need to implement for memops"); - } + if (TII.isConstExtended(&MI)) { + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); + TII.immediateExtend(&MI); } else { - // Only V3 and older instructions here. - unsigned ResReg = HEXAGON_RESERVED_REG_1; - if (!MFI.hasVarSizedObjects() && - TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { - MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), - false, false, false); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset); - } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset); - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::A2_add), ResReg).addReg(FrameReg). 
- addReg(ResReg); - MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false, - true); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); - } else { - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg). - addImm(Offset); - MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false, - true); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); - } + llvm_unreachable("Need to implement for memops"); } } else { unsigned dstReg = MI.getOperand(0).getReg(); diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index 9750984..edf1c25 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -13,20 +13,25 @@ let Namespace = "Hexagon" in { - class HexagonReg<bits<5> num, string n> : Register<n> { + class HexagonReg<bits<5> num, string n, list<string> alt = [], + list<Register> alias = []> : Register<n> { field bits<5> Num; + let Aliases = alias; let HWEncoding{4-0} = num; } - class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs> : + class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs, + list<string> alt = []> : RegisterWithSubRegs<n, subregs> { field bits<5> Num; + + let AltNames = alt; let HWEncoding{4-0} = num; } // Registers are identified with 5-bit ID numbers. // Ri - 32-bit integer registers. - class Ri<bits<5> num, string n> : HexagonReg<num, n> { + class Ri<bits<5> num, string n, list<string> alt = []> : HexagonReg<num, n, alt> { let Num = num; } @@ -49,27 +54,37 @@ let Namespace = "Hexagon" in { } // Rc - control registers - class Rc<bits<5> num, string n> : HexagonReg<num, n> { + class Rc<bits<5> num, string n, + list<string> alt = [], list<Register> alias = []> : + HexagonReg<num, n, alt, alias> { let Num = num; } - // Rj - aliased integer registers - class Rj<string n, Ri R>: HexagonReg<R.Num, n> { - let Num = R.Num; - let Aliases = [R]; + // Rcc - 64-bit control registers. + class Rcc<bits<5> num, string n, list<Register> subregs, + list<string> alt = []> : + HexagonDoubleReg<num, n, subregs, alt> { + let Num = num; + let SubRegs = subregs; + } + + // Mx - address modifier registers + class Mx<bits<1> num, string n> : HexagonReg<{0b0000, num}, n> { + let Num = !cast<bits<5>>(num); } def subreg_loreg : SubRegIndex<32>; def subreg_hireg : SubRegIndex<32, 32>; + def subreg_overflow : SubRegIndex<1, 0>; // Integer registers. - foreach I = 0-31 in { - def R#I : Ri<I, "r"#I>, DwarfRegNum<[I]>; + foreach i = 0-28 in { + def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>; } - def SP : Rj<"sp", R29>, DwarfRegNum<[29]>; - def FP : Rj<"fp", R30>, DwarfRegNum<[30]>; - def LR : Rj<"lr", R31>, DwarfRegNum<[31]>; + def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>; + def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>; + def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>; // Aliases of the R* registers used to hold 64-bit int values (doubles). let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { @@ -97,44 +112,98 @@ let Namespace = "Hexagon" in { def P2 : Rp<2, "p2">, DwarfRegNum<[65]>; def P3 : Rp<3, "p3">, DwarfRegNum<[66]>; - // Control registers. - def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>; - def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>; - - def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>; - def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>; + // Modifier registers. + // C6 and C7 can also be M0 and M1, but register names must be unique, even + // if belonging to different register classes. 
+ def M0 : Mx<0, "m0">, DwarfRegNum<[72]>; + def M1 : Mx<1, "m1">, DwarfRegNum<[73]>; - def M0 : Rc<6, "m0">, DwarfRegNum<[71]>; - def M1 : Rc<7, "m1">, DwarfRegNum<[72]>; + // Fake register to represent USR.OVF bit. Artihmetic/saturating instruc- + // tions modify this bit, and multiple such instructions are allowed in the + // same packet. We need to ignore output dependencies on this bit, but not + // on the entire USR. + def USR_OVF : Rc<?, "usr.ovf">; - def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct? - def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct? + // Control registers. + def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>; + def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>; + def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>; + def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>; + def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, + DwarfRegNum<[71]>; + def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[72]>; + def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[73]>; + + def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[74]> { + let SubRegIndices = [subreg_overflow]; + let SubRegs = [USR_OVF]; + } + def PC : Rc<9, "pc">, DwarfRegNum<[75]>; + def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[76]>; + def GP : Rc<11, "gp">, DwarfRegNum<[77]>; + def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[78]>; + def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[79]>; + def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[80]>; + def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[81]>; } + // Control registers pairs. + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; + def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; + def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>; + def C9_8 : Rcc<8, "c9:8", [USR, PC]>, DwarfRegNum<[74]>; + def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; + def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>; + def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>; + } + // Register classes. // // FIXME: the register order should be defined in terms of the preferred // allocation order... 
// -def IntRegs : RegisterClass<"Hexagon", [i32,f32], 32, +def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28), R10, R11, R29, R30, R31)> { } -def DoubleRegs : RegisterClass<"Hexagon", [i64,f64], 64, +def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>; -def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))> +def PredRegs : RegisterClass<"Hexagon", + [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, + (add (sequence "P%u", 0, 3))> { let Size = 32; } -def CRRegs : RegisterClass<"Hexagon", [i32], 32, - (add (sequence "LC%u", 0, 1), - (sequence "SA%u", 0, 1), - (sequence "M%u", 0, 1), PC, GP)> { - let Size = 32; +let Size = 32 in +def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>; + +let Size = 32, isAllocatable = 0 in +def CtrRegs : RegisterClass<"Hexagon", [i32], 32, + (add LC0, SA0, LC1, SA1, + P3_0, + M0, M1, C6, C7, CS0, CS1, UPCL, UPCH, + USR, USR_OVF, UGP, GP, PC)>; + +let Size = 64, isAllocatable = 0 in +def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64, + (add C1_0, C3_2, C7_6, C9_8, C11_10, CS, UPC)>; + +def VolatileV3 { + list<Register> Regs = [D0, D1, D2, D3, D4, D5, D6, D7, + R28, R31, + P0, P1, P2, P3, + M0, M1, + LC0, LC1, SA0, SA1, USR, USR_OVF]; } + +def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a), +[{ + return isPositiveHalfWord(N); +}]>; diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index 2b459a4..0c24075 100644 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -15,6 +15,7 @@ #include "Hexagon.h" #include "HexagonTargetMachine.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" @@ -42,7 +43,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineFunctionAnalysis>(); AU.addPreserved<MachineFunctionAnalysis>(); - AU.addPreserved("stack-protector"); + AU.addPreserved<StackProtector>(); FunctionPass::getAnalysisUsage(AU); } }; diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index 8fdd493..ce6a39a 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -48,12 +48,9 @@ using namespace llvm; namespace { class HexagonSplitConst32AndConst64 : public MachineFunctionPass { - const HexagonTargetMachine &QTM; - public: static char ID; - HexagonSplitConst32AndConst64(const HexagonTargetMachine &TM) - : MachineFunctionPass(ID), QTM(TM) {} + HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {} const char *getPassName() const override { return "Hexagon Split Const32s and Const64s"; @@ -68,13 +65,12 @@ char HexagonSplitConst32AndConst64::ID = 0; bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { const HexagonTargetObjectFile &TLOF = - (const HexagonTargetObjectFile &)QTM.getSubtargetImpl() - ->getTargetLowering() - ->getObjFileLowering(); + *static_cast<const HexagonTargetObjectFile *>( + Fn.getTarget().getObjFileLowering()); if (TLOF.IsSmallDataEnabled()) return true; - const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); // Loop over all of the basic 
blocks for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); @@ -117,9 +113,9 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { MachineOperand &Symbol = MI->getOperand (1); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LO_label), DestReg).addOperand(Symbol); + TII->get(Hexagon::LO_PIC), DestReg).addOperand(Symbol); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HI_label), DestReg).addOperand(Symbol); + TII->get(Hexagon::HI_PIC), DestReg).addOperand(Symbol); // MBB->erase returns the iterator to the next instruction, which is the // one we want to process next MII = MBB->erase (MI); @@ -139,9 +135,9 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { else if (Opc == Hexagon::CONST64_Int_Real) { int DestReg = MI->getOperand(0).getReg(); int64_t ImmValue = MI->getOperand(1).getImm (); - unsigned DestLo = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg( + unsigned DestLo = Fn.getSubtarget().getRegisterInfo()->getSubReg( DestReg, Hexagon::subreg_loreg); - unsigned DestHi = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg( + unsigned DestHi = Fn.getSubtarget().getRegisterInfo()->getSubReg( DestReg, Hexagon::subreg_hireg); int32_t LowWord = (ImmValue & 0xFFFFFFFF); @@ -176,6 +172,6 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { //===----------------------------------------------------------------------===// FunctionPass * -llvm::createHexagonSplitConst32AndConst64(const HexagonTargetMachine &TM) { - return new HexagonSplitConst32AndConst64(TM); +llvm::createHexagonSplitConst32AndConst64() { + return new HexagonSplitConst32AndConst64(); } diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp index 1052b80..8873bb9 100644 --- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp +++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp @@ -58,13 +58,9 @@ namespace llvm { namespace { class HexagonSplitTFRCondSets : public MachineFunctionPass { - const HexagonTargetMachine &QTM; - const HexagonSubtarget &QST; - public: static char ID; - HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) : - MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + HexagonSplitTFRCondSets() : MachineFunctionPass(ID) { initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry()); } @@ -80,7 +76,7 @@ char HexagonSplitTFRCondSets::ID = 0; bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); // Loop over all of the basic blocks. 
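The CONST64_Int_Real expansion in the hunk above reduces to splitting the 64-bit immediate into two 32-bit words, one per subregister. A standalone sketch of just that arithmetic (plain C++, not the MachineInstr-building code; the function name is mine):

#include <cstdint>
#include <utility>

// Returns {low word, high word}; the pass moves the first value into
// subreg_loreg and the second into subreg_hireg of the destination pair.
std::pair<int32_t, int32_t> splitConst64(int64_t ImmValue) {
  int32_t LowWord  = static_cast<int32_t>(ImmValue & 0xFFFFFFFF);
  int32_t HighWord = static_cast<int32_t>((ImmValue >> 32) & 0xFFFFFFFF);
  return {LowWord, HighWord};
}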
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); @@ -90,41 +86,8 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); ++MII) { MachineInstr *MI = MII; - int Opc1, Opc2; switch(MI->getOpcode()) { - case Hexagon::TFR_condset_rr: - case Hexagon::TFR_condset_rr_f: - case Hexagon::TFR_condset_rr64_f: { - int DestReg = MI->getOperand(0).getReg(); - int SrcReg1 = MI->getOperand(2).getReg(); - int SrcReg2 = MI->getOperand(3).getReg(); - - if (MI->getOpcode() == Hexagon::TFR_condset_rr || - MI->getOpcode() == Hexagon::TFR_condset_rr_f) { - Opc1 = Hexagon::TFR_cPt; - Opc2 = Hexagon::TFR_cNotPt; - } - else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) { - Opc1 = Hexagon::TFR64_cPt; - Opc2 = Hexagon::TFR64_cNotPt; - } - - // Minor optimization: do not emit the predicated copy if the source - // and the destination is the same register. - if (DestReg != SrcReg1) { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1), - DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); - } - if (DestReg != SrcReg2) { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2), - DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); - } - MII = MBB->erase(MI); - --MII; - break; - } - case Hexagon::TFR_condset_ri: - case Hexagon::TFR_condset_ri_f: { + case Hexagon::TFR_condset_ri: { int DestReg = MI->getOperand(0).getReg(); int SrcReg1 = MI->getOperand(2).getReg(); @@ -132,77 +95,50 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { // is the same register. if (DestReg != SrcReg1) { BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFR_cPt), DestReg). + TII->get(Hexagon::A2_tfrt), DestReg). addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); } - if (MI->getOpcode() == Hexagon::TFR_condset_ri ) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFRI_cNotPt), DestReg). - addReg(MI->getOperand(1).getReg()). - addImm(MI->getOperand(3).getImm()); - } else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFRI_cNotPt_f), DestReg). - addReg(MI->getOperand(1).getReg()). - addFPImm(MI->getOperand(3).getFPImm()); - } + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::C2_cmoveif), DestReg). + addReg(MI->getOperand(1).getReg()). + addImm(MI->getOperand(3).getImm()); MII = MBB->erase(MI); --MII; break; } - case Hexagon::TFR_condset_ir: - case Hexagon::TFR_condset_ir_f: { + case Hexagon::TFR_condset_ir: { int DestReg = MI->getOperand(0).getReg(); int SrcReg2 = MI->getOperand(3).getReg(); - if (MI->getOpcode() == Hexagon::TFR_condset_ir ) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFRI_cPt), DestReg). - addReg(MI->getOperand(1).getReg()). - addImm(MI->getOperand(2).getImm()); - } else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFRI_cPt_f), DestReg). - addReg(MI->getOperand(1).getReg()). - addFPImm(MI->getOperand(2).getFPImm()); - } + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::C2_cmoveit), DestReg). + addReg(MI->getOperand(1).getReg()). + addImm(MI->getOperand(2).getImm()); // Do not emit the predicated copy if the source and // the destination is the same register. if (DestReg != SrcReg2) { BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFR_cNotPt), DestReg). + TII->get(Hexagon::A2_tfrf), DestReg). 
addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); } MII = MBB->erase(MI); --MII; break; } - case Hexagon::TFR_condset_ii: - case Hexagon::TFR_condset_ii_f: { + case Hexagon::TFR_condset_ii: { int DestReg = MI->getOperand(0).getReg(); int SrcReg1 = MI->getOperand(1).getReg(); - if (MI->getOpcode() == Hexagon::TFR_condset_ii ) { - int Immed1 = MI->getOperand(2).getImm(); - int Immed2 = MI->getOperand(3).getImm(); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFRI_cPt), - DestReg).addReg(SrcReg1).addImm(Immed1); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFRI_cNotPt), - DestReg).addReg(SrcReg1).addImm(Immed2); - } else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFRI_cPt_f), DestReg). - addReg(SrcReg1). - addFPImm(MI->getOperand(2).getFPImm()); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::TFRI_cNotPt_f), DestReg). - addReg(SrcReg1). - addFPImm(MI->getOperand(3).getFPImm()); - } + int Immed1 = MI->getOperand(2).getImm(); + int Immed2 = MI->getOperand(3).getImm(); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::C2_cmoveit), + DestReg).addReg(SrcReg1).addImm(Immed1); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::C2_cmoveif), + DestReg).addReg(SrcReg1).addImm(Immed2); MII = MBB->erase(MI); --MII; break; @@ -231,7 +167,6 @@ void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) { CALL_ONCE_INITIALIZATION(initializePassOnce) } -FunctionPass* -llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) { - return new HexagonSplitTFRCondSets(TM); +FunctionPass *llvm::createHexagonSplitTFRCondSets() { + return new HexagonSplitTFRCondSets(); } diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 657893f..380f023 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -54,12 +54,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { if (CPUString.empty()) CPUString = "hexagonv4"; - if (CPUString == "hexagonv2") { - HexagonArchVersion = V2; - } else if (CPUString == "hexagonv3") { - EnableV3 = true; - HexagonArchVersion = V3; - } else if (CPUString == "hexagonv4") { + if (CPUString == "hexagonv4") { HexagonArchVersion = V4; } else if (CPUString == "hexagonv5") { HexagonArchVersion = V5; @@ -74,9 +69,8 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()), - DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), - TSInfo(DL), FrameLowering() { + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + TSInfo(*TM.getDataLayout()), FrameLowering() { // Initialize scheduling itinerary for the specified CPU. 
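The TFR_condset_ri splitting shown above always follows the same shape; a rough standalone model (register and predicate numbers are illustrative, printf stands in for BuildMI):

#include <cstdio>

// The register half becomes a predicated transfer on the true sense
// (A2_tfrt), the immediate half a predicated move-immediate on the false
// sense (C2_cmoveif); the register copy is skipped when source and
// destination already coincide.
void expandTFRCondsetRI(int DestReg, int PredReg, int SrcReg, int Imm) {
  if (DestReg != SrcReg)
    std::printf("if (p%d) r%d = r%d\n", PredReg, DestReg, SrcReg);
  std::printf("if (!p%d) r%d = #%d\n", PredReg, DestReg, Imm);
}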
InstrItins = getInstrItineraryForCPU(CPUString); diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 10776ae..57de546 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -15,8 +15,8 @@ #define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H #include "HexagonFrameLowering.h" -#include "HexagonInstrInfo.h" #include "HexagonISelLowering.h" +#include "HexagonInstrInfo.h" #include "HexagonSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -39,13 +39,12 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { public: enum HexagonArchEnum { - V1, V2, V3, V4, V5 + V4, V5 }; HexagonArchEnum HexagonArchVersion; private: std::string CPUString; - const DataLayout DL; // Calculates type size & alignment. HexagonInstrInfo InstrInfo; HexagonTargetLowering TLInfo; HexagonSelectionDAGInfo TSInfo; @@ -74,7 +73,6 @@ public: const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } - const DataLayout *getDataLayout() const override { return &DL; } HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); @@ -83,18 +81,11 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - bool hasV2TOps () const { return HexagonArchVersion >= V2; } - bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; } - bool hasV3TOps () const { return HexagonArchVersion >= V3; } - bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; } - bool hasV4TOps () const { return HexagonArchVersion >= V4; } - bool hasV4TOpsOnly () const { return HexagonArchVersion == V4; } - bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; } - bool hasV5TOps () const { return HexagonArchVersion >= V5; } - bool hasV5TOpsOnly () const { return HexagonArchVersion == V5; } - bool modeIEEERndNear () const { return ModeIEEERndNear; } - - bool isSubtargetV2() const { return HexagonArchVersion == V2;} + bool useMemOps() const { return UseMemOps; } + bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } + bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } + bool modeIEEERndNear() const { return ModeIEEERndNear; } + const std::string &getCPUString () const { return CPUString; } // Threshold for small data section diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index cd18dfb..64f75a3 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -17,8 +17,8 @@ #include "HexagonMachineScheduler.h" #include "HexagonTargetObjectFile.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -71,7 +71,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<HexagonTargetObjectFile>()), - Subtarget(TT, CPU, FS, *this) { + DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } @@ -103,10 +103,10 @@ public: } bool addInstSelector() override; - bool addPreRegAlloc() override; - bool addPostRegAlloc() override; - bool addPreSched2() override; - bool addPreEmitPass() override; + void 
addPreRegAlloc() override; + void addPostRegAlloc() override; + void addPreSched2() override; + void addPreEmitPass() override; }; } // namespace @@ -131,51 +131,41 @@ bool HexagonPassConfig::addInstSelector() { return false; } -bool HexagonPassConfig::addPreRegAlloc() { +void HexagonPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) if (!DisableHardwareLoops) - addPass(createHexagonHardwareLoops()); - return false; + addPass(createHexagonHardwareLoops(), false); } -bool HexagonPassConfig::addPostRegAlloc() { - const HexagonTargetMachine &TM = getHexagonTargetMachine(); +void HexagonPassConfig::addPostRegAlloc() { if (getOptLevel() != CodeGenOpt::None) if (!DisableHexagonCFGOpt) - addPass(createHexagonCFGOptimizer(TM)); - return false; + addPass(createHexagonCFGOptimizer(), false); } -bool HexagonPassConfig::addPreSched2() { - const HexagonTargetMachine &TM = getHexagonTargetMachine(); - - addPass(createHexagonCopyToCombine()); +void HexagonPassConfig::addPreSched2() { + addPass(createHexagonCopyToCombine(), false); if (getOptLevel() != CodeGenOpt::None) - addPass(&IfConverterID); - addPass(createHexagonSplitConst32AndConst64(TM)); - printAndVerify("After hexagon split const32/64 pass"); - return true; + addPass(&IfConverterID, false); + addPass(createHexagonSplitConst32AndConst64()); } -bool HexagonPassConfig::addPreEmitPass() { - const HexagonTargetMachine &TM = getHexagonTargetMachine(); +void HexagonPassConfig::addPreEmitPass() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) - addPass(createHexagonNewValueJump()); + addPass(createHexagonNewValueJump(), false); // Expand Spill code for predicate registers. - addPass(createHexagonExpandPredSpillCode(TM)); + addPass(createHexagonExpandPredSpillCode(), false); // Split up TFRcondsets into conditional transfers. - addPass(createHexagonSplitTFRCondSets(TM)); + addPass(createHexagonSplitTFRCondSets(), false); // Create Packets. if (!NoOpt) { if (!DisableHardwareLoops) - addPass(createHexagonFixupHwLoops()); - addPass(createHexagonPacketizer()); + addPass(createHexagonFixupHwLoops(), false); + addPass(createHexagonPacketizer(), false); } - - return false; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index 4a9f447..e0b3a9b 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -24,6 +24,7 @@ class Module; class HexagonTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; + const DataLayout DL; // Calculates type size & alignment. 
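For readers decoding the data layout string that now lives on the target machine rather than the subtarget, an annotated copy follows; the per-component notes are my reading of the LLVM data layout syntax, not part of the change itself.

// Same string as in the HexagonTargetMachine constructor above.
const char *HexagonDataLayout =
    "e"         // little-endian
    "-m:e"      // ELF-style symbol mangling
    "-p:32:32"  // 32-bit pointers with 32-bit alignment
    "-i1:32"    // i1 kept at 32-bit alignment
    "-i64:64"   // i64 aligned to 64 bits
    "-a:0"      // aggregate alignment entry
    "-n32";     // 32-bit native integer width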
HexagonSubtarget Subtarget; public: @@ -32,7 +33,7 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~HexagonTargetMachine() override; - + const DataLayout *getDataLayout() const override { return &DL; } const HexagonSubtarget *getSubtargetImpl() const override { return &Subtarget; } diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index f4ab5e2..d8660d3 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -33,14 +33,10 @@ void HexagonTargetObjectFile::Initialize(MCContext &Ctx, TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); - SmallDataSection = - getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, - ELF::SHF_WRITE | ELF::SHF_ALLOC, - SectionKind::getDataRel()); - SmallBSSSection = - getContext().getELFSection(".sbss", ELF::SHT_NOBITS, - ELF::SHF_WRITE | ELF::SHF_ALLOC, - SectionKind::getBSS()); + SmallDataSection = getContext().getELFSection( + ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC); } // sdata/sbss support taken largely from the MIPS Backend. @@ -79,8 +75,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) { Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection( - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty)); + return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); } return false; diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index e7296d6..c123640 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -264,8 +264,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { static bool IsIndirectCall(MachineInstr* MI) { - return ((MI->getOpcode() == Hexagon::CALLR) || - (MI->getOpcode() == Hexagon::CALLRv3)); + return MI->getOpcode() == Hexagon::J2_callr; } // Reserve resources for constant extender. 
Trigure an assertion if @@ -273,7 +272,7 @@ static bool IsIndirectCall(MachineInstr* MI) { void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; MachineFunction *MF = MI->getParent()->getParent(); - MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i), + MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext), MI->getDebugLoc()); if (ResourceTracker->canReserveResources(PseudoMI)) { @@ -291,7 +290,7 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { assert((QII->isExtended(MI) || QII->isConstExtended(MI)) && "Should only be called for constant extended instructions"); MachineFunction *MF = MI->getParent()->getParent(); - MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i), + MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext), MI->getDebugLoc()); bool CanReserve = ResourceTracker->canReserveResources(PseudoMI); MF->DeleteMachineInstr(PseudoMI); @@ -303,7 +302,7 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; MachineFunction *MF = MI->getParent()->getParent(); - MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i), + MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext), MI->getDebugLoc()); if (ResourceTracker->canReserveResources(PseudoMI)) { @@ -366,12 +365,12 @@ static bool IsRegDependence(const SDep::Kind DepType) { } static bool IsDirectJump(MachineInstr* MI) { - return (MI->getOpcode() == Hexagon::JMP); + return (MI->getOpcode() == Hexagon::J2_jump); } static bool IsSchedBarrier(MachineInstr* MI) { switch (MI->getOpcode()) { - case Hexagon::BARRIER: + case Hexagon::Y2_barrier: return true; } return false; @@ -382,8 +381,8 @@ static bool IsControlFlow(MachineInstr* MI) { } static bool IsLoopN(MachineInstr *MI) { - return (MI->getOpcode() == Hexagon::LOOP0_i || - MI->getOpcode() == Hexagon::LOOP0_r); + return (MI->getOpcode() == Hexagon::J2_loop0i || + MI->getOpcode() == Hexagon::J2_loop0r); } /// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a @@ -563,8 +562,8 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( if (PacketSU->getInstr()->getDesc().mayStore() || // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME, // then we don't need this - PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME || - PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME) + PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe || + PacketSU->getInstr()->getOpcode() == Hexagon::L2_deallocframe) return false; } @@ -721,10 +720,7 @@ bool HexagonPacketizerList::CanPromoteToNewValue( MachineBasicBlock::iterator &MII) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - if (!QRI->Subtarget.hasV4TOps() || - !QII->mayBeNewStore(MI)) + if (!QII->mayBeNewStore(MI)) return false; MachineInstr *PacketMI = PacketSU->getInstr(); @@ -1055,84 +1051,82 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // first store is not in SLOT0. New value store, new value jump, // dealloc_return and memop always take SLOT0. 
// Arch spec 3.4.4.2 - if (QRI->Subtarget.hasV4TOps()) { - if (MCIDI.mayStore() && MCIDJ.mayStore() && - (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) { - Dependence = true; - return false; - } + if (MCIDI.mayStore() && MCIDJ.mayStore() && + (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) { + Dependence = true; + return false; + } - if ((QII->isMemOp(J) && MCIDI.mayStore()) - || (MCIDJ.mayStore() && QII->isMemOp(I)) - || (QII->isMemOp(J) && QII->isMemOp(I))) { - Dependence = true; - return false; - } + if ((QII->isMemOp(J) && MCIDI.mayStore()) + || (MCIDJ.mayStore() && QII->isMemOp(I)) + || (QII->isMemOp(J) && QII->isMemOp(I))) { + Dependence = true; + return false; + } - //if dealloc_return - if (MCIDJ.mayStore() && QII->isDeallocRet(I)) { - Dependence = true; - return false; - } + //if dealloc_return + if (MCIDJ.mayStore() && QII->isDeallocRet(I)) { + Dependence = true; + return false; + } - // If an instruction feeds new value jump, glue it. - MachineBasicBlock::iterator NextMII = I; - ++NextMII; - if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) { - MachineInstr *NextMI = NextMII; + // If an instruction feeds new value jump, glue it. + MachineBasicBlock::iterator NextMII = I; + ++NextMII; + if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) { + MachineInstr *NextMI = NextMII; - bool secondRegMatch = false; - bool maintainNewValueJump = false; + bool secondRegMatch = false; + bool maintainNewValueJump = false; - if (NextMI->getOperand(1).isReg() && - I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) { - secondRegMatch = true; - maintainNewValueJump = true; - } + if (NextMI->getOperand(1).isReg() && + I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) { + secondRegMatch = true; + maintainNewValueJump = true; + } - if (!secondRegMatch && - I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) { - maintainNewValueJump = true; - } + if (!secondRegMatch && + I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) { + maintainNewValueJump = true; + } - for (std::vector<MachineInstr*>::iterator - VI = CurrentPacketMIs.begin(), - VE = CurrentPacketMIs.end(); - (VI != VE && maintainNewValueJump); ++VI) { - SUnit *PacketSU = MIToSUnit.find(*VI)->second; + for (std::vector<MachineInstr*>::iterator + VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); + (VI != VE && maintainNewValueJump); ++VI) { + SUnit *PacketSU = MIToSUnit.find(*VI)->second; - // NVJ can not be part of the dual jump - Arch Spec: section 7.8 - if (PacketSU->getInstr()->getDesc().isCall()) { - Dependence = true; - break; - } - // Validate - // 1. Packet does not have a store in it. - // 2. If the first operand of the nvj is newified, and the second - // operand is also a reg, it (second reg) is not defined in - // the same packet. - // 3. If the second operand of the nvj is newified, (which means - // first operand is also a reg), first reg is not defined in - // the same packet. - if (PacketSU->getInstr()->getDesc().mayStore() || - PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME || - // Check #2. - (!secondRegMatch && NextMI->getOperand(1).isReg() && - PacketSU->getInstr()->modifiesRegister( - NextMI->getOperand(1).getReg(), QRI)) || - // Check #3. 
- (secondRegMatch && - PacketSU->getInstr()->modifiesRegister( - NextMI->getOperand(0).getReg(), QRI))) { - Dependence = true; - break; - } + // NVJ can not be part of the dual jump - Arch Spec: section 7.8 + if (PacketSU->getInstr()->getDesc().isCall()) { + Dependence = true; + break; + } + // Validate + // 1. Packet does not have a store in it. + // 2. If the first operand of the nvj is newified, and the second + // operand is also a reg, it (second reg) is not defined in + // the same packet. + // 3. If the second operand of the nvj is newified, (which means + // first operand is also a reg), first reg is not defined in + // the same packet. + if (PacketSU->getInstr()->getDesc().mayStore() || + PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe || + // Check #2. + (!secondRegMatch && NextMI->getOperand(1).isReg() && + PacketSU->getInstr()->modifiesRegister( + NextMI->getOperand(1).getReg(), QRI)) || + // Check #3. + (secondRegMatch && + PacketSU->getInstr()->modifiesRegister( + NextMI->getOperand(0).getReg(), QRI))) { + Dependence = true; + break; } - if (!Dependence) - GlueToNewValueJump = true; - else - return false; } + if (!Dependence) + GlueToNewValueJump = true; + else + return false; } if (SUJ->isSucc(SUI)) { @@ -1254,9 +1248,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { else if ((DepType == SDep::Order) && !I->hasOrderedMemoryRef() && !J->hasOrderedMemoryRef()) { - if (QRI->Subtarget.hasV4TOps() && - // hexagonv4 allows dual store. - MCIDI.mayStore() && MCIDJ.mayStore()) { + if (MCIDI.mayStore() && MCIDJ.mayStore()) { /* do nothing */ } // store followed by store-- not OK on V2 @@ -1278,11 +1270,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // packetized in a same packet. This implies that the store is using // caller's SP. Hence, offset needs to be updated accordingly. else if (DepType == SDep::Data - && QRI->Subtarget.hasV4TOps() - && J->getOpcode() == Hexagon::ALLOCFRAME - && (I->getOpcode() == Hexagon::STrid - || I->getOpcode() == Hexagon::STriw - || I->getOpcode() == Hexagon::STrib) + && J->getOpcode() == Hexagon::S2_allocframe + && (I->getOpcode() == Hexagon::S2_storerd_io + || I->getOpcode() == Hexagon::S2_storeri_io + || I->getOpcode() == Hexagon::S2_storerb_io) && I->getOperand(0).getReg() == QRI->getStackRegister() && QII->isValidOffset(I->getOpcode(), I->getOperand(1).getImm() - diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h deleted file mode 100644 index edbe29a..0000000 --- a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h +++ /dev/null @@ -1,149 +0,0 @@ -//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the functions that assign locations to outgoing function -// arguments. 
Adapted from the target independent version but this handles -// calls to varargs functions -// -//===----------------------------------------------------------------------===// -// - - - - -static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - Hexagon_CCState &State, - int NonVarArgsParams, - int CurrentParam, - bool ForceMem); - - -static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - Hexagon_CCState &State, - int NonVarArgsParams, - int CurrentParam, - bool ForceMem) { - unsigned ByValSize = 0; - if (ArgFlags.isByVal() && - ((ByValSize = ArgFlags.getByValSize()) > - (MVT(MVT::i64).getSizeInBits() / 8))) { - ForceMem = true; - } - - - // Only assign registers for named (non-varargs) arguments - if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <= - NonVarArgsParams))) { - - if (LocVT == MVT::i32 || - LocVT == MVT::i16 || - LocVT == MVT::i8 || - LocVT == MVT::f32) { - static const unsigned RegList1[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5 - }; - if (unsigned Reg = State.AllocateReg(RegList1, 6)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, - LocVT.getSimpleVT(), LocInfo)); - return false; - } - } - - if (LocVT == MVT::i64 || - LocVT == MVT::f64) { - static const unsigned RegList2[] = { - Hexagon::D0, Hexagon::D1, Hexagon::D2 - }; - if (unsigned Reg = State.AllocateReg(RegList2, 3)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, - LocVT.getSimpleVT(), LocInfo)); - return false; - } - } - } - - const Type* ArgTy = LocVT.getTypeForEVT(State.getContext()); - unsigned Alignment = State.getTarget() - .getSubtargetImpl() - ->getDataLayout() - ->getABITypeAlignment(ArgTy); - unsigned Size = - State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits( - ArgTy) / - 8; - - // If it's passed by value, then we need the size of the aggregate not of - // the pointer. - if (ArgFlags.isByVal()) { - Size = ByValSize; - - // Hexagon_TODO: Get the alignment of the contained type here. 
- Alignment = 8; - } - - unsigned Offset3 = State.AllocateStack(Size, Alignment); - State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3, - LocVT.getSimpleVT(), LocInfo)); - return false; -} - - -static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - Hexagon_CCState &State, - int NonVarArgsParams, - int CurrentParam, - bool ForceMem) { - - if (LocVT == MVT::i32 || - LocVT == MVT::f32) { - static const unsigned RegList1[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5 - }; - if (unsigned Reg = State.AllocateReg(RegList1, 6)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, - LocVT.getSimpleVT(), LocInfo)); - return false; - } - } - - if (LocVT == MVT::i64 || - LocVT == MVT::f64) { - static const unsigned RegList2[] = { - Hexagon::D0, Hexagon::D1, Hexagon::D2 - }; - if (unsigned Reg = State.AllocateReg(RegList2, 3)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg, - LocVT.getSimpleVT(), LocInfo)); - return false; - } - } - - const Type* ArgTy = LocVT.getTypeForEVT(State.getContext()); - unsigned Alignment = State.getTarget() - .getSubtargetImpl() - ->getDataLayout() - ->getABITypeAlignment(ArgTy); - unsigned Size = - State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits( - ArgTy) / - 8; - - unsigned Offset3 = State.AllocateStack(Size, Alignment); - State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3, - LocVT.getSimpleVT(), LocInfo)); - return false; -} diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 2a6124e..4c987ed 100644 --- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -4,7 +4,7 @@ add_llvm_library(LLVMHexagonDesc HexagonInstPrinter.cpp HexagonMCAsmInfo.cpp HexagonMCCodeEmitter.cpp - HexagonMCInst.cpp + HexagonMCInstrInfo.cpp HexagonMCTargetDesc.cpp ) diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index c0a3fae..8e02f79 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -19,7 +19,6 @@ #include "HexagonMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" - #include <stdint.h> namespace llvm { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 1fd8d70..6c87c9f 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -14,7 +14,7 @@ #include "HexagonAsmPrinter.h" #include "Hexagon.h" #include "HexagonInstPrinter.h" -#include "MCTargetDesc/HexagonMCInst.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -77,46 +77,41 @@ StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { return getRegisterName(RegNo); } -void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - printInst((const HexagonMCInst*)(MI), O, Annot); -} - -void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O, +void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, StringRef Annot) { const char startPacket = '{', endPacket = '}'; // TODO: add outer HW loop when it's supported too. 
if (MI->getOpcode() == Hexagon::ENDLOOP0) { // Ending a harware loop is different from ending an regular packet. - assert(MI->isPacketEnd() && "Loop-end must also end the packet"); + assert(HexagonMCInstrInfo::isPacketEnd(*MI) && "Loop-end must also end the packet"); - if (MI->isPacketStart()) { + if (HexagonMCInstrInfo::isPacketBegin(*MI)) { // There must be a packet to end a loop. // FIXME: when shuffling is always run, this shouldn't be needed. - HexagonMCInst Nop; + MCInst Nop; StringRef NoAnnot; - Nop.setOpcode (Hexagon::NOP); - Nop.setPacketStart (MI->isPacketStart()); + Nop.setOpcode (Hexagon::A2_nop); + HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI)); printInst (&Nop, O, NoAnnot); } // Close the packet. - if (MI->isPacketEnd()) + if (HexagonMCInstrInfo::isPacketEnd(*MI)) O << PacketPadding << endPacket; printInstruction(MI, O); } else { // Prefix the insn opening the packet. - if (MI->isPacketStart()) + if (HexagonMCInstrInfo::isPacketBegin(*MI)) O << PacketPadding << startPacket << '\n'; printInstruction(MI, O); // Suffix the insn closing the packet. - if (MI->isPacketEnd()) + if (HexagonMCInstrInfo::isPacketEnd(*MI)) // Suffix the packet in a new line always, since the GNU assembler has // issues with a closing brace on the same line as CONST{32,64}. O << '\n' << PacketPadding << endPacket; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 55ae95c..d02243b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -18,17 +18,14 @@ #include "llvm/MC/MCInstrInfo.h" namespace llvm { - class HexagonMCInst; - class HexagonInstPrinter : public MCInstPrinter { public: - explicit HexagonInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI) + explicit HexagonInstPrinter(MCAsmInfo const &MAI, + MCInstrInfo const &MII, + MCRegisterInfo const &MRI) : MCInstPrinter(MAI, MII, MRI), MII(MII) {} - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; - void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot) override; virtual StringRef getOpcodeName(unsigned Opcode) const; void printInstruction(const MCInst *MI, raw_ostream &O); StringRef getRegName(unsigned RegNo) const; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 4471977..a5a09ba 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -10,8 +10,8 @@ #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCCodeEmitter.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" -#include "MCTargetDesc/HexagonMCInst.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -35,9 +35,9 @@ namespace { /// Possible values for instruction packet parse field. enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 }; /// \brief Returns the packet bits based on instruction position. -uint32_t getPacketBits(HexagonMCInst const &HMI) { +uint32_t getPacketBits(MCInst const &HMI) { unsigned const ParseFieldOffset = 14; - ParseField Field = HMI.isPacketEnd() ? 
ParseField::end : ParseField::last0; + ParseField Field = HexagonMCInstrInfo::isPacketEnd(HMI) ? ParseField::end : ParseField::last0; return static_cast <uint32_t> (Field) << ParseFieldOffset; } void emitLittleEndian(uint64_t Binary, raw_ostream &OS) { @@ -51,14 +51,15 @@ void emitLittleEndian(uint64_t Binary, raw_ostream &OS) { HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST, MCContext &aMCT) - : MST(aMST), MCT(aMCT) {} + : MST(aMST), MCT(aMCT), MCII (aMII) {} void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const { - HexagonMCInst const &HMB = static_cast<HexagonMCInst const &>(MI); - uint64_t Binary = getBinaryCodeForInstr(HMB, Fixups, STI) | getPacketBits(HMB); - assert(HMB.getDesc().getSize() == 4 && "All instructions should be 32bit"); + uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI) | getPacketBits(MI); + assert(HexagonMCInstrInfo::getDesc(MCII, MI).getSize() == 4 && + "All instructions should be 32bit"); + (void)&MCII; emitLittleEndian(Binary, OS); ++MCNumEmitted; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index 96048ad..db1d707 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -28,6 +28,7 @@ namespace llvm { class HexagonMCCodeEmitter : public MCCodeEmitter { MCSubtargetInfo const &MST; MCContext &MCT; + MCInstrInfo const &MCII; public: HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST, @@ -51,8 +52,8 @@ public: MCSubtargetInfo const &STI) const; private: - HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION; - void operator=(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION; + HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) = delete; + void operator=(HexagonMCCodeEmitter const &) = delete; }; // class HexagonMCCodeEmitter } // namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp deleted file mode 100644 index c842b9b..0000000 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp +++ /dev/null @@ -1,176 +0,0 @@ -//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class extends MCInst to allow some Hexagon VLIW annotations. -// -//===----------------------------------------------------------------------===// - -#include "HexagonInstrInfo.h" -#include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCInst.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" - -using namespace llvm; - -// Return the slots used by the insn. -unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const { - const HexagonInstrInfo *QII = TM->getSubtargetImpl()->getInstrInfo(); - const InstrItineraryData *II = - TM->getSubtargetImpl()->getInstrItineraryData(); - const InstrStage* - IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass()); - - return (IS->getUnits()); -} - -// Return the Hexagon ISA class for the insn. 
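The parse-field computation in getPacketBits above, restated as a self-contained helper (constants taken from the hunk: field at bit offset 14, 0x3 for the last word of a packet, 0x1 otherwise; the helper name is mine):

#include <cstdint>

uint32_t packetParseBits(bool IsPacketEnd) {
  const unsigned ParseFieldOffset = 14;      // bits 15:14 of the 32-bit word
  const uint32_t Field = IsPacketEnd ? 0x3u  // ParseField::end
                                     : 0x1u; // ParseField::last0
  return Field << ParseFieldOffset;
}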
-unsigned HexagonMCInst::getType() const { - const uint64_t F = MCID->TSFlags; - - return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); -} - -// Return whether the insn is an actual insn. -bool HexagonMCInst::isCanon() const { - return (!MCID->isPseudo() && - !isPrefix() && - getType() != HexagonII::TypeENDLOOP); -} - -// Return whether the insn is a prefix. -bool HexagonMCInst::isPrefix() const { - return (getType() == HexagonII::TypePREFIX); -} - -// Return whether the insn is solo, i.e., cannot be in a packet. -bool HexagonMCInst::isSolo() const { - const uint64_t F = MCID->TSFlags; - return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); -} - -// Return whether the insn is a new-value consumer. -bool HexagonMCInst::isNewValue() const { - const uint64_t F = MCID->TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); -} - -// Return whether the instruction is a legal new-value producer. -bool HexagonMCInst::hasNewValue() const { - const uint64_t F = MCID->TSFlags; - return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); -} - -// Return the operand that consumes or produces a new value. -const MCOperand& HexagonMCInst::getNewValue() const { - const uint64_t F = MCID->TSFlags; - const unsigned O = (F >> HexagonII::NewValueOpPos) & - HexagonII::NewValueOpMask; - const MCOperand& MCO = getOperand(O); - - assert ((isNewValue() || hasNewValue()) && MCO.isReg()); - return (MCO); -} - -// Return whether the instruction needs to be constant extended. -// 1) Always return true if the instruction has 'isExtended' flag set. -// -// isExtendable: -// 2) For immediate extended operands, return true only if the value is -// out-of-range. -// 3) For global address, always return true. - -bool HexagonMCInst::isConstExtended(void) const { - if (isExtended()) - return true; - - if (!isExtendable()) - return false; - - short ExtOpNum = getCExtOpNum(); - int MinValue = getMinValue(); - int MaxValue = getMaxValue(); - const MCOperand& MO = getOperand(ExtOpNum); - - // We could be using an instruction with an extendable immediate and shoehorn - // a global address into it. If it is a global address it will be constant - // extended. We do this for COMBINE. - // We currently only handle isGlobal() because it is the only kind of - // object we are going to end up with here for now. - // In the future we probably should add isSymbol(), etc. - if (MO.isExpr()) - return true; - - // If the extendable operand is not 'Immediate' type, the instruction should - // have 'isExtended' flag set. - assert(MO.isImm() && "Extendable operand must be Immediate type"); - - int ImmValue = MO.getImm(); - return (ImmValue < MinValue || ImmValue > MaxValue); -} - -// Return whether the instruction must be always extended. -bool HexagonMCInst::isExtended(void) const { - const uint64_t F = MCID->TSFlags; - return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; -} - -// Return true if the instruction may be extended based on the operand value. -bool HexagonMCInst::isExtendable(void) const { - const uint64_t F = MCID->TSFlags; - return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; -} - -// Return number of bits in the constant extended operand. -unsigned HexagonMCInst::getBitCount(void) const { - const uint64_t F = MCID->TSFlags; - return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); -} - -// Return constant extended operand number. 
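All of the TSFlags queries being deleted here (and reintroduced as free functions in HexagonMCInstrInfo.cpp further down) share one shift-and-mask pattern; a generic restatement, with a helper name of my own:

#include <cstdint>

// Extracts a bit field such as Type, Solo, or NewValue from an instruction's
// TSFlags word: shift the field down to bit 0, then mask off its width.
uint64_t tsFlagsField(uint64_t TSFlags, unsigned Pos, uint64_t Mask) {
  return (TSFlags >> Pos) & Mask;
}
// Usage sketch:
//   unsigned Type = tsFlagsField(F, HexagonII::TypePos, HexagonII::TypeMask);
//   bool Solo = tsFlagsField(F, HexagonII::SoloPos, HexagonII::SoloMask) != 0;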
-unsigned short HexagonMCInst::getCExtOpNum(void) const { - const uint64_t F = MCID->TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); -} - -// Return whether the operand can be constant extended. -bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const { - const uint64_t F = MCID->TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) - == OperandNum; -} - -// Return the min value that a constant extendable operand can have -// without being extended. -int HexagonMCInst::getMinValue(void) const { - const uint64_t F = MCID->TSFlags; - unsigned isSigned = (F >> HexagonII::ExtentSignedPos) - & HexagonII::ExtentSignedMask; - unsigned bits = (F >> HexagonII::ExtentBitsPos) - & HexagonII::ExtentBitsMask; - - if (isSigned) // if value is signed - return -1U << (bits - 1); - else - return 0; -} - -// Return the max value that a constant extendable operand can have -// without being extended. -int HexagonMCInst::getMaxValue(void) const { - const uint64_t F = MCID->TSFlags; - unsigned isSigned = (F >> HexagonII::ExtentSignedPos) - & HexagonII::ExtentSignedMask; - unsigned bits = (F >> HexagonII::ExtentBitsPos) - & HexagonII::ExtentBitsMask; - - if (isSigned) // if value is signed - return ~(-1U << (bits - 1)); - else - return ~(-1U << bits); -} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h deleted file mode 100644 index 90fbbf3..0000000 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h +++ /dev/null @@ -1,100 +0,0 @@ -//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class extends MCInst to allow some VLIW annotations. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H -#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H - -#include "HexagonTargetMachine.h" -#include "llvm/MC/MCInst.h" - -namespace llvm { - class MCOperand; - - class HexagonMCInst: public MCInst { - // MCID is set during instruction lowering. - // It is needed in order to access TSFlags for - // use in checking MC instruction properties. - const MCInstrDesc *MCID; - - // Packet start and end markers - unsigned packetStart: 1, packetEnd: 1; - - public: - explicit HexagonMCInst(): - MCInst(), MCID(nullptr), packetStart(0), packetEnd(0) {}; - HexagonMCInst(const MCInstrDesc& mcid): - MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {}; - - bool isPacketStart() const { return (packetStart); }; - bool isPacketEnd() const { return (packetEnd); }; - void setPacketStart(bool Y) { packetStart = Y; }; - void setPacketEnd(bool Y) { packetEnd = Y; }; - void resetPacket() { setPacketStart(false); setPacketEnd(false); }; - - // Return the slots used by the insn. - unsigned getUnits(const HexagonTargetMachine* TM) const; - - // Return the Hexagon ISA class for the insn. - unsigned getType() const; - - void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; }; - const MCInstrDesc& getDesc(void) const { return *MCID; }; - - // Return whether the insn is an actual insn. - bool isCanon() const; - - // Return whether the insn is a prefix. 
- bool isPrefix() const; - - // Return whether the insn is solo, i.e., cannot be in a packet. - bool isSolo() const; - - // Return whether the instruction needs to be constant extended. - bool isConstExtended() const; - - // Return constant extended operand number. - unsigned short getCExtOpNum(void) const; - - // Return whether the insn is a new-value consumer. - bool isNewValue() const; - - // Return whether the instruction is a legal new-value producer. - bool hasNewValue() const; - - // Return the operand that consumes or produces a new value. - const MCOperand& getNewValue() const; - - // Return number of bits in the constant extended operand. - unsigned getBitCount(void) const; - - private: - // Return whether the instruction must be always extended. - bool isExtended() const; - - // Return true if the insn may be extended based on the operand value. - bool isExtendable() const; - - // Return true if the operand can be constant extended. - bool isOperandExtended(const unsigned short OperandNum) const; - - // Return the min value that a constant extendable operand can have - // without being extended. - int getMinValue() const; - - // Return the max value that a constant extendable operand can have - // without being extended. - int getMaxValue() const; - }; -} - -#endif diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp new file mode 100644 index 0000000..33e7c81 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -0,0 +1,223 @@ +//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class extends MCInstrInfo to allow Hexagon specific MCInstr queries +// +//===----------------------------------------------------------------------===// + +#include "HexagonMCInstrInfo.h" +#include "HexagonBaseInfo.h" + +namespace llvm { +void HexagonMCInstrInfo::AppendImplicitOperands(MCInst &MCI) { + MCI.addOperand(MCOperand::CreateImm(0)); + MCI.addOperand(MCOperand::CreateInst(nullptr)); +} + +unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return constant extended operand number. +unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (MCII.get(MCI.getOpcode())); +} + +std::bitset<16> HexagonMCInstrInfo::GetImplicitBits(MCInst const &MCI) { + SanityCheckImplicitOperands(MCI); + std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm()); + return Bits; +} + +// Return the max value that a constant extendable operand can have +// without being extended. 
+int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned isSigned = + (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return ~(-1U << (bits - 1)); + else + return ~(-1U << bits); +} + +// Return the min value that a constant extendable operand can have +// without being extended. +int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned isSigned = + (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1U << (bits - 1); + else + return 0; +} + +// Return the operand that consumes or produces a new value. +MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + unsigned const O = + (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask; + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + +// Return the Hexagon ISA class for the insn. +unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); +} + +// Return whether the instruction is a legal new-value producer. +bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); +} + +// Return whether the insn is an actual insn. +bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { + return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() && + !HexagonMCInstrInfo::isPrefix(MCII, MCI) && + HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); +} + +// Return whether the instruction needs to be constant extended. +// 1) Always return true if the instruction has 'isExtended' flag set. +// +// isExtendable: +// 2) For immediate extended operands, return true only if the value is +// out-of-range. +// 3) For global address, always return true. + +bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, + MCInst const &MCI) { + if (HexagonMCInstrInfo::isExtended(MCII, MCI)) + return true; + + if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) + return false; + + short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI); + int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI); + int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI); + MCOperand const &MO = MCI.getOperand(ExtOpNum); + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. 
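getMinValue and getMaxValue above feed the range test in isConstExtended; the combined check, written out as one self-contained predicate (64-bit arithmetic is used here in place of the unsigned-shift idiom in the original, and the function name is mine):

#include <cstdint>

// True when an N-bit immediate field (signed or unsigned) cannot hold Imm,
// i.e. when the instruction needs a constant-extender word.
bool needsConstExtender(int64_t Imm, unsigned Bits, bool Signed) {
  const int64_t Min = Signed ? -(int64_t(1) << (Bits - 1)) : 0;
  const int64_t Max = Signed ? (int64_t(1) << (Bits - 1)) - 1
                             : (int64_t(1) << Bits) - 1;
  return Imm < Min || Imm > Max;
}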
+  if (MO.isExpr())
+    return true;
+
+  // If the extendable operand is not 'Immediate' type, the instruction should
+  // have 'isExtended' flag set.
+  assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+  int ImmValue = MO.getImm();
+  return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Return true if the instruction may be extended based on the operand value.
+bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
+                                      MCInst const &MCI) {
+  uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
+
+// Return whether the instruction must be always extended.
+bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII,
+                                    MCInst const &MCI) {
+  uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Return whether the insn is a new-value consumer.
+bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII,
+                                    MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+// Return whether the operand can be constant extended.
+bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII,
+                                           MCInst const &MCI,
+                                           unsigned short OperandNum) {
+  uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
+         OperandNum;
+}
+
+bool HexagonMCInstrInfo::isPacketBegin(MCInst const &MCI) {
+  std::bitset<16> Bits(GetImplicitBits(MCI));
+  return Bits.test(packetBeginIndex);
+}
+
+bool HexagonMCInstrInfo::isPacketEnd(MCInst const &MCI) {
+  std::bitset<16> Bits(GetImplicitBits(MCI));
+  return Bits.test(packetEndIndex);
+}
+
+// Return whether the insn is a prefix.
+bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
+  return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX);
+}
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
+}
+
+void HexagonMCInstrInfo::resetPacket(MCInst &MCI) {
+  setPacketBegin(MCI, false);
+  setPacketEnd(MCI, false);
+}
+
+void HexagonMCInstrInfo::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) {
+  SanityCheckImplicitOperands(MCI);
+  MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong());
+}
+
+void HexagonMCInstrInfo::setPacketBegin(MCInst &MCI, bool f) {
+  std::bitset<16> Bits(GetImplicitBits(MCI));
+  Bits.set(packetBeginIndex, f);
+  SetImplicitBits(MCI, Bits);
+}
+
+void HexagonMCInstrInfo::setPacketEnd(MCInst &MCI, bool f) {
+  std::bitset<16> Bits(GetImplicitBits(MCI));
+  Bits.set(packetEndIndex, f);
+  SetImplicitBits(MCI, Bits);
+}
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
new file mode 100644
index 0000000..10fc0f3
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -0,0 +1,106 @@
+//===- HexagonMCInstrInfo.h - Hexagon sub-class of MCInst -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions for Hexagon specific MCInst queries
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+
+#include "llvm/MC/MCInstrInfo.h"
+
+#include <bitset>
+
+namespace llvm {
+class MCInstrDesc;
+class MCInstrInfo;
+class MCInst;
+class MCOperand;
+namespace HexagonMCInstrInfo {
+void AppendImplicitOperands(MCInst &MCI);
+
+// Return number of bits in the constant extended operand.
+unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return constant extended operand number.
+unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);
+
+MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);
+
+std::bitset<16> GetImplicitBits(MCInst const &MCI);
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the operand that consumes or produces a new value.
+MCOperand const &getNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return the Hexagon ISA class for the insn.
+unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction is a legal new-value producer.
+bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is an actual insn.
+bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction needs to be constant extended.
+bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the insn may be extended based on the operand value.
+bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the instruction must be always extended.
+bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is a new-value consumer.
+bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return true if the operand can be constant extended.
+bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI,
+                       unsigned short OperandNum);
+
+bool isPacketBegin(MCInst const &MCI);
+
+bool isPacketEnd(MCInst const &MCI);
+
+// Return whether the insn is a prefix.
+bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI);
+
+static const size_t packetBeginIndex = 0;
+static const size_t packetEndIndex = 1;
+
+void resetPacket(MCInst &MCI);
+
+inline void SanityCheckImplicitOperands(MCInst const &MCI) {
+  assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands");
+  assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() &&
+         "Parent pointer");
+  assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() &&
+         "Implicit bits and flags");
+}
+
+void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits);
+
+void setPacketBegin(MCInst &MCI, bool Y);
+
+void setPacketEnd(MCInst &MCI, bool Y);
+}
+}
+
+#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 14ddd9d..09a305b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
 #define GET_REGINFO_MC_DESC
 #include "HexagonGenRegisterInfo.inc"
 
-static MCInstrInfo *createHexagonMCInstrInfo() {
+MCInstrInfo *llvm::createHexagonMCInstrInfo() {
   MCInstrInfo *X = new MCInstrInfo();
   InitHexagonMCInstrInfo(X);
   return X;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 02fd516..f074b65 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -30,6 +30,8 @@ class raw_ostream;
 
 extern Target TheHexagonTarget;
 
+MCInstrInfo *createHexagonMCInstrInfo();
+
 MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
                                           MCRegisterInfo const &MRI,
                                           MCSubtargetInfo const &MST,
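
Taken together, these hunks replace the HexagonMCInst subclass with a HexagonMCInstrInfo namespace whose queries are keyed off the shared MCInstrInfo tables, plus two implicit operands appended to every MCInst for packet bookkeeping. The sketch below is not taken from this commit: tagSingleInstructionPacket is a hypothetical helper name, and the MCInst is assumed to have been decoded or lowered elsewhere. It only shows how a client such as the disassembler or MC code emitter might drive the new interface.

#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"

using namespace llvm;

// Mark a decoded instruction as a complete one-instruction packet and report
// whether encoding it will also require a constant-extender word.
static bool tagSingleInstructionPacket(MCInstrInfo const &MCII, MCInst &MCI) {
  // Every Hexagon MCInst now carries two trailing implicit operands: an
  // immediate holding the 16 flag bits and an MCInst slot for a parent link.
  HexagonMCInstrInfo::AppendImplicitOperands(MCI);

  // Bit packetBeginIndex marks the first insn of a packet and packetEndIndex
  // the last; a single-instruction packet sets both on the same insn.
  HexagonMCInstrInfo::setPacketBegin(MCI, true);
  HexagonMCInstrInfo::setPacketEnd(MCI, true);

  // TSFlags-based queries only need the MCInstrInfo tables, which a caller
  // obtains once from createHexagonMCInstrInfo().
  return HexagonMCInstrInfo::isConstExtended(MCII, MCI);
}

Routing every query through an MCInstrInfo const & mirrors how createHexagonMCCodeEmitter already receives its tables, so the helpers stay stateless and no Hexagon-specific MCInst subclass is needed.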