diff options
Diffstat (limited to 'lib/Target/Hexagon')
37 files changed, 3053 insertions, 2234 deletions
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index eaa8bef..c6ffb96 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -31,7 +31,6 @@ add_llvm_target(HexagonCodeGen HexagonRemoveSZExtArgs.cpp HexagonSelectionDAGInfo.cpp HexagonSplitConst32AndConst64.cpp - HexagonSplitTFRCondSets.cpp HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index e0a3b2f..dfe79f9 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -36,7 +36,6 @@ namespace llvm { FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); FunctionPass *createHexagonCFGOptimizer(); - FunctionPass *createHexagonSplitTFRCondSets(); FunctionPass *createHexagonSplitConst32AndConst64(); FunctionPass *createHexagonExpandPredSpillCode(); FunctionPass *createHexagonHardwareLoops(); diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index f892c9f..53a687c 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -28,10 +28,10 @@ def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. //===----------------------------------------------------------------------===// -def HasV5T : Predicate<"Subtarget->hasV5TOps()">; -def NoV5T : Predicate<"!Subtarget->hasV5TOps()">; -def UseMEMOP : Predicate<"Subtarget->useMemOps()">; -def IEEERndNearV5T : Predicate<"Subtarget->modeIEEERndNear()">; +def HasV5T : Predicate<"HST->hasV5TOps()">; +def NoV5T : Predicate<"!HST->hasV5TOps()">; +def UseMEMOP : Predicate<"HST->useMemOps()">; +def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; //===----------------------------------------------------------------------===// // Classes used for relation maps. @@ -168,14 +168,6 @@ def getRegForm : InstrMapping { let ValueCols = [["reg"]]; } -def getRegShlForm : InstrMapping { - let FilterClass = "ImmRegShl"; - let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; - let ColFields = ["InputType"]; - let KeyCol = ["imm"]; - let ValueCols = [["reg"]]; -} - //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index dd193f9..5a26045 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -127,12 +127,21 @@ static bool isCombinableInstType(MachineInstr *MI, case Hexagon::A2_tfrsi: { // A transfer-immediate can be combined if its argument is a signed 8bit // value. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - unsigned DestReg = MI->getOperand(0).getReg(); + const MachineOperand &Op0 = MI->getOperand(0); + const MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg()); + + unsigned DestReg = Op0.getReg(); + // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a + // workaround for an ABI bug that prevents GOT relocations on combine + // instructions + if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG) + return false; - // Only combine constant extended TFRI if we are in aggressive mode. + // Only combine constant extended A2_tfrsi if we are in aggressive mode. + bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm()); return Hexagon::IntRegsRegClass.contains(DestReg) && - (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm())); + (ShouldCombineAggressively || NotExt); } case Hexagon::TFRI_V4: { diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index 8176598..40059fb 100644 --- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -79,7 +79,166 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { ++MII) { MachineInstr *MI = MII; int Opc = MI->getOpcode(); - if (Opc == Hexagon::STriw_pred) { + if (Opc == Hexagon::S2_storerb_pci_pseudo || + Opc == Hexagon::S2_storerh_pci_pseudo || + Opc == Hexagon::S2_storeri_pci_pseudo || + Opc == Hexagon::S2_storerd_pci_pseudo || + Opc == Hexagon::S2_storerf_pci_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::S2_storerd_pci_pseudo) + Opcode = Hexagon::S2_storerd_pci; + else if (Opc == Hexagon::S2_storeri_pci_pseudo) + Opcode = Hexagon::S2_storeri_pci; + else if (Opc == Hexagon::S2_storerh_pci_pseudo) + Opcode = Hexagon::S2_storerh_pci; + else if (Opc == Hexagon::S2_storerf_pci_pseudo) + Opcode = Hexagon::S2_storerf_pci; + else if (Opc == Hexagon::S2_storerb_pci_pseudo) + Opcode = Hexagon::S2_storerb_pci; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); // Modifier value. + MachineOperand &Op4 = MI->getOperand(4); + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op3); + // Replace the pseude circ_ldd by the real circ_ldd. + MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(Op4); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::L2_loadrd_pci_pseudo || + Opc == Hexagon::L2_loadri_pci_pseudo || + Opc == Hexagon::L2_loadrh_pci_pseudo || + Opc == Hexagon::L2_loadruh_pci_pseudo|| + Opc == Hexagon::L2_loadrb_pci_pseudo || + Opc == Hexagon::L2_loadrub_pci_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::L2_loadrd_pci_pseudo) + Opcode = Hexagon::L2_loadrd_pci; + else if (Opc == Hexagon::L2_loadri_pci_pseudo) + Opcode = Hexagon::L2_loadri_pci; + else if (Opc == Hexagon::L2_loadrh_pci_pseudo) + Opcode = Hexagon::L2_loadrh_pci; + else if (Opc == Hexagon::L2_loadruh_pci_pseudo) + Opcode = Hexagon::L2_loadruh_pci; + else if (Opc == Hexagon::L2_loadrb_pci_pseudo) + Opcode = Hexagon::L2_loadrb_pci; + else if (Opc == Hexagon::L2_loadrub_pci_pseudo) + Opcode = Hexagon::L2_loadrub_pci; + else + llvm_unreachable("wrong Opc"); + + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op4 = MI->getOperand(4); // Modifier value. + MachineOperand &Op5 = MI->getOperand(5); + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op4); + // Replace the pseude circ_ldd by the real circ_ldd. + MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op1); + NewMI->addOperand(Op0); + NewMI->addOperand(Op2); + NewMI->addOperand(Op5); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo || + Opc == Hexagon::L2_loadri_pbr_pseudo || + Opc == Hexagon::L2_loadrh_pbr_pseudo || + Opc == Hexagon::L2_loadruh_pbr_pseudo|| + Opc == Hexagon::L2_loadrb_pbr_pseudo || + Opc == Hexagon::L2_loadrub_pbr_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::L2_loadrd_pbr_pseudo) + Opcode = Hexagon::L2_loadrd_pbr; + else if (Opc == Hexagon::L2_loadri_pbr_pseudo) + Opcode = Hexagon::L2_loadri_pbr; + else if (Opc == Hexagon::L2_loadrh_pbr_pseudo) + Opcode = Hexagon::L2_loadrh_pbr; + else if (Opc == Hexagon::L2_loadruh_pbr_pseudo) + Opcode = Hexagon::L2_loadruh_pbr; + else if (Opc == Hexagon::L2_loadrb_pbr_pseudo) + Opcode = Hexagon::L2_loadrb_pbr; + else if (Opc == Hexagon::L2_loadrub_pbr_pseudo) + Opcode = Hexagon::L2_loadrub_pbr; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op4 = MI->getOperand(4); // Modifier value. + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op4); + // Replace the pseudo brev_ldd by the real brev_ldd. + MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op1); + NewMI->addOperand(Op0); + NewMI->addOperand(Op2); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::S2_storerd_pbr_pseudo || + Opc == Hexagon::S2_storeri_pbr_pseudo || + Opc == Hexagon::S2_storerh_pbr_pseudo || + Opc == Hexagon::S2_storerb_pbr_pseudo || + Opc == Hexagon::S2_storerf_pbr_pseudo) { + unsigned Opcode; + if (Opc == Hexagon::S2_storerd_pbr_pseudo) + Opcode = Hexagon::S2_storerd_pbr; + else if (Opc == Hexagon::S2_storeri_pbr_pseudo) + Opcode = Hexagon::S2_storeri_pbr; + else if (Opc == Hexagon::S2_storerh_pbr_pseudo) + Opcode = Hexagon::S2_storerh_pbr; + else if (Opc == Hexagon::S2_storerf_pbr_pseudo) + Opcode = Hexagon::S2_storerf_pbr; + else if (Opc == Hexagon::S2_storerb_pbr_pseudo) + Opcode = Hexagon::S2_storerb_pbr; + else + llvm_unreachable("wrong Opc"); + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); // Modifier value. + // Emit a "C6 = Rn, C6 is the control register for M0". + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), + Hexagon::C6)->addOperand(Op3); + // Replace the pseudo brev_ldd by the real brev_ldd. + MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Opcode)); + NewMI->addOperand(Op0); + NewMI->addOperand(Op1); + NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, + false, /*isDef*/ + false, /*isImpl*/ + true /*isKill*/)); + NewMI->addOperand(Op2); + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::STriw_pred) { // STriw_pred [R30], ofst, SrcReg; unsigned FP = MI->getOperand(0).getReg(); assert(FP == QST.getRegisterInfo()->getFrameRegister() && diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 2b1992f..65d689b 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -140,7 +140,7 @@ bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); unsigned RetOpcode = MBBI->getOpcode(); - return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext; + return RetOpcode == Hexagon::TCRETURNi || RetOpcode == Hexagon::TCRETURNr; } void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 1577c33..c47ee9c 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -690,7 +690,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // If the induction variable bump is not a power of 2, quit. // Othwerise we'd need a general integer division. - if (!isPowerOf2_64(abs64(IVBump))) + if (!isPowerOf2_64(std::abs(IVBump))) return nullptr; MachineBasicBlock *PH = Loop->getLoopPreheader(); diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index fb056b5..aaccac8 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -45,37 +45,25 @@ namespace llvm { /// namespace { class HexagonDAGToDAGISel : public SelectionDAGISel { - /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can - /// make the right decision when generating code for different targets. - const HexagonSubtarget *Subtarget; - - // Keep a reference to HexagonTargetMachine. - const HexagonTargetMachine& TM; - DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap; + const HexagonTargetMachine& HTM; + const HexagonSubtarget *HST; public: - explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine, + explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(targetmachine, OptLevel), TM(targetmachine) { + : SelectionDAGISel(tm, OptLevel), HTM(tm) { initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } - bool hasNumUsesBelowThresGA(SDNode *N) const; - SDNode *Select(SDNode *N) override; + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. + HST = &MF.getSubtarget<HexagonSubtarget>(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } - // Complex Pattern Selectors. - inline bool foldGlobalAddress(SDValue &N, SDValue &R); - inline bool foldGlobalAddressGP(SDValue &N, SDValue &R); - bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP); - bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2); - bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2); + virtual void PreprocessISelDAG() override; + + SDNode *Select(SDNode *N) override; // Complex Pattern Selectors. inline bool SelectAddrGA(SDValue &N, SDValue &R); @@ -87,18 +75,12 @@ public: return "Hexagon DAG->DAG Pattern Instruction Selection"; } - bool runOnMachineFunction(MachineFunction &MF) override { - Subtarget = &MF.getSubtarget<HexagonSubtarget>(); - return SelectionDAGISel::runOnMachineFunction(MF); - } - + SDNode *SelectFrameIndex(SDNode *N); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; - bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - SDNode *SelectLoad(SDNode *N); SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl); @@ -110,99 +92,98 @@ public: SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl); SDNode *SelectStore(SDNode *N); SDNode *SelectSHL(SDNode *N); - SDNode *SelectSelect(SDNode *N); - SDNode *SelectTruncate(SDNode *N); SDNode *SelectMul(SDNode *N); SDNode *SelectZeroExtend(SDNode *N); - SDNode *SelectIntrinsicWOChain(SDNode *N); SDNode *SelectIntrinsicWChain(SDNode *N); + SDNode *SelectIntrinsicWOChain(SDNode *N); SDNode *SelectConstant(SDNode *N); SDNode *SelectConstantFP(SDNode *N); SDNode *SelectAdd(SDNode *N); - bool isConstExtProfitable(SDNode *N) const; - -// XformMskToBitPosU5Imm - Returns the bit position which -// the single bit 32 bit mask represents. -// Used in Clr and Set bit immediate memops. -SDValue XformMskToBitPosU5Imm(uint32_t Imm) { - int32_t bitPos; - bitPos = Log2_32(Imm); - assert(bitPos >= 0 && bitPos < 32 && - "Constant out of range for 32 BitPos Memops"); - return CurDAG->getTargetConstant(bitPos, MVT::i32); -} - -// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit -// mask represents. Used in Clr and Set bit immediate memops. -SDValue XformMskToBitPosU4Imm(uint16_t Imm) { - return XformMskToBitPosU5Imm(Imm); -} + SDNode *SelectBitOp(SDNode *N); + + // XformMskToBitPosU5Imm - Returns the bit position which + // the single bit 32 bit mask represents. + // Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU5Imm(uint32_t Imm) { + int32_t bitPos; + bitPos = Log2_32(Imm); + assert(bitPos >= 0 && bitPos < 32 && + "Constant out of range for 32 BitPos Memops"); + return CurDAG->getTargetConstant(bitPos, MVT::i32); + } -// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit -// mask represents. Used in Clr and Set bit immediate memops. -SDValue XformMskToBitPosU3Imm(uint8_t Imm) { - return XformMskToBitPosU5Imm(Imm); -} + // XformMskToBitPosU4Imm - Returns the bit position which the single-bit + // 16 bit mask represents. Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU4Imm(uint16_t Imm) { + return XformMskToBitPosU5Imm(Imm); + } -// Return true if there is exactly one bit set in V, i.e., if V is one of the -// following integers: 2^0, 2^1, ..., 2^31. -bool ImmIsSingleBit(uint32_t v) const { - return isPowerOf2_32(v); -} + // XformMskToBitPosU3Imm - Returns the bit position which the single-bit + // 8 bit mask represents. Used in Clr and Set bit immediate memops. + SDValue XformMskToBitPosU3Imm(uint8_t Imm) { + return XformMskToBitPosU5Imm(Imm); + } -// XformM5ToU5Imm - Return a target constant with the specified value, of type -// i32 where the negative literal is transformed into a positive literal for -// use in -= memops. -inline SDValue XformM5ToU5Imm(signed Imm) { - assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); - return CurDAG->getTargetConstant( - Imm, MVT::i32); -} + // Return true if there is exactly one bit set in V, i.e., if V is one of the + // following integers: 2^0, 2^1, ..., 2^31. + bool ImmIsSingleBit(uint32_t v) const { + return isPowerOf2_32(v); + } + // XformM5ToU5Imm - Return a target constant with the specified value, of + // type i32 where the negative literal is transformed into a positive literal + // for use in -= memops. + inline SDValue XformM5ToU5Imm(signed Imm) { + assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); + return CurDAG->getTargetConstant( - Imm, MVT::i32); + } -// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range -// [1..128], used in cmpb.gtu instructions. -inline SDValue XformU7ToU7M1Imm(signed Imm) { - assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); - return CurDAG->getTargetConstant(Imm - 1, MVT::i8); -} + // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range + // [1..128], used in cmpb.gtu instructions. + inline SDValue XformU7ToU7M1Imm(signed Imm) { + assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); + return CurDAG->getTargetConstant(Imm - 1, MVT::i8); + } -// XformS8ToS8M1Imm - Return a target constant decremented by 1. -inline SDValue XformSToSM1Imm(signed Imm) { - return CurDAG->getTargetConstant(Imm - 1, MVT::i32); -} + // XformS8ToS8M1Imm - Return a target constant decremented by 1. + inline SDValue XformSToSM1Imm(signed Imm) { + return CurDAG->getTargetConstant(Imm - 1, MVT::i32); + } -// XformU8ToU8M1Imm - Return a target constant decremented by 1. -inline SDValue XformUToUM1Imm(unsigned Imm) { - assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); - return CurDAG->getTargetConstant(Imm - 1, MVT::i32); -} + // XformU8ToU8M1Imm - Return a target constant decremented by 1. + inline SDValue XformUToUM1Imm(unsigned Imm) { + assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); + return CurDAG->getTargetConstant(Imm - 1, MVT::i32); + } -// XformSToSM2Imm - Return a target constant decremented by 2. -inline SDValue XformSToSM2Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm - 2, MVT::i32); -} + // XformSToSM2Imm - Return a target constant decremented by 2. + inline SDValue XformSToSM2Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm - 2, MVT::i32); + } -// XformSToSM3Imm - Return a target constant decremented by 3. -inline SDValue XformSToSM3Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm - 3, MVT::i32); -} + // XformSToSM3Imm - Return a target constant decremented by 3. + inline SDValue XformSToSM3Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm - 3, MVT::i32); + } -// Include the pieces autogenerated from the target description. -#include "HexagonGenDAGISel.inc" + // Include the pieces autogenerated from the target description. + #include "HexagonGenDAGISel.inc" private: - bool isValueExtension(SDValue const &Val, unsigned FromBits, SDValue &Src); -}; + bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src); +}; // end HexagonDAGToDAGISel } // end anonymous namespace /// createHexagonISelDag - This pass converts a legalized DAG into a /// Hexagon-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM, - CodeGenOpt::Level OptLevel) { +namespace llvm { +FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel) { return new HexagonDAGToDAGISel(TM, OptLevel); } +} static void initializePassOnce(PassRegistry &Registry) { const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection"; @@ -216,76 +197,6 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) { } -static bool IsS11_0_Offset(SDNode * S) { - ConstantSDNode *N = cast<ConstantSDNode>(S); - - // immS16 predicate - True if the immediate fits in a 16-bit sign extended - // field. - int64_t v = (int64_t)N->getSExtValue(); - return isInt<11>(v); -} - - -static bool IsS11_1_Offset(SDNode * S) { - ConstantSDNode *N = cast<ConstantSDNode>(S); - - // immS16 predicate - True if the immediate fits in a 16-bit sign extended - // field. - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<11,1>(v); -} - - -static bool IsS11_2_Offset(SDNode * S) { - ConstantSDNode *N = cast<ConstantSDNode>(S); - - // immS16 predicate - True if the immediate fits in a 16-bit sign extended - // field. - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<11,2>(v); -} - - -static bool IsS11_3_Offset(SDNode * S) { - ConstantSDNode *N = cast<ConstantSDNode>(S); - - // immS16 predicate - True if the immediate fits in a 16-bit sign extended - // field. - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<11,3>(v); -} - - -static bool IsU6_0_Offset(SDNode * S) { - ConstantSDNode *N = cast<ConstantSDNode>(S); - - // u6 predicate - True if the immediate fits in a 6-bit unsigned extended - // field. - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<6>(v); -} - - -static bool IsU6_1_Offset(SDNode * S) { - ConstantSDNode *N = cast<ConstantSDNode>(S); - - // u6 predicate - True if the immediate fits in a 6-bit unsigned extended - // field. - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<6,1>(v); -} - - -static bool IsU6_2_Offset(SDNode * S) { - ConstantSDNode *N = cast<ConstantSDNode>(S); - - // u6 predicate - True if the immediate fits in a 6-bit unsigned extended - // field. - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<6,2>(v); -} - - // Intrinsics that return a a predicate. static unsigned doesIntrinsicReturnPredicate(unsigned ID) { @@ -332,216 +243,119 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID) } } -static bool OffsetFitsS11(EVT MemType, int64_t Offset) { - if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) { - return true; - } - if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) { - return true; - } - if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) { - return true; - } - if (MemType == MVT::i8 && isInt<11>(Offset)) { - return true; - } - return false; -} - - -// -// Try to lower loads of GlobalAdresses into base+offset loads. Custom -// lowering for GlobalAddress nodes has already turned it into a -// CONST32. -// -SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) { - SDValue Chain = LD->getChain(); - SDNode* Const32 = LD->getBasePtr().getNode(); - unsigned Opcode = 0; - - if (Const32->getOpcode() == HexagonISD::CONST32 && - ISD::isNormalLoad(LD)) { - SDValue Base = Const32->getOperand(0); - EVT LoadedVT = LD->getMemoryVT(); - int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); - if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) { - MVT PointerTy = getTargetLowering()->getPointerTy(); - const GlobalValue* GV = - cast<GlobalAddressSDNode>(Base)->getGlobal(); - SDValue TargAddr = - CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); - SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, - dl, PointerTy, - TargAddr); - // Figure out base + offset opcode - if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io; - else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io; - else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io; - else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io; - else llvm_unreachable("unknown memory type"); - - // Build indexed load. - SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy); - SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - MVT::Other, - SDValue(NewBase,0), - TargetConstOff, - Chain); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(LD, Result); - return Result; - } - } - - return SelectCode(LD); -} - - SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, - SDLoc dl) -{ + SDLoc dl) { SDValue Chain = LD->getChain(); EVT LoadedVT = LD->getMemoryVT(); SDValue Base = LD->getBasePtr(); SDValue Offset = LD->getOffset(); SDNode *OffsetNode = Offset.getNode(); int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); - SDValue N1 = LD->getOperand(1); - SDValue CPTmpN1_0; - SDValue CPTmpN1_1; - - if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && - N1.getNode()->getValueType(0) == MVT::i32) { - const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); - if (TII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, - MVT::Other, Base, TargetConst, - Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, - SDValue(Result_1, 0)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_1, 1), - SDValue(Result_1, 2) - }; - ReplaceUses(Froms, Tos, 3); - return Result_2; - } - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, Base, TargetConst0, + + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Base, TargetConst, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, - MVT::i64, SDValue(Result_1, 0)); - SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, - MVT::i32, Base, TargetConstVal, - SDValue(Result_1, 1)); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); const SDValue Froms[] = { SDValue(LD, 0), SDValue(LD, 1), - SDValue(LD, 2) - }; + SDValue(LD, 2) }; const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_3, 0), - SDValue(Result_1, 1) - }; + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; ReplaceUses(Froms, Tos, 3); return Result_2; } - return SelectCode(LD); + + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, + Base, TargetConst0, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(Result_1, 0)); + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_3, 0), + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; } SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, - SDLoc dl) -{ + SDLoc dl) { SDValue Chain = LD->getChain(); EVT LoadedVT = LD->getMemoryVT(); SDValue Base = LD->getBasePtr(); SDValue Offset = LD->getOffset(); SDNode *OffsetNode = Offset.getNode(); int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); - SDValue N1 = LD->getOperand(1); - SDValue CPTmpN1_0; - SDValue CPTmpN1_1; - - if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && - N1.getNode()->getValueType(0) == MVT::i32) { - const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); - if (TII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::i32, MVT::Other, Base, - TargetConstVal, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, - TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, - MVT::i64, MVT::Other, - SDValue(Result_2,0), - SDValue(Result_1,0)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_3, 0), - SDValue(Result_1, 1), - SDValue(Result_1, 2) - }; - ReplaceUses(Froms, Tos, 3); - return Result_3; - } - // Generate an indirect load. - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, - Base, TargetConst0, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, - TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64, MVT::Other, - SDValue(Result_2,0), + TargetConst0, SDValue(Result_1,0)); - // Add offset to base. - SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, - Base, TargetConstVal, - SDValue(Result_1, 1)); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); const SDValue Froms[] = { SDValue(LD, 0), SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_3, 0), // Load value. - SDValue(Result_4, 0), // New address. - SDValue(Result_1, 1) - }; + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) }; ReplaceUses(Froms, Tos, 3); - return Result_3; + return Result_2; } - return SelectCode(LD); + // Generate an indirect load. + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Base, TargetConst0, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, + MVT::i64, MVT::Other, + TargetConst0, + SDValue(Result_1,0)); + // Add offset to base. + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) }; + const SDValue Tos[] = { SDValue(Result_2, 0), // Load value. + SDValue(Result_3, 0), // New address. + SDValue(Result_1, 1) }; + ReplaceUses(Froms, Tos, 3); + return Result_2; } @@ -555,45 +369,44 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { EVT LoadedVT = LD->getMemoryVT(); unsigned Opcode = 0; - // Check for zero ext loads. - bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD); + // Check for zero extended loads. Treat any-extend loads as zero extended + // loads. + ISD::LoadExtType ExtType = LD->getExtensionType(); + bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD); // Figure out the opcode. - const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); + const HexagonInstrInfo &TII = *HST->getInstrInfo(); if (LoadedVT == MVT::i64) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) + if (TII.isValidAutoIncImm(LoadedVT, Val)) Opcode = Hexagon::L2_loadrd_pi; else Opcode = Hexagon::L2_loadrd_io; } else if (LoadedVT == MVT::i32) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) + if (TII.isValidAutoIncImm(LoadedVT, Val)) Opcode = Hexagon::L2_loadri_pi; else Opcode = Hexagon::L2_loadri_io; } else if (LoadedVT == MVT::i16) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; + if (TII.isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; else - Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; + Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; } else if (LoadedVT == MVT::i8) { - if (TII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; + if (TII.isValidAutoIncImm(LoadedVT, Val)) + Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; else - Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; + Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; } else llvm_unreachable("unknown memory type"); - // For zero ext i64 loads, we need to add combine instructions. - if (LD->getValueType(0) == MVT::i64 && - LD->getExtensionType() == ISD::ZEXTLOAD) { + // For zero extended i64 loads, we need to add combine instructions. + if (LD->getValueType(0) == MVT::i64 && IsZeroExt) return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); - } - if (LD->getValueType(0) == MVT::i64 && - LD->getExtensionType() == ISD::SEXTLOAD) { - // Handle sign ext i64 loads. + // Handle sign extended i64 loads. + if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD) return SelectIndexedLoadSignExtend64(LD, Opcode, dl); - } - if (TII->isValidAutoIncImm(LoadedVT, Val)) { + + if (TII.isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); SDNode* Result = CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), @@ -649,7 +462,7 @@ SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { if (AM != ISD::UNINDEXED) { result = SelectIndexedLoad(LD, dl); } else { - result = SelectBaseOffsetLoad(LD, dl); + result = SelectCode(LD); } return result; @@ -665,13 +478,12 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { // Get the constant value. int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); EVT StoredVT = ST->getMemoryVT(); + EVT ValueVT = Value.getValueType(); // Offset value must be within representable range // and must have correct alignment properties. - const HexagonInstrInfo *TII = Subtarget->getInstrInfo(); - if (TII->isValidAutoIncImm(StoredVT, Val)) { - SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value, - Chain}; + const HexagonInstrInfo &TII = *HST->getInstrInfo(); + if (TII.isValidAutoIncImm(StoredVT, Val)) { unsigned Opcode = 0; // Figure out the post inc version of opcode. @@ -681,6 +493,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi; else llvm_unreachable("unknown memory type"); + if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) { + assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); + Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, + dl, MVT::i32, Value); + } + SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value, + Chain}; // Build post increment store. SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, Ops); @@ -694,7 +513,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { } // Note: Order of operands matches the def of instruction: - // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ... + // def S2_storerd_io + // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ... // and it differs for POST_ST* for instance. SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value, Chain}; @@ -724,61 +544,6 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { return Result_2; } - -SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, - SDLoc dl) { - SDValue Chain = ST->getChain(); - SDNode* Const32 = ST->getBasePtr().getNode(); - SDValue Value = ST->getValue(); - unsigned Opcode = 0; - - // Try to lower stores of GlobalAdresses into indexed stores. Custom - // lowering for GlobalAddress nodes has already turned it into a - // CONST32. Avoid truncating stores for the moment. Post-inc stores - // do the same. Don't think there's a reason for it, so will file a - // bug to fix. - if ((Const32->getOpcode() == HexagonISD::CONST32) && - !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) { - SDValue Base = Const32->getOperand(0); - if (Base.getOpcode() == ISD::TargetGlobalAddress) { - EVT StoredVT = ST->getMemoryVT(); - int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset(); - if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) { - MVT PointerTy = getTargetLowering()->getPointerTy(); - const GlobalValue* GV = - cast<GlobalAddressSDNode>(Base)->getGlobal(); - SDValue TargAddr = - CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0); - SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set, - dl, PointerTy, - TargAddr); - - // Figure out base + offset opcode - if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io; - else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io; - else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io; - else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io; - else llvm_unreachable("unknown memory type"); - - SDValue Ops[] = {SDValue(NewBase,0), - CurDAG->getTargetConstant(Offset,PointerTy), - Value, Chain}; - // build indexed store - SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = ST->getMemOperand(); - cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(ST, Result); - return Result; - } - } - } - - return SelectCode(ST); -} - - SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { SDLoc dl(N); StoreSDNode *ST = cast<StoreSDNode>(N); @@ -789,7 +554,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { return SelectIndexedStore(ST, dl); } - return SelectBaseOffsetStore(ST, dl); + return SelectCode(ST); } SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { @@ -875,187 +640,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { return SelectCode(N); } - -SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) { - SDLoc dl(N); - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() == ISD::SETCC) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) { - SDValue N000 = N00.getOperand(0); - SDValue N001 = N00.getOperand(1); - if (cast<VTSDNode>(N001)->getVT() == MVT::i16) { - SDValue N01 = N0.getOperand(1); - SDValue N02 = N0.getOperand(2); - - // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, - // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1, - // IntRegs:i32:$src2) - // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) - // Pattern complexity = 9 cost = 1 size = 0. - if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) { - SDValue N1 = N->getOperand(1); - if (N01 == N1) { - SDValue N2 = N->getOperand(2); - if (N000 == N2 && - N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && - N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { - SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl, - MVT::i32, N000); - SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl, - MVT::i32, - SDValue(SextNode, 0), - N1); - ReplaceUses(N, Result); - return Result; - } - } - } - - // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, - // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1, - // IntRegs:i32:$src2) - // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) - // Pattern complexity = 9 cost = 1 size = 0. - if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) { - SDValue N1 = N->getOperand(1); - if (N01 == N1) { - SDValue N2 = N->getOperand(2); - if (N000 == N2 && - N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && - N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { - SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl, - MVT::i32, N000); - SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl, - MVT::i32, - SDValue(SextNode, 0), - N1); - ReplaceUses(N, Result); - return Result; - } - } - } - } - } - } - - return SelectCode(N); -} - - -SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { - SDLoc dl(N); - SDValue Shift = N->getOperand(0); - - // - // %conv.i = sext i32 %tmp1 to i64 - // %conv2.i = sext i32 %add to i64 - // %mul.i = mul nsw i64 %conv2.i, %conv.i - // %shr5.i = lshr i64 %mul.i, 32 - // %conv3.i = trunc i64 %shr5.i to i32 - // - // --- match with the following --- - // - // %conv3.i = mpy (%tmp1, %add) - // - // Trunc to i32. - if (N->getValueType(0) == MVT::i32) { - // Trunc from i64. - if (Shift.getNode()->getValueType(0) == MVT::i64) { - // Trunc child is logical shift right. - if (Shift.getOpcode() != ISD::SRL) { - return SelectCode(N); - } - - SDValue ShiftOp0 = Shift.getOperand(0); - SDValue ShiftOp1 = Shift.getOperand(1); - - // Shift by const 32 - if (ShiftOp1.getOpcode() != ISD::Constant) { - return SelectCode(N); - } - - int32_t ShiftConst = - cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue(); - if (ShiftConst != 32) { - return SelectCode(N); - } - - // Shifting a i64 signed multiply - SDValue Mul = ShiftOp0; - if (Mul.getOpcode() != ISD::MUL) { - return SelectCode(N); - } - - SDValue MulOp0 = Mul.getOperand(0); - SDValue MulOp1 = Mul.getOperand(1); - - SDValue OP0; - SDValue OP1; - - // Handle sign_extend and sextload - if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext0 = MulOp0.getOperand(0); - if (Sext0.getNode()->getValueType(0) != MVT::i32) { - return SelectCode(N); - } - - OP0 = Sext0; - } else if (MulOp0.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - return SelectCode(N); - } - - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), - TargetConst0, Chain), 0); - } else { - return SelectCode(N); - } - - // Same goes for the second operand. - if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext1 = MulOp1.getOperand(0); - if (Sext1.getNode()->getValueType(0) != MVT::i32) - return SelectCode(N); - - OP1 = Sext1; - } else if (MulOp1.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - return SelectCode(N); - } - - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); - OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), - TargetConst0, Chain), 0); - } else { - return SelectCode(N); - } - - // Generate a mpy instruction. - SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32, - OP0, OP1); - ReplaceUses(N, Result); - return Result; - } - } - - return SelectCode(N); -} - - SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { SDLoc dl(N); if (N->getValueType(0) == MVT::i32) { @@ -1134,6 +718,36 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { // SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { SDLoc dl(N); + + SDValue Op0 = N->getOperand(0); + EVT OpVT = Op0.getValueType(); + unsigned OpBW = OpVT.getSizeInBits(); + + // Special handling for zero-extending a vector of booleans. + if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) { + SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0); + unsigned NE = OpVT.getVectorNumElements(); + EVT ExVT = N->getValueType(0); + unsigned ES = ExVT.getVectorElementType().getSizeInBits(); + uint64_t MV = 0, Bit = 1; + for (unsigned i = 0; i < NE; ++i) { + MV |= Bit; + Bit <<= ES; + } + SDValue Ones = CurDAG->getTargetConstant(MV, MVT::i64); + SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl, + MVT::i64, Ones); + if (ExVT.getSizeInBits() == 32) { + SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64, + SDValue(Mask,0), SDValue(OnesReg,0)); + SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, MVT::i32); + return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT, + SDValue(And,0), SubR); + } + return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT, + SDValue(Mask,0), SDValue(OnesReg,0)); + } + SDNode *IsIntrinsic = N->getOperand(0).getNode(); if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) { unsigned ID = @@ -1141,7 +755,7 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { if (doesIntrinsicReturnPredicate(ID)) { // Now we need to differentiate target data types. if (N->getValueType(0) == MVT::i64) { - // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs). + // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs). SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, MVT::i32, @@ -1171,6 +785,203 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { } // +// Checking for intrinsics circular load/store, and bitreverse load/store +// instrisics in order to select the correct lowered operation. +// +SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + if (IntNo == Intrinsic::hexagon_circ_ldd || + IntNo == Intrinsic::hexagon_circ_ldw || + IntNo == Intrinsic::hexagon_circ_lduh || + IntNo == Intrinsic::hexagon_circ_ldh || + IntNo == Intrinsic::hexagon_circ_ldub || + IntNo == Intrinsic::hexagon_circ_ldb) { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Load = N->getOperand(3); + SDValue ModifierExpr = N->getOperand(4); + SDValue Offset = N->getOperand(5); + + // We need to add the rerurn type for the load. This intrinsic has + // two return types, one for the load and one for the post-increment. + // Only the *_ld instructions push the extra return type, and bump the + // result node operand number correspondingly. + std::vector<EVT> ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_circ_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pci_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_circ_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pci_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_circ_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pci_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_circ_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_circ_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pci_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. + SmallVector<SDValue, 5> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); + Ops.push_back(CurDAG->getTargetConstant(Val, MVT::i32)); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + if (IntNo == Intrinsic::hexagon_brev_ldd || + IntNo == Intrinsic::hexagon_brev_ldw || + IntNo == Intrinsic::hexagon_brev_ldh || + IntNo == Intrinsic::hexagon_brev_lduh || + IntNo == Intrinsic::hexagon_brev_ldb || + IntNo == Intrinsic::hexagon_brev_ldub) { + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Load = N->getOperand(3); + SDValue ModifierExpr = N->getOperand(4); + + // We need to add the rerurn type for the load. This intrinsic has + // two return types, one for the load and one for the post-increment. + std::vector<EVT> ResTys; + unsigned opc; + unsigned memsize, align; + MVT MvtSize = MVT::i32; + + if (IntNo == Intrinsic::hexagon_brev_ldd) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i64); + opc = Hexagon::L2_loadrd_pbr_pseudo; + memsize = 8; + align = 8; + } else if (IntNo == Intrinsic::hexagon_brev_ldw) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadri_pbr_pseudo; + memsize = 4; + align = 4; + } else if (IntNo == Intrinsic::hexagon_brev_ldh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_lduh) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadruh_pbr_pseudo; + memsize = 2; + align = 2; + MvtSize = MVT::i16; + } else if (IntNo == Intrinsic::hexagon_brev_ldb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrb_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else if (IntNo == Intrinsic::hexagon_brev_ldub) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + opc = Hexagon::L2_loadrub_pbr_pseudo; + memsize = 1; + align = 1; + MvtSize = MVT::i8; + } else + llvm_unreachable("no opc"); + + ResTys.push_back(MVT::Other); + + // Copy over the arguments, which are the same mostly. + SmallVector<SDValue, 4> Ops; + Ops.push_back(Base); + Ops.push_back(Load); + Ops.push_back(ModifierExpr); + Ops.push_back(Chain); + SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); + SDValue ST; + MachineMemOperand *Mem = + MF->getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, memsize, align); + if (MvtSize != MVT::i32) + ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, + MvtSize, Mem); + else + ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); + + SDNode* Store = SelectStore(ST.getNode()); + + const SDValue Froms[] = { SDValue(N, 0), + SDValue(N, 1) }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Store, 0) }; + ReplaceUses(Froms, Tos, 2); + return Result; + } + + return SelectCode(N); +} + +// // Checking for intrinsics which have predicate registers as operand(s) // and lowering to the actual intrinsic. // @@ -1217,37 +1028,20 @@ SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { return SelectCode(N); } - // // Map predicate true (encoded as -1 in LLVM) to a XOR. // SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) { SDLoc dl(N); if (N->getValueType(0) == MVT::i1) { - SDNode* Result; + SDNode* Result = 0; int32_t Val = cast<ConstantSDNode>(N)->getSExtValue(); if (Val == -1) { - // Create the IntReg = 1 node. - SDNode* IntRegTFR = - CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, - CurDAG->getTargetConstant(0, MVT::i32)); - - // Pd = IntReg - SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1, - SDValue(IntRegTFR, 0)); - - // not(Pd) - SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1, - SDValue(Pd, 0)); - - // xor(not(Pd)) - Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1, - SDValue(Pd, 0), SDValue(NotPd, 0)); - - // We have just built: - // Rs = Pd - // Pd = xor(not(Pd), Pd) - + Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1); + } else if (Val == 0) { + Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1); + } + if (Result) { ReplaceUses(N, Result); return Result; } @@ -1283,347 +1077,282 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { return Result; } - -SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) { - N->setNodeId(-1); - return nullptr; // Already selected. - } - - - switch (N->getOpcode()) { - case ISD::Constant: - return SelectConstant(N); - - case ISD::ConstantFP: - return SelectConstantFP(N); - - case ISD::ADD: - return SelectAdd(N); - - case ISD::SHL: - return SelectSHL(N); - - case ISD::LOAD: - return SelectLoad(N); - - case ISD::STORE: - return SelectStore(N); - - case ISD::SELECT: - return SelectSelect(N); - - case ISD::TRUNCATE: - return SelectTruncate(N); - - case ISD::MUL: - return SelectMul(N); - - case ISD::ZERO_EXTEND: - return SelectZeroExtend(N); - - case ISD::INTRINSIC_WO_CHAIN: - return SelectIntrinsicWOChain(N); - } - - return SelectCode(N); -} - - // -// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way -// to define these instructions. +// Map the following, where possible. +// AND/FABS -> clrbit +// OR -> setbit +// XOR/FNEG ->toggle_bit. // -bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} +SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { + SDLoc dl(N); + EVT ValueVT = N->getValueType(0); + // We handle only 32 and 64-bit bit ops. + if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 || + ValueVT == MVT::f32 || ValueVT == MVT::f64)) + return SelectCode(N); -bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. + // We handly only fabs and fneg for V5. + unsigned Opc = N->getOpcode(); + if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps()) + return SelectCode(N); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_0_Offset(Offset.getNode())); + int64_t Val = 0; + if (Opc != ISD::FABS && Opc != ISD::FNEG) { + if (N->getOperand(1).getOpcode() == ISD::Constant) + Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue(); + else + return SelectCode(N); } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_0_Offset(Offset.getNode())); -} - -bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_1_Offset(Offset.getNode())); + if (Opc == ISD::AND) { + if (((ValueVT == MVT::i32) && + (!((Val & 0x80000000) || (Val & 0x7fffffff)))) || + ((ValueVT == MVT::i64) && + (!((Val & 0x8000000000000000) || (Val & 0x7fffffff))))) + // If it's simple AND, do the normal op. + return SelectCode(N); + else + Val = ~Val; } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_1_Offset(Offset.getNode())); -} - - -bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_2_Offset(Offset.getNode())); + // If OR or AND is being fed by shl, srl and, sra don't do this change, + // because Hexagon provide |= &= on shl, srl, and sra. + // Traverse the DAG to see if there is shl, srl and sra. + if (Opc == ISD::OR || Opc == ISD::AND) { + switch (N->getOperand(0)->getOpcode()) { + default: break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: + return SelectCode(N); + } } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_2_Offset(Offset.getNode())); -} + // Make sure it's power of 2. + unsigned bitpos = 0; + if (Opc != ISD::FABS && Opc != ISD::FNEG) { + if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) || + ((ValueVT == MVT::i64) && !isPowerOf2_64(Val))) + return SelectCode(N); -bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_0_Offset(Offset.getNode())); + // Get the bit position. + bitpos = countTrailingZeros(uint64_t(Val)); + } else { + // For fabs and fneg, it's always the 31st bit. + bitpos = 31; } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_0_Offset(Offset.getNode())); -} + unsigned BitOpc = 0; + // Set the right opcode for bitwise operations. + switch(Opc) { + default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed."); + case ISD::AND: + case ISD::FABS: + BitOpc = Hexagon::S2_clrbit_i; + break; + case ISD::OR: + BitOpc = Hexagon::S2_setbit_i; + break; + case ISD::XOR: + case ISD::FNEG: + BitOpc = Hexagon::S2_togglebit_i; + break; + } -bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. + SDNode *Result; + // Get the right SDVal for the opcode. + SDValue SDVal = CurDAG->getTargetConstant(bitpos, MVT::i32); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_1_Offset(Offset.getNode())); + if (ValueVT == MVT::i32 || ValueVT == MVT::f32) { + Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT, + N->getOperand(0), SDVal); + } else { + // 64-bit gymnastic to use REG_SEQUENCE. But it's worth it. + EVT SubValueVT; + if (ValueVT == MVT::i64) + SubValueVT = MVT::i32; + else + SubValueVT = MVT::f32; + + SDNode *Reg = N->getOperand(0).getNode(); + SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, + MVT::i64); + + SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, + MVT::i32); + SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, + MVT::i32); + + SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, SDValue(Reg, 0)); + + SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, SDValue(Reg, 0)); + + // Clear/set/toggle hi or lo registers depending on the bit position. + if (SubValueVT != MVT::f32 && bitpos < 32) { + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregLO, SDVal); + const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx, + SDValue(Result0, 0), SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } else { + if (Opc != ISD::FABS && Opc != ISD::FNEG) + SDVal = CurDAG->getTargetConstant(bitpos-32, MVT::i32); + SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT, + SubregHI, SDVal); + const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx, + SubregLO, SubregLoIdx }; + Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + dl, ValueVT, Ops); + } } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_1_Offset(Offset.getNode())); + + ReplaceUses(N, Result); + return Result; } -bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. +SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { + int FX = cast<FrameIndexSDNode>(N)->getIndex(); + SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32); + SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32); + SDLoc DL(N); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_2_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsU6_2_Offset(Offset.getNode())); -} + SDNode *R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero); + if (N->getHasDebugValue()) + CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0)); + return R; +} -bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() != ISD::ADD) { - return(SelectADDRriS11_2(Addr, Base, Offset)); +SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { + if (N->isMachineOpcode()) { + N->setNodeId(-1); + return nullptr; // Already selected. } - return SelectADDRriS11_2(Addr, Base, Offset); -} + switch (N->getOpcode()) { + case ISD::Constant: + return SelectConstant(N); + case ISD::ConstantFP: + return SelectConstantFP(N); -bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. + case ISD::FrameIndex: + return SelectFrameIndex(N); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_3_Offset(Offset.getNode())); - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return (IsS11_3_Offset(Offset.getNode())); -} + case ISD::ADD: + return SelectAdd(N); -bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, - SDValue &R2) { - if (Addr.getOpcode() == ISD::FrameIndex) return false; - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. - - if (Addr.getOpcode() == ISD::ADD) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - if (isInt<13>(CN->getSExtValue())) - return false; // Let the reg+imm pattern catch this! - R1 = Addr.getOperand(0); - R2 = Addr.getOperand(1); - return true; - } + case ISD::SHL: + return SelectSHL(N); - R1 = Addr; + case ISD::LOAD: + return SelectLoad(N); - return true; -} + case ISD::STORE: + return SelectStore(N); + case ISD::MUL: + return SelectMul(N); -// Handle generic address case. It is accessed from inlined asm =m constraints, -// which could have any kind of pointer. -bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr, - SDValue &Base, SDValue &Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // Direct calls. + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::FABS: + case ISD::FNEG: + return SelectBitOp(N); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } + case ISD::ZERO_EXTEND: + return SelectZeroExtend(N); - if (Addr.getOpcode() == ISD::ADD) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; + case ISD::INTRINSIC_W_CHAIN: + return SelectIntrinsicWChain(N); + + case ISD::INTRINSIC_WO_CHAIN: + return SelectIntrinsicWOChain(N); } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; + return SelectCode(N); } bool HexagonDAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { - SDValue Op0, Op1; - - switch (ConstraintCode) { - case 'o': // Offsetable. - case 'v': // Not offsetable. - default: return true; - case 'm': // Memory. - if (!SelectAddr(Op.getNode(), Op, Op0, Op1)) - return true; + SDValue Inp = Op, Res; + + switch (ConstraintID) { + default: + return true; + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_o: // Offsetable. + case InlineAsm::Constraint_v: // Not offsetable. + case InlineAsm::Constraint_m: // Memory. + if (SelectAddrFI(Inp, Res)) + OutOps.push_back(Res); + else + OutOps.push_back(Inp); break; } - OutOps.push_back(Op0); - OutOps.push_back(Op1); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); return false; } -bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const { - unsigned UseCount = 0; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - UseCount++; - } - - return (UseCount <= 1); - -} - -//===--------------------------------------------------------------------===// -// Return 'true' if use count of the global address is below threshold. -//===--------------------------------------------------------------------===// -bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const { - assert(N->getOpcode() == ISD::TargetGlobalAddress && - "Expecting a target global address"); - - // Always try to fold the address. - if (TM.getOptLevel() == CodeGenOpt::Aggressive) - return true; - - GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); - DenseMap<const GlobalValue *, unsigned>::const_iterator GI = - GlobalAddressUseCountMap.find(GA->getGlobal()); - - if (GI == GlobalAddressUseCountMap.end()) - return false; - - return GI->second <= MaxNumOfUsesForConstExtenders; -} - -//===--------------------------------------------------------------------===// -// Return true if the non-GP-relative global address can be folded. -//===--------------------------------------------------------------------===// -inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) { - return foldGlobalAddressImpl(N, R, false); -} - -//===--------------------------------------------------------------------===// -// Return true if the GP-relative global address can be folded. -//===--------------------------------------------------------------------===// -inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) { - return foldGlobalAddressImpl(N, R, true); -} +void HexagonDAGToDAGISel::PreprocessISelDAG() { + SelectionDAG &DAG = *CurDAG; + std::vector<SDNode*> Nodes; + for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) + Nodes.push_back(I); + + // Simplify: (or (select c x 0) z) -> (select c (or x z) z) + // (or (select c 0 y) z) -> (select c z (or y z)) + // This may not be the right thing for all targets, so do it here. + for (auto I: Nodes) { + if (I->getOpcode() != ISD::OR) + continue; + + auto IsZero = [] (const SDValue &V) -> bool { + if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode())) + return SC->isNullValue(); + return false; + }; + auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool { + if (Op.getOpcode() != ISD::SELECT) + return false; + return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2)); + }; -//===--------------------------------------------------------------------===// -// Fold offset of the global address if number of uses are below threshold. -//===--------------------------------------------------------------------===// -bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R, - bool ShouldLookForGP) { - if (N.getOpcode() == ISD::ADD) { - SDValue N0 = N.getOperand(0); - SDValue N1 = N.getOperand(1); - if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) || - (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1); - GlobalAddressSDNode *GA = - dyn_cast<GlobalAddressSDNode>(N0.getOperand(0)); - - if (Const && GA && - (GA->getOpcode() == ISD::TargetGlobalAddress)) { - if ((N0.getOpcode() == HexagonISD::CONST32) && - !hasNumUsesBelowThresGA(GA)) - return false; - R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), - SDLoc(Const), - N.getValueType(), - GA->getOffset() + - (uint64_t)Const->getSExtValue()); - return true; + SDValue N0 = I->getOperand(0), N1 = I->getOperand(1); + EVT VT = I->getValueType(0); + bool SelN0 = IsSelect0(N0); + SDValue SOp = SelN0 ? N0 : N1; + SDValue VOp = SelN0 ? N1 : N0; + + if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) { + SDValue SC = SOp.getOperand(0); + SDValue SX = SOp.getOperand(1); + SDValue SY = SOp.getOperand(2); + SDLoc DLS = SOp; + if (IsZero(SY)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); + } else if (IsZero(SX)) { + SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp); + SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr); + DAG.ReplaceAllUsesWith(I, NewSel.getNode()); } } } - return false; } + bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { if (N.getOpcode() != ISD::FrameIndex) return false; @@ -1681,8 +1410,8 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, return false; } -bool HexagonDAGToDAGISel::isValueExtension(SDValue const &Val, - unsigned FromBits, SDValue &Src) { +bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, + unsigned FromBits, SDValue &Src) { unsigned Opc = Val.getOpcode(); switch (Opc) { case ISD::SIGN_EXTEND: diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 0072994..a2209ab 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -164,6 +164,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::ZExt; else LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; } if (LocVT == MVT::i32 || LocVT == MVT::f32) { @@ -239,6 +245,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::ZExt; else LocInfo = CCValAssign::AExt; + } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { + LocVT = MVT::i32; + LocInfo = CCValAssign::BCvt; + } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; } if (LocVT == MVT::i32 || LocVT == MVT::f32) { @@ -764,7 +776,7 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock())); } - SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl, + SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl, getPointerTy(), TargetJT); SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, DAG.getConstant(2, MVT::i32)); @@ -944,6 +956,192 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { false, 0); } +// Creates a SPLAT instruction for a constant value VAL. +static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) { + if (VT.getSimpleVT() == MVT::v4i8) + return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); + + if (VT.getSimpleVT() == MVT::v4i16) + return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val); + + return SDValue(); +} + +static bool isSExtFree(SDValue N) { + // A sign-extend of a truncate of a sign-extend is free. + if (N.getOpcode() == ISD::TRUNCATE && + N.getOperand(0).getOpcode() == ISD::AssertSext) + return true; + // We have sign-extended loads. + if (N.getOpcode() == ISD::LOAD) + return true; + return false; +} + +SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue InpVal = Op.getOperand(0); + if (isa<ConstantSDNode>(InpVal)) { + uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue(); + return DAG.getTargetConstant(countPopulation(V), MVT::i64); + } + SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut); +} + +SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Cmp = Op.getOperand(2); + ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); + + EVT VT = Op.getValueType(); + EVT LHSVT = LHS.getValueType(); + EVT RHSVT = RHS.getValueType(); + + if (LHSVT == MVT::v2i16) { + assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC)); + unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS); + SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS); + SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp); + return SC; + } + + // Treat all other vector types as legal. + if (VT.isVector()) + return Op; + + // Equals and not equals should use sign-extend, not zero-extend, since + // we can represent small negative values in the compare instructions. + // The LLVM default is to use zero-extend arbitrarily in these cases. + if ((CC == ISD::SETEQ || CC == ISD::SETNE) && + (RHSVT == MVT::i8 || RHSVT == MVT::i16) && + (LHSVT == MVT::i8 || LHSVT == MVT::i16)) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + if (C && C->getAPIntValue().isNegative()) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + if (isSExtFree(LHS) || isSExtFree(RHS)) { + LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); + RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); + return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), + LHS, RHS, Op.getOperand(2)); + } + } + return SDValue(); +} + +SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) + const { + SDValue PredOp = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2); + EVT OpVT = Op1.getValueType(); + SDLoc DL(Op); + + if (OpVT == MVT::v2i16) { + SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1); + SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2); + SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2); + SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL); + return TR; + } + + return SDValue(); +} + +// Handle only specific vector loads. +SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = LoadNode->getChain(); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + SDValue Result; + SDValue Base = LoadNode->getBasePtr(); + ISD::LoadExtType Ext = LoadNode->getExtensionType(); + unsigned Alignment = LoadNode->getAlignment(); + SDValue LoadChain; + + if(Ext == ISD::NON_EXTLOAD) + Ext = ISD::ZEXTLOAD; + + if (VT == MVT::v4i16) { + if (Alignment == 2) { + SDValue Loads[4]; + // Base load. + Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base+2 load. + SDValue Increment = DAG.getConstant(2, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base and base+2. + SDValue ShiftAmount = DAG.getConstant(16, MVT::i32); + SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); + SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]); + // Base + 4. + Increment = DAG.getConstant(4, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // Base + 6. + Increment = DAG.getConstant(6, MVT::i32); + Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); + Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, + LoadNode->getPointerInfo(), MVT::i16, + LoadNode->isVolatile(), + LoadNode->isNonTemporal(), + LoadNode->isInvariant(), + Alignment); + // SHL 16, then OR base+4 and base+6. + Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); + SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); + // Combine to i64. This could be optimised out later if we can + // affect reg allocation of this code. + Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2); + LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Loads[0].getValue(1), Loads[1].getValue(1), + Loads[2].getValue(1), Loads[3].getValue(1)); + } else { + // Perform default type expansion. + Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), + LoadNode->isVolatile(), LoadNode->isNonTemporal(), + LoadNode->isInvariant(), LoadNode->getAlignment()); + LoadChain = Result.getValue(1); + } + } else + llvm_unreachable("Custom lowering unsupported load"); + + Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); + // Since we pretend to lower a load, we need the original chain + // info attached to the result. + SDValue Ops[] = { Result, LoadChain }; + + return DAG.getMergeValues(Ops, DL); +} + + SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); @@ -1028,6 +1226,19 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); } +// Specifies that for loads and stores VT can be promoted to PromotedLdStVT. +void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) { + if (VT != PromotedLdStVT) { + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::STORE, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); + } +} + SDValue HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); @@ -1045,14 +1256,105 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, : TargetLowering(TM), Subtarget(&STI) { // Set up the register classes. + addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa + addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa + addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); - addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass); + promoteLdStType(MVT::v4i8, MVT::i32); + promoteLdStType(MVT::v2i16, MVT::i32); if (Subtarget->hasV5TOps()) { addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); } + addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass); + addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass); + promoteLdStType(MVT::v8i8, MVT::i64); + + // Custom lower v4i16 load only. Let v4i16 store to be + // promoted for now. + setOperationAction(ISD::LOAD, MVT::v4i16, Custom); + AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64); + setOperationAction(ISD::STORE, MVT::v4i16, Promote); + AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64); + promoteLdStType(MVT::v2i32, MVT::i64); + + for (unsigned i = (unsigned) MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType) i; + + // Hexagon does not have support for the following operations, + // so they need to be expanded. + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + + // Expand all any extend loads. + for (unsigned j = (unsigned) MVT::FIRST_VECTOR_VALUETYPE; + j <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++j) + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType) j, VT, Expand); + + // Expand all trunc stores. + for (unsigned TargetVT = (unsigned) MVT::FIRST_VECTOR_VALUETYPE; + TargetVT <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++TargetVT) + setTruncStoreAction(VT, (MVT::SimpleValueType) TargetVT, Expand); + + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); + setOperationAction(ISD::ConstantPool, VT, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); + setOperationAction(ISD::BUILD_VECTOR, VT, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + + if (!isTypeLegal(VT)) + continue; + + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); + setOperationAction(ISD::MUL, VT, Legal); + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + } + + setOperationAction(ISD::SETCC, MVT::v2i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); + + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -1308,9 +1610,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Turn FP extload into load/fextend. for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - // Hexagon has a i1 sign extending load. - for (MVT VT : MVT::integer_valuetypes()) - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); + + // No extending loads from i32. + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + } + // Turn FP truncstore into trunc + store. setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -1358,6 +1665,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::f64, Expand); } + // Hexagon needs to optimize cases with negative constants. + setOperationAction(ISD::SETCC, MVT::i16, Custom); + setOperationAction(ISD::SETCC, MVT::i8, Custom); + if (EmitJumpTables) { setOperationAction(ISD::BR_JT, MVT::Other, Custom); } else { @@ -1415,9 +1726,17 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTR, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); @@ -1429,7 +1748,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - + setOperationAction(ISD::MULHS, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); @@ -1463,27 +1782,63 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return nullptr; - case HexagonISD::CONST32: return "HexagonISD::CONST32"; - case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; - case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real"; - case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; - case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; - case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; - case HexagonISD::BRICC: return "HexagonISD::BRICC"; - case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; - case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; - case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; - case HexagonISD::Hi: return "HexagonISD::Hi"; - case HexagonISD::Lo: return "HexagonISD::Lo"; - case HexagonISD::FTOI: return "HexagonISD::FTOI"; - case HexagonISD::ITOF: return "HexagonISD::ITOF"; - case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; - case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; - case HexagonISD::CALLR: return "HexagonISD::CALLR"; - case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; - case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; - case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + default: return nullptr; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; + case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real"; + case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; + case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; + case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; + case HexagonISD::BRICC: return "HexagonISD::BRICC"; + case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; + case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; + case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; + case HexagonISD::Hi: return "HexagonISD::Hi"; + case HexagonISD::Lo: return "HexagonISD::Lo"; + case HexagonISD::JT: return "HexagonISD::JT"; + case HexagonISD::CP: return "HexagonISD::CP"; + case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT"; + case HexagonISD::COMBINE: return "HexagonISD::COMBINE"; + case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; + case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; + case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH"; + case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; + case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; + case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB"; + case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH"; + case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH"; + case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW"; + case HexagonISD::VSRAW: return "HexagonISD::VSRAW"; + case HexagonISD::VSRAH: return "HexagonISD::VSRAH"; + case HexagonISD::VSRLW: return "HexagonISD::VSRLW"; + case HexagonISD::VSRLH: return "HexagonISD::VSRLH"; + case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; + case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; + case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ"; + case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT"; + case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU"; + case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ"; + case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT"; + case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU"; + case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; + case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; + case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; + case HexagonISD::INSERT_ri: return "HexagonISD::INSERT_ri"; + case HexagonISD::INSERT_rd: return "HexagonISD::INSERT_rd"; + case HexagonISD::INSERT_riv: return "HexagonISD::INSERT_riv"; + case HexagonISD::INSERT_rdv: return "HexagonISD::INSERT_rdv"; + case HexagonISD::EXTRACTU_ri: return "HexagonISD::EXTRACTU_ri"; + case HexagonISD::EXTRACTU_rd: return "HexagonISD::EXTRACTU_rd"; + case HexagonISD::EXTRACTU_riv: return "HexagonISD::EXTRACTU_riv"; + case HexagonISD::EXTRACTU_rdv: return "HexagonISD::EXTRACTU_rdv"; + case HexagonISD::FTOI: return "HexagonISD::FTOI"; + case HexagonISD::ITOF: return "HexagonISD::ITOF"; + case HexagonISD::CALLv3: return "HexagonISD::CALLv3"; + case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr"; + case HexagonISD::CALLR: return "HexagonISD::CALLR"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; } } @@ -1505,6 +1860,474 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32)); } +// shouldExpandBuildVectorWithShuffles +// Should we expand the build vector with shuffles? +bool +HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const { + + // Hexagon vector shuffle operates on element sizes of bytes or halfwords + EVT EltVT = VT.getVectorElementType(); + int EltBits = EltVT.getSizeInBits(); + if ((EltBits != 8) && (EltBits != 16)) + return false; + + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); +} + +// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and +// V2 are the two vectors to select data from, V3 is the permutation. +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + if (V2.getOpcode() == ISD::UNDEF) + V2 = V1; + + if (SVN->isSplat()) { + int Lane = SVN->getSplatIndex(); + if (Lane == -1) Lane = 0; + + // Test if V1 is a SCALAR_TO_VECTOR. + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). + if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa<ConstantSDNode>(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return createSplat(DAG, dl, VT, V1.getOperand(0)); + } + return createSplat(DAG, dl, VT, DAG.getConstant(Lane, MVT::i32)); + } + + // FIXME: We need to support more general vector shuffles. See + // below the comment from the ARM backend that deals in the general + // case with the vector shuffles. For now, let expand handle these. + return SDValue(); + + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. +} + +// If BUILD_VECTOR has same base element repeated several times, +// report true. +static bool isCommonSplatElement(BuildVectorSDNode *BVN) { + unsigned NElts = BVN->getNumOperands(); + SDValue V0 = BVN->getOperand(0); + + for (unsigned i = 1, e = NElts; i != e; ++i) { + if (BVN->getOperand(i) != V0) + return false; + } + return true; +} + +// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert +// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific +// <VT> = SHL/SRA/SRL <VT> by <IT/i32>. +static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = 0; + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDValue V3; + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) && + isCommonSplatElement(BVN)) + V3 = V2; + else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) && + isCommonSplatElement(BVN)) + V3 = V1; + else + return SDValue(); + + SDValue CommonSplat = BVN->getOperand(0); + SDValue Result; + + if (VT.getSimpleVT() == MVT::v4i16) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else if (VT.getSimpleVT() == MVT::v2i32) { + switch (Op.getOpcode()) { + case ISD::SRA: + Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat); + break; + case ISD::SHL: + Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat); + break; + case ISD::SRL: + Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat); + break; + default: + return SDValue(); + } + } else { + return SDValue(); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); +} + +SDValue +HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { + BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + unsigned Size = VT.getSizeInBits(); + + // A vector larger than 64 bits cannot be represented in Hexagon. + // Expand will split the vector. + if (Size > 64) + return SDValue(); + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned NElts = BVN->getNumOperands(); + + // Try to generate a SPLAT instruction. + if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) && + (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, true) && SplatBitSize <= 16)) { + unsigned SplatBits = APSplatBits.getZExtValue(); + int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >> + (32 - SplatBitSize)); + return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, MVT::i32)); + } + + // Try to generate COMBINE to build v2i32 vectors. + if (VT.getSimpleVT() == MVT::v2i32) { + SDValue V0 = BVN->getOperand(0); + SDValue V1 = BVN->getOperand(1); + + if (V0.getOpcode() == ISD::UNDEF) + V0 = DAG.getConstant(0, MVT::i32); + if (V1.getOpcode() == ISD::UNDEF) + V1 = DAG.getConstant(0, MVT::i32); + + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0); + ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1); + // If the element isn't a constant, it is in a register: + // generate a COMBINE Register Register instruction. + if (!C0 || !C1) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + + // If one of the operands is an 8 bit integer constant, generate + // a COMBINE Immediate Immediate instruction. + if (isInt<8>(C0->getSExtValue()) || + isInt<8>(C1->getSExtValue())) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0); + } + + // Try to generate a S2_packhl to build v2i16 vectors. + if (VT.getSimpleVT() == MVT::v2i16) { + for (unsigned i = 0, e = NElts; i != e; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i)); + // If the element isn't a constant, it is in a register: + // generate a S2_packhl instruction. + if (!Cst) { + SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16, + BVN->getOperand(1), BVN->getOperand(0)); + + return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16, + pack); + } + } + } + + // In the general case, generate a CONST32 or a CONST64 for constant vectors, + // and insert_vector_elt for all the other cases. + uint64_t Res = 0; + unsigned EltSize = Size / NElts; + SDValue ConstVal; + uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize); + bool HasNonConstantElements = false; + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon's + // combine, const64, etc. are Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (Operand.getOpcode() == ISD::UNDEF) + continue; + + int64_t Val = 0; + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand)) + Val = Cst->getSExtValue(); + else + HasNonConstantElements = true; + + Val &= Mask; + Res = (Res << EltSize) | Val; + } + + if (Size == 64) + ConstVal = DAG.getConstant(Res, MVT::i64); + else + ConstVal = DAG.getConstant(Res, MVT::i32); + + // When there are non constant operands, add them with INSERT_VECTOR_ELT to + // ConstVal, the constant part of the vector. + if (HasNonConstantElements) { + EVT EltVT = VT.getVectorElementType(); + SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, MVT::i64)); + + for (unsigned i = 0, e = NElts; i != e; ++i) { + // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon + // is Big Endian. + unsigned OpIdx = NElts - i - 1; + SDValue Operand = BVN->getOperand(OpIdx); + if (dyn_cast<ConstantSDNode>(Operand)) + // This operand is already in ConstVal. + continue; + + if (VT.getSizeInBits() == 64 && + Operand.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, MVT::i32); + Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + } + + SDValue Idx = DAG.getConstant(OpIdx, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + const SDValue Ops[] = {ConstVal, Operand, Combined}; + + if (VT.getSizeInBits() == 32) + ConstVal = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops); + else + ConstVal = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops); + } + } + + return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); +} + +SDValue +HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + unsigned NElts = Op.getNumOperands(); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), MVT::i64); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, MVT::i64)); + SDValue ConstVal = DAG.getConstant(0, MVT::i64); + + ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width); + ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted); + + if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) { + if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { + // We are trying to concat two v2i16 to a single v4i16. + SDValue Vec0 = Op.getOperand(1); + SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); + return DAG.getNode(ISD::BITCAST, dl, VT, Combined); + } + } + + if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) { + if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { + // We are trying to concat two v4i8 to a single v8i8. + SDValue Vec0 = Op.getOperand(1); + SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); + return DAG.getNode(ISD::BITCAST, dl, VT, Combined); + } + } + + for (unsigned i = 0, e = NElts; i != e; ++i) { + unsigned OpIdx = NElts - i - 1; + SDValue Operand = Op.getOperand(OpIdx); + + if (VT.getSizeInBits() == 64 && + Operand.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, MVT::i32); + Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + } + + SDValue Idx = DAG.getConstant(OpIdx, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + const SDValue Ops[] = {ConstVal, Operand, Combined}; + + if (VT.getSizeInBits() == 32) + ConstVal = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops); + else + ConstVal = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); +} + +SDValue +HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; + SDLoc dl(Op); + SDValue Idx = Op.getOperand(1); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + int EltSize = EltVT.getSizeInBits(); + SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ? + EltSize : VTN * EltSize, MVT::i64); + + // Constant element number. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) { + SDValue Offset = DAG.getConstant(C->getZExtValue() * EltSize, MVT::i32); + const SDValue Ops[] = {Vec, Width, Offset}; + + ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width); + assert(W && "Non constant width in LowerEXTRACT_VECTOR"); + + SDValue N; + // For certain extracts, it is a simple _hi/_lo subreg. + if (VecVT.getSimpleVT() == MVT::v2i32) { + // v2i32 -> i32 vselect. + if (C->getZExtValue() == 0) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, Vec); + else if (C->getZExtValue() == 1) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, Vec); + else + llvm_unreachable("Bad offset"); + } else if ((VecVT.getSimpleVT() == MVT::v4i16) && + (W->getZExtValue() == 32)) { + // v4i16 -> v2i16/i32 vselect. + if (C->getZExtValue() == 0) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, Vec); + else if (C->getZExtValue() == 2) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, Vec); + else + llvm_unreachable("Bad offset"); + } else if ((VecVT.getSimpleVT() == MVT::v8i8) && + (W->getZExtValue() == 32)) { + // v8i8 -> v4i8/i32 vselect. + if (C->getZExtValue() == 0) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, + MVT::i32, Vec); + else if (C->getZExtValue() == 4) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl, + MVT::i32, Vec); + else + llvm_unreachable("Bad offset"); + } else if (VecVT.getSizeInBits() == 32) { + N = DAG.getNode(HexagonISD::EXTRACTU_ri, dl, MVT::i32, Ops); + } else { + N = DAG.getNode(HexagonISD::EXTRACTU_rd, dl, MVT::i64, Ops); + if (VT.getSizeInBits() == 32) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); + } + + return DAG.getNode(ISD::BITCAST, dl, VT, N); + } + + // Variable element number. + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx, + DAG.getConstant(EltSize, MVT::i32)); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, MVT::i64)); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + + const SDValue Ops[] = {Vec, Combined}; + + SDValue N; + if (VecVT.getSizeInBits() == 32) { + N = DAG.getNode(HexagonISD::EXTRACTU_riv, dl, MVT::i32, Ops); + } else { + N = DAG.getNode(HexagonISD::EXTRACTU_rdv, dl, MVT::i64, Ops); + if (VT.getSizeInBits() == 32) + N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N); + } + return DAG.getNode(ISD::BITCAST, dl, VT, N); +} + +SDValue +HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue Val = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + int EltSize = EltVT.getSizeInBits(); + SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ? + EltSize : VTN * EltSize, MVT::i64); + + if (ConstantSDNode *C = cast<ConstantSDNode>(Idx)) { + SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, MVT::i32); + const SDValue Ops[] = {Vec, Val, Width, Offset}; + + SDValue N; + if (VT.getSizeInBits() == 32) + N = DAG.getNode(HexagonISD::INSERT_ri, dl, MVT::i32, Ops); + else + N = DAG.getNode(HexagonISD::INSERT_rd, dl, MVT::i64, Ops); + + return DAG.getNode(ISD::BITCAST, dl, VT, N); + } + + // Variable element number. + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx, + DAG.getConstant(EltSize, MVT::i32)); + SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, + DAG.getConstant(32, MVT::i64)); + SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + + if (VT.getSizeInBits() == 64 && + Val.getValueType().getSizeInBits() == 32) { + SDValue C = DAG.getConstant(0, MVT::i32); + Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val); + } + + const SDValue Ops[] = {Vec, Val, Combined}; + + SDValue N; + if (VT.getSizeInBits() == 32) + N = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops); + else + N = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops); + + return DAG.getNode(ISD::BITCAST, dl, VT, N); +} + bool HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { // Assuming the caller does not have either a signext or zeroext modifier, and @@ -1549,7 +2372,19 @@ SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SRA: + case ISD::SHL: + case ISD::SRL: + return LowerVECTOR_SHIFT(Op, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); // Frame & Return address. Currently unimplemented. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); @@ -1561,9 +2396,14 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); + // Custom lower some vector loads. + case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::SELECT: return Op; + case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::VSELECT: return LowerVSELECT(Op, DAG); + case ISD::CTPOP: return LowerCTPOP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 151c28f..34b1ebb 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -37,6 +37,10 @@ bool isPositiveHalfWord(SDNode *N); ADJDYNALLOC, ARGEXTEND, + PIC_ADD, + AT_GOT, + AT_PCREL, + CMPICC, // Compare two GPR operands, set icc. CMPFCC, // Compare two FP operands, set fcc. BRICC, // Branch to dest on icc condition @@ -54,23 +58,44 @@ bool isPositiveHalfWord(SDNode *N); CALLR, RET_FLAG, // Return with a flag operand. - BR_JT, // Jump table. - BARRIER, // Memory barrier + BR_JT, // Branch through jump table. + BARRIER, // Memory barrier. + JT, // Jump table. + CP, // Constant pool. POPCOUNT, COMBINE, - WrapperJT, - WrapperCP, - WrapperCombineII, - WrapperCombineRR, - WrapperCombineRI_V4, - WrapperCombineIR_V4, - WrapperPackhl, - WrapperSplatB, - WrapperSplatH, - WrapperShuffEB, - WrapperShuffEH, - WrapperShuffOB, - WrapperShuffOH, + PACKHL, + VSPLATB, + VSPLATH, + SHUFFEB, + SHUFFEH, + SHUFFOB, + SHUFFOH, + VSXTBH, + VSXTBW, + VSRAW, + VSRAH, + VSRLW, + VSRLH, + VSHLW, + VSHLH, + VCMPBEQ, + VCMPBGT, + VCMPBGTU, + VCMPHEQ, + VCMPHGT, + VCMPHGTU, + VCMPWEQ, + VCMPWGT, + VCMPWGTU, + INSERT_ri, + INSERT_rd, + INSERT_riv, + INSERT_rdv, + EXTRACTU_ri, + EXTRACTU_rd, + EXTRACTU_riv, + EXTRACTU_rdv, TC_RETURN, EH_RETURN, DCFETCH @@ -85,6 +110,8 @@ bool isPositiveHalfWord(SDNode *N); bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) const; + void promoteLdStType(EVT VT, EVT PromotedLdStVT); + public: const HexagonSubtarget *Subtarget; explicit HexagonTargetLowering(const TargetMachine &TM, @@ -110,10 +137,17 @@ bool isPositiveHalfWord(SDNode *N); bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + // Should we expand the build vector with shuffles? + bool shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; const char *getTargetNodeName(unsigned Opcode) const override; - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; @@ -137,9 +171,13 @@ bool isPositiveHalfWord(SDNode *N); const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -170,6 +208,15 @@ bool isPositiveHalfWord(SDNode *N); const std::string &Constraint, MVT VT) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + else if (ConstraintCode == "v") + return InlineAsm::Constraint_v; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + // Intrinsics SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; /// isLegalAddressingMode - Return true if the addressing mode represented diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 3d04678..36a7e9f 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -76,7 +76,7 @@ class OpcodeHexagon { class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr, InstrItinClass itin, IType type> - : Instruction, OpcodeHexagon { + : Instruction { let Namespace = "Hexagon"; dag OutOperandList = outs; @@ -84,18 +84,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, let AsmString = asmstr; let Pattern = pattern; let Constraints = cstr; - let Itinerary = itin;
- let Size = 4;
-
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + // Instruction type according to the ISA. IType Type = type; let TSFlags{4-0} = Type.Value; @@ -197,7 +197,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, let mayLoad = 1 in class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; let mayLoad = 1 in class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], @@ -217,7 +217,7 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayLoad = 1 in class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. @@ -225,7 +225,7 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayStore = 1 in class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> @@ -234,7 +234,7 @@ class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayStore = 1 in class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. @@ -247,13 +247,14 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], // In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>, + OpcodeHexagon; // ALU32 Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon; // ALU64 Instruction Class in V2/V3. // XTYPE Instruction Class in V4. @@ -261,7 +262,8 @@ class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> @@ -274,7 +276,8 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. @@ -290,7 +293,8 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>, + OpcodeHexagon; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. @@ -304,34 +308,37 @@ class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], // Definition of the instruction class NOT CHANGED. class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon; // JR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon; // CR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>, + OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> - : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>, + OpcodeHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr=""> - : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>, + OpcodeHexagon; //===----------------------------------------------------------------------===// // Instruction Classes Definitions - diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 5fec80b..7f7b2c9 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -17,10 +17,88 @@ // *** Must match BaseInfo.h *** //----------------------------------------------------------------------------// -def TypeMEMOP : IType<9>; -def TypeNV : IType<10>; +def TypeMEMOP : IType<9>; +def TypeNV : IType<10>; +def TypeDUPLEX : IType<11>; def TypeCOMPOUND : IType<12>; -def TypePREFIX : IType<30>; +def TypeAG_VX : IType<28>; +def TypeAG_VM : IType<29>; +def TypePREFIX : IType<30>; + +// Duplex Instruction Class Declaration +//===----------------------------------------------------------------------===// + +class OpcodeDuplex { + field bits<32> Inst = ?; // Default to an invalid insn. + bits<4> IClass = 0; // ICLASS + bits<13> ISubHi = 0; // Low sub-insn + bits<13> ISubLo = 0; // High sub-insn + + let Inst{31-29} = IClass{3-1}; + let Inst{13} = IClass{0}; + let Inst{15-14} = 0; + let Inst{28-16} = ISubHi; + let Inst{12-0} = ISubLo; +} + +class InstDuplex<bits<4> iClass, list<dag> pattern = [], + string cstr = ""> + : Instruction, OpcodeDuplex { + let Namespace = "Hexagon"; + IType Type = TypeDUPLEX; // uses slot 0,1 + let isCodeGenOnly = 1; + let hasSideEffects = 0; + dag OutOperandList = (outs); + dag InOperandList = (ins); + let IClass = iClass; + let Constraints = cstr; + let Itinerary = DUPLEX; + let Size = 4; + + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<32> SoftFail = 0; + + // *** Must match MCTargetDesc/HexagonBaseInfo.h *** + + let TSFlags{4-0} = Type.Value; + + // Predicated instructions. + bits<1> isPredicated = 0; + let TSFlags{6} = isPredicated; + bits<1> isPredicatedFalse = 0; + let TSFlags{7} = isPredicatedFalse; + bits<1> isPredicatedNew = 0; + let TSFlags{8} = isPredicatedNew; + + // New-value insn helper fields. + bits<1> isNewValue = 0; + let TSFlags{9} = isNewValue; // New-value consumer insn. + bits<1> hasNewValue = 0; + let TSFlags{10} = hasNewValue; // New-value producer insn. + bits<3> opNewValue = 0; + let TSFlags{13-11} = opNewValue; // New-value produced operand. + bits<1> isNVStorable = 0; + let TSFlags{14} = isNVStorable; // Store that can become new-value store. + bits<1> isNVStore = 0; + let TSFlags{15} = isNVStore; // New-value store insn. + + // Immediate extender helper fields. + bits<1> isExtendable = 0; + let TSFlags{16} = isExtendable; // Insn may be extended. + bits<1> isExtended = 0; + let TSFlags{17} = isExtended; // Insn must be extended. + bits<3> opExtendable = 0; + let TSFlags{20-18} = opExtendable; // Which operand may be extended. + bits<1> isExtentSigned = 0; + let TSFlags{21} = isExtentSigned; // Signed or unsigned range. + bits<5> opExtentBits = 0; + let TSFlags{26-22} = opExtentBits; //Number of bits of range before extending. + bits<2> opExtentAlign = 0; + let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending. +} //----------------------------------------------------------------------------// // Instruction Classes Definitions @@ -31,7 +109,7 @@ def TypePREFIX : IType<30>; // class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon; class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> @@ -56,7 +134,8 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], let mayLoad = 1, mayStore = 1 in class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> - : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>, + OpcodeHexagon; class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> @@ -65,8 +144,9 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], let isCodeGenOnly = 1 in class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123, - TypePREFIX>; + TypePREFIX>, OpcodeHexagon; class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> - : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>; + : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>, + OpcodeHexagon; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 9bae12c..fbf1ca9 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -62,10 +62,8 @@ const int Hexagon_MEMB_AUTOINC_MIN = -8; void HexagonInstrInfo::anchor() {} HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) - : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), - RI(ST), Subtarget(ST) { -} - + : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), + RI(), Subtarget(ST) {} /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -159,15 +157,19 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, } BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); } else { - BuildMI(&MBB, DL, - get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); + // If Cond[0] is a basic block, insert ENDLOOP0. + if (Cond[0].isMBB()) + BuildMI(&MBB, DL, get(Hexagon::ENDLOOP0)).addMBB(Cond[0].getMBB()); + else + BuildMI(&MBB, DL, + get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); } return 1; } + // We don't handle ENDLOOP0 with a conditional branch in AnalyzeBranch. BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB); BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); - return 2; } @@ -211,9 +213,11 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; --I; } - + + bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump && + I->getOperand(0).isMBB(); // Delete the JMP if it's equivalent to a fall-through. - if (AllowModify && I->getOpcode() == Hexagon::J2_jump && + if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); I->eraseFromParent(); @@ -243,6 +247,14 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } while(I); int LastOpcode = LastInst->getOpcode(); + int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0; + // If the branch target is not a basic block, it could be a tail call. + // (It is, if the target is a function.) + if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB()) + return true; + if (SecLastOpcode == Hexagon::J2_jump && + !SecondLastInst->getOperand(0).isMBB()) + return true; bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode); @@ -270,8 +282,6 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; } - int SecLastOpcode = SecondLastInst->getOpcode(); - bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode); if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { @@ -308,30 +318,35 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - int BOpc = Hexagon::J2_jump; - int BccOpc = Hexagon::J2_jumpt; - int BccOpcNot = Hexagon::J2_jumpf; - MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; --I; - if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc && - I->getOpcode() != BccOpcNot) - return 0; - - // Remove the branch. - I->eraseFromParent(); + unsigned Opc1 = I->getOpcode(); + switch (Opc1) { + case Hexagon::J2_jump: + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: + case Hexagon::ENDLOOP0: + I->eraseFromParent(); + break; + default: + return 0; + } I = MBB.end(); if (I == MBB.begin()) return 1; --I; - if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot) - return 1; - - // Remove the branch. - I->eraseFromParent(); - return 2; + unsigned Opc2 = I->getOpcode(); + switch (Opc2) { + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: + case Hexagon::ENDLOOP0: + I->eraseFromParent(); + return 2; + default: + return 1; + } } @@ -549,12 +564,95 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineInstr*> &NewMIs) const { llvm_unreachable("Unimplemented"); } +bool +HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + const HexagonRegisterInfo &TRI = getRegisterInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::TFR_PdTrue: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::TFR_PdFalse: { + unsigned Reg = MI->getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::VMULW: { + // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), + TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + return true; + } + case Hexagon::VMULW_ACC: { + // Expand 64-bit vector multiply with addition into 2 scalar multiplies. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned Src1Reg = MI->getOperand(1).getReg(); + unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned Src3Reg = MI->getOperand(3).getReg(); + unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); + unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); + unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); + unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); + unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); + unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + .addReg(Src2SubHi).addReg(Src3SubHi); + BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), + TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + .addReg(Src2SubLo).addReg(Src3SubLo); + MBB.erase(MI); + MRI.clearKillFlags(Src1SubHi); + MRI.clearKillFlags(Src1SubLo); + MRI.clearKillFlags(Src2SubHi); + MRI.clearKillFlags(Src2SubLo); + MRI.clearKillFlags(Src3SubHi); + MRI.clearKillFlags(Src3SubLo); + return true; + } + case Hexagon::TCRETURNi: + MI->setDesc(get(Hexagon::J2_jump)); + return true; + case Hexagon::TCRETURNr: + MI->setDesc(get(Hexagon::J2_jumpr)); + return true; + } + + return false; +} MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FI) const { // Hexagon_TODO: Implement. return nullptr; } @@ -641,7 +739,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { switch(Opc) { case Hexagon::A2_tfrsi: - return isInt<12>(MI->getOperand(1).getImm()); + return (isOperandExtended(MI, 1) && isConstExtended(MI)) || isInt<12>(MI->getOperand(1).getImm()); case Hexagon::S2_storerd_io: return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); @@ -1036,6 +1134,8 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, // bool HexagonInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + if (!Cond.empty() && Cond[0].isMBB()) + return true; if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { Cond.erase(Cond.begin()); } else { @@ -1521,7 +1621,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { switch (MI->getOpcode()) { default: llvm_unreachable("Unknown .new type"); - // store new value byte case Hexagon::S4_storerb_ur: return Hexagon::S4_storerbnew_ur; @@ -1531,6 +1630,20 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { case Hexagon::S4_storeri_ur: return Hexagon::S4_storerinew_ur; + case Hexagon::S2_storerb_pci: + return Hexagon::S2_storerb_pci; + + case Hexagon::S2_storeri_pci: + return Hexagon::S2_storeri_pci; + + case Hexagon::S2_storerh_pci: + return Hexagon::S2_storerh_pci; + + case Hexagon::S2_storerd_pci: + return Hexagon::S2_storerd_pci; + + case Hexagon::S2_storerf_pci: + return Hexagon::S2_storerf_pci; } return 0; } @@ -1647,7 +1760,7 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { // We currently only handle isGlobal() because it is the only kind of // object we are going to end up with here for now. // In the future we probably should add isSymbol(), etc. - if (MO.isGlobal() || MO.isSymbol()) + if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress()) return true; // If the extendable operand is not 'Immediate' type, the instruction should diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 6acfbec..2644248 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -26,7 +26,7 @@ namespace llvm { struct EVT; - +class HexagonSubtarget; class HexagonInstrInfo : public HexagonGenInstrInfo { virtual void anchor(); const HexagonRegisterInfo RI; @@ -102,15 +102,21 @@ public: const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, + /// expandPostRAPseudo - This function is called for all pseudo instructions + /// that remain after register allocation. Many pseudo instructions are + /// created to help register allocation. This is the place to convert them + /// into real instructions. The target can edit MI in place, or it can insert + /// new instructions and erase MI. The function should return true if + /// anything was changed. + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex) const override; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const override { + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const override { return nullptr; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 60635cf..19cf993 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -104,10 +104,16 @@ def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>; //===----------------------------------------------------------------------===// // ALU32/ALU + //===----------------------------------------------------------------------===// +// Add. + +def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev, @@ -243,6 +249,9 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in { def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>; } +def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>; +def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>; + let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm> : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -321,7 +330,7 @@ let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8), "$Rdd = combine(#$s8, #$S8)", [(set (i64 DoubleRegs:$Rdd), - (i64 (HexagonCOMBINE(i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> { + (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> { bits<5> Rdd; bits<8> s8; bits<8> S8; @@ -406,7 +415,7 @@ multiclass Addri_base<string mnemonic, SDNode OpNode> { defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel; -def: Pat<(i32 (add I32:$Rs, s16ExtPred:$s16)), +def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)), (i32 (A2_addi I32:$Rs, imm:$s16))>; //===----------------------------------------------------------------------===// @@ -420,7 +429,7 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp> : ALU32_ri <(outs IntRegs:$Rd), (ins IntRegs:$Rs, s10Ext:$s10), "$Rd = "#mnemonic#"($Rs, #$s10)" , - [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> { + [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> { bits<5> Rd; bits<5> Rs; bits<10> s10; @@ -465,7 +474,7 @@ def A2_nop: ALU32Inst <(outs), (ins), "nop" > { let Inst{27-24} = 0b1111; } -def: Pat<(sub s10ExtPred:$s10, IntRegs:$Rs), +def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs), (A2_subri imm:$s10, IntRegs:$Rs)>; // Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). @@ -613,7 +622,7 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1, isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16", - [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, + [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredRel { bits<5> Rd; bits<16> s16; @@ -637,9 +646,13 @@ def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), // TODO: see if this instruction can be deleted.. let isExtendable = 1, opExtendable = 1, opExtentBits = 6, - isAsmParserOnly = 1 in -def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1), + isAsmParserOnly = 1 in { +def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1), "$dst = #$src1">; +def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst), + (ins s8Ext:$src1, s8Imm:$src2), + "$dst = combine(##$src1, #$src2)">; +} //===----------------------------------------------------------------------===// // ALU32/ALU - @@ -677,11 +690,11 @@ let opExtendable = 3 in def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8), "$Rd = mux($Pu, $Rs, #$s8)">; -def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)), - (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>; +def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)), + (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>; -def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)), - (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>; +def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)), + (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>; // C2_muxii: Scalar mux immediates. let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, @@ -690,7 +703,7 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8), "$Rd = mux($Pu, #$s8, #$S8)" , [(set (i32 IntRegs:$Rd), - (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > { + (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > { bits<5> Rd; bits<2> Pu; bits<8> s8; @@ -706,6 +719,12 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), let Inst{4-0} = Rd; } +let isCodeGenOnly = 1, isPseudo = 1 in +def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd), + (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"should not emit\" ", []>; + + //===----------------------------------------------------------------------===// // template class for non-predicated alu32_2op instructions // - aslh, asrh, sxtb, sxth, zxth @@ -987,6 +1006,17 @@ def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>; //===----------------------------------------------------------------------===// // ALU32/PRED + //===----------------------------------------------------------------------===// +// No bits needed. If cmp.ge is found the assembler parser will +// transform it to cmp.gt subtracting 1 from the immediate. +let isPseudo = 1 in { +def C2_cmpgei: ALU32Inst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8), + "$Pd = cmp.ge($Rs, #$s8)">; +def C2_cmpgeui: ALU32Inst < + (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8), + "$Pd = cmp.geu($Rs, #$s8)">; +} + //===----------------------------------------------------------------------===// // ALU32/PRED - @@ -1742,27 +1772,29 @@ def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>; multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, InstHexagon MI> { def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), (VT (MI IntRegs:$Rs, imm:$Off))>; def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; } let AddedComplexity = 20 in { - defm: Loadx_pat<load, i32, s11_2ExtPred, L2_loadri_io>; - defm: Loadx_pat<load, i64, s11_3ExtPred, L2_loadrd_io>; - defm: Loadx_pat<atomic_load_8 , i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<atomic_load_16, i32, s11_1ExtPred, L2_loadruh_io>; - defm: Loadx_pat<atomic_load_32, i32, s11_2ExtPred, L2_loadri_io>; - defm: Loadx_pat<atomic_load_64, i64, s11_3ExtPred, L2_loadrd_io>; - - defm: Loadx_pat<extloadi1, i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<extloadi8, i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<extloadi16, i32, s11_1ExtPred, L2_loadruh_io>; - defm: Loadx_pat<sextloadi8, i32, s11_0ExtPred, L2_loadrb_io>; - defm: Loadx_pat<sextloadi16, i32, s11_1ExtPred, L2_loadrh_io>; - defm: Loadx_pat<zextloadi1, i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<zextloadi8, i32, s11_0ExtPred, L2_loadrub_io>; - defm: Loadx_pat<zextloadi16, i32, s11_1ExtPred, L2_loadruh_io>; + defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>; + defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>; + defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>; + + defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>; + defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>; + defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>; + defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>; + defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>; // No sextloadi1. } @@ -2707,7 +2739,7 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern> let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in def M2_mpysip : T_MType_mpy_ri <0, u8Ext, - [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>; + [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>; def M2_mpysin : T_MType_mpy_ri <1, u8Imm, [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs, @@ -2729,7 +2761,7 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), "$dst = mpyi($src1, #$src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - s9ExtPred:$src2))]>, ImmRegRel; + s32ImmPred:$src2))]>, ImmRegRel; let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3, InputType = "imm" in @@ -2780,7 +2812,7 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp, let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in { def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext, [(set (i32 IntRegs:$dst), - (add (mul IntRegs:$src2, u8ExtPred:$src3), + (add (mul IntRegs:$src2, u32ImmPred:$src3), IntRegs:$src1))]>, ImmRegRel; def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, @@ -2793,7 +2825,7 @@ let CextOpcode = "ADD_acc" in { let isExtentSigned = 1 in def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3), + (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3), (i32 IntRegs:$src1)))]>, ImmRegRel; def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, @@ -2825,9 +2857,9 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp> (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>; -def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>; +def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>; -def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>; +def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>; def : T_MType_acc_pat2 <M2_nacci, add, sub>; //===----------------------------------------------------------------------===// @@ -3514,7 +3546,8 @@ let addrMode = BaseImmOffset, InputType = "imm" in { } // Patterns for generating stores, where the address takes different forms: -// - frameindex,, +// - frameindex, +// - frameindex + offset, // - base + offset, // - simple (base address without offset). // These would usually be used together (via Storex_pat defined below), but @@ -3522,6 +3555,10 @@ let addrMode = BaseImmOffset, InputType = "imm" in { // AddedComplexity) to the individual patterns. class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI> : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; +class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, InstHexagon MI> : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), @@ -3537,6 +3574,10 @@ class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, InstHexagon MI> : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; +class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, + PatFrag ValueMod, InstHexagon MI> + : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, PatFrag ValueMod, InstHexagon MI> : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), @@ -3548,14 +3589,16 @@ class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, InstHexagon MI> { - def: Storex_fi_pat <Store, Value, MI>; - def: Storex_add_pat <Store, Value, ImmPred, MI>; + def: Storex_fi_pat <Store, Value, MI>; + def: Storex_fi_add_pat <Store, Value, ImmPred, MI>; + def: Storex_add_pat <Store, Value, ImmPred, MI>; } multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, PatFrag ValueMod, InstHexagon MI> { - def: Storexm_fi_pat <Store, Value, ValueMod, MI>; - def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; + def: Storexm_fi_pat <Store, Value, ValueMod, MI>; + def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>; + def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>; } // Regular stores in the DAG have two operands: value and address. @@ -3567,15 +3610,15 @@ class SwapSt<PatFrag F> : PatFrag<(ops node:$val, node:$ptr), F.Fragment>; let AddedComplexity = 20 in { - defm: Storex_pat<truncstorei8, I32, s11_0ExtPred, S2_storerb_io>; - defm: Storex_pat<truncstorei16, I32, s11_1ExtPred, S2_storerh_io>; - defm: Storex_pat<store, I32, s11_2ExtPred, S2_storeri_io>; - defm: Storex_pat<store, I64, s11_3ExtPred, S2_storerd_io>; + defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>; - defm: Storex_pat<SwapSt<atomic_store_8>, I32, s11_0ExtPred, S2_storerb_io>; - defm: Storex_pat<SwapSt<atomic_store_16>, I32, s11_1ExtPred, S2_storerh_io>; - defm: Storex_pat<SwapSt<atomic_store_32>, I32, s11_2ExtPred, S2_storeri_io>; - defm: Storex_pat<SwapSt<atomic_store_64>, I64, s11_3ExtPred, S2_storerd_io>; + defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>; } // Simple patterns should be tried with the least priority. @@ -3590,9 +3633,9 @@ def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>; def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>; let AddedComplexity = 20 in { - defm: Storexm_pat<truncstorei8, I64, s11_0ExtPred, LoReg, S2_storerb_io>; - defm: Storexm_pat<truncstorei16, I64, s11_1ExtPred, LoReg, S2_storerh_io>; - defm: Storexm_pat<truncstorei32, I64, s11_2ExtPred, LoReg, S2_storeri_io>; + defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>; + defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>; + defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>; } def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; @@ -4321,6 +4364,14 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), // XTYPE/PERM + //===----------------------------------------------------------------------===// +def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))), + (i32 8)), + (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))), + (i32 16)), + (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))), + (zextloadi8 (i32 IntRegs:$b))), + (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; + //===----------------------------------------------------------------------===// // XTYPE/PERM - //===----------------------------------------------------------------------===// @@ -4364,7 +4415,7 @@ def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src), // Patterns for loads of i1: def: Pat<(i1 (load AddrFI:$fi)), (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; -def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s11_0ExtPred:$Off))), +def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))), (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; def: Pat<(i1 (load (i32 IntRegs:$Rs))), (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; @@ -4375,7 +4426,7 @@ def I1toI32: OutPatFrag<(ops node:$Rs), def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>; -defm: Storexm_pat<store, I1, s11_0ExtPred, I1toI32, S2_storerb_io>; +defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>; def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; //===----------------------------------------------------------------------===// @@ -4474,6 +4525,12 @@ def Y2_barrier : SYSInst<(outs), (ins), //===----------------------------------------------------------------------===// // SYSTEM/SUPER - //===----------------------------------------------------------------------===// + +// Generate frameindex addresses. +let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, + isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in +def TFR_FI: ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$fi, s32Imm:$Off), "">; + //===----------------------------------------------------------------------===// // CRUSER - Type. //===----------------------------------------------------------------------===// @@ -4519,6 +4576,11 @@ class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0> multiclass LOOP_ri<string mnemonic> { def i : LOOP_iBase<mnemonic, brtarget>; def r : LOOP_rBase<mnemonic, brtarget>; + + let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in { + def iext: LOOP_iBase<mnemonic, brtargetExt, 1>; + def rext: LOOP_rBase<mnemonic, brtargetExt, 1>; + } } @@ -4676,36 +4738,6 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), let Inst{20-16} = Rs; } -let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in -def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), - s12ImmPred:$src3)))]>; - -let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in -def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, - (i32 IntRegs:$src3))))]>; - -let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in -def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), - "Error; should not emit", - [(set (i32 IntRegs:$dst), - (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, - s12ImmPred:$src3)))]>; - -// Generate frameindex addresses. -let isReMaterializable = 1, isCodeGenOnly = 1 in -def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1), - "$dst = add($src1)", - [(set (i32 IntRegs:$dst), ADDRri:$src1)]>; - // Support for generating global address. // Taken from X86InstrInfo.td. def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, @@ -4750,30 +4782,29 @@ def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), "$dst.h = #HI($label@GOTREL)", []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, - isAsmParserOnly = 1 in -def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), - "$dst.l = #LO($imm_value)", - []>; - +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOT)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, - isAsmParserOnly = 1 in -def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), - "$dst.h = #HI($imm_value)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOT)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, - isAsmParserOnly = 1 in -def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), - "$dst.l = #LO($jt)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global@GOTREL)", + []>; -let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, - isAsmParserOnly = 1 in -def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), - "$dst.h = #HI($jt)", - []>; +let isReMaterializable = 1, isMoveImm = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global@GOTREL)", + []>; // This pattern is incorrect. When we add small data, we should change // this pattern to use memw(#foo). @@ -4785,31 +4816,19 @@ def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global), (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; let isReMaterializable = 1, isMoveImm = 1 in -def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst = CONST32(#$global)", - [(set (i32 IntRegs:$dst), - (HexagonCONST32 tglobaladdr:$global))]>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_set_jt : CONSTLDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt), "$dst = CONST32(#$jt)", [(set (i32 IntRegs:$dst), (HexagonCONST32 tjumptable:$jt))]>; let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in -def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst = CONST32(#$global)", - [(set (i32 IntRegs:$dst), - (HexagonCONST32_GP tglobaladdr:$global))]>; - -let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global), "$dst = CONST32(#$global)", [(set (i32 IntRegs:$dst), imm:$global) ]>; -// Map BlockAddress lowering to CONST32_Int_Real -def : Pat<(HexagonCONST32_GP tblockaddress:$addr), - (CONST32_Int_Real tblockaddress:$addr)>; +// Map TLS addressses to a CONST32 instruction +def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>; +def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>; let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label), @@ -4869,21 +4888,17 @@ let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, def TCRETURNr : T_JMPr; // Direct tail-calls. -let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, -isTerminator = 1, isCodeGenOnly = 1 in { - def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst", - [], "", J_tc_2early_SLOT23>; - def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst", - [], "", J_tc_2early_SLOT23>; -} +let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, + isTerminator = 1, isCodeGenOnly = 1 in +def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>; //Tail calls. def: Pat<(HexagonTCRet tglobaladdr:$dst), - (TCRETURNtg tglobaladdr:$dst)>; + (TCRETURNi tglobaladdr:$dst)>; def: Pat<(HexagonTCRet texternalsym:$dst), - (TCRETURNtext texternalsym:$dst)>; + (TCRETURNi texternalsym:$dst)>; def: Pat<(HexagonTCRet (i32 IntRegs:$dst)), - (TCRETURNr (i32 IntRegs:$dst))>; + (TCRETURNr IntRegs:$dst)>; // Map from r0 = and(r1, 65535) to r0 = zxth(r1) def: Pat<(and (i32 IntRegs:$src1), 65535), @@ -4900,19 +4915,19 @@ def: Pat<(add (i1 PredRegs:$src1), -1), (C2_not PredRegs:$src1)>; // Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). -def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ExtPred:$src3), - (C2_muxii PredRegs:$src1, s8ExtPred:$src3, s8ImmPred:$src2)>; +def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3), + (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>; // Map from p0 = pnot(p0); r0 = select(p0, #i, r1) // => r0 = C2_muxir(p0, r1, #i) -def: Pat<(select (not (i1 PredRegs:$src1)), s8ExtPred:$src2, +def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2, (i32 IntRegs:$src3)), - (C2_muxir PredRegs:$src1, IntRegs:$src3, s8ExtPred:$src2)>; + (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>; // Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) // => r0 = C2_muxri (p0, #i, r1) -def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s8ExtPred:$src3), - (C2_muxri PredRegs:$src1, s8ExtPred:$src3, IntRegs:$src2)>; +def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3), + (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>; // Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), @@ -4952,26 +4967,6 @@ def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>; -// cmp.lt(r0, r1) -> cmp.gt(r1, r0) -def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - bb:$offset), - (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>; - -def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), - bb:$offset)>; - -def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)), - bb:$offset)>; - -def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), - bb:$offset)>; - // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), @@ -4987,10 +4982,6 @@ def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; -// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs. -def : Pat<(i1 (load ADDRriS11_2:$addr)), - (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>; - // Map for truncating from 64 immediates to 32 bit immediates. def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), (LoReg DoubleRegs:$src)>; @@ -4999,42 +4990,10 @@ def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), (C2_tfrrp (LoReg DoubleRegs:$src))>; -// Map memb(Rs) = Rdd -> memb(Rs) = Rt. -def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map memh(Rs) = Rdd -> memh(Rs) = Rt. -def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; -// Map memw(Rs) = Rdd -> memw(Rs) = Rt -def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map memw(Rs) = Rdd -> memw(Rs) = Rt. -def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), - (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), - subreg_loreg)))>; - -// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0. -def : Pat<(store (i1 -1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>; - - -// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0. -def : Pat<(store (i1 -1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>; - -// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt. -def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr), - (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>; - // rs <= rt -> !(rs > rt). let AddedComplexity = 30 in -def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>; +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; // rs <= rt -> !(rs > rt). def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), @@ -5048,13 +5007,8 @@ def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), // Hexagon_TODO: We should improve on this. // rs != rt -> !(rs == rt). let AddedComplexity = 30 in -def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), - (C2_not (C2_cmpeqi IntRegs:$src1, s10ExtPred:$src2))>; - -// Map cmpne(Rs) -> !cmpeqe(Rs). -// rs != rt -> !(rs == rt). -def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>; +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>; // Convert setne back to xor for hexagon since we compute w/ pred registers. def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), @@ -5072,8 +5026,8 @@ def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), // cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) let AddedComplexity = 30 in -def: Pat<(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)), - (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>; +def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -5084,20 +5038,21 @@ def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), // !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). // rs < rt -> !(rs >= rt). let AddedComplexity = 30 in -def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2)))>; +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; // Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)), (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; // Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) -def: Pat<(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)), - (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u8ExtPred:$src2))>; +def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>; // Generate cmpgtu(Rs, #u9) -def: Pat<(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)), - (C2_cmpgtui IntRegs:$src1, u9ExtPred:$src2)>; +def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)), + (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). @@ -5118,11 +5073,6 @@ def: Pat<(i32 (sext (i1 PredRegs:$src1))), def: Pat<(i64 (sext (i1 PredRegs:$src1))), (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; -// Convert sign-extended load back to load and sign extend. -// i32 -> i64 -def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)), - (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>; - // Zero extends. // i1 -> i32 def: Pat<(i32 (zext (i1 PredRegs:$src1))), @@ -5136,12 +5086,6 @@ def: Pat<(i32 (anyext (i1 PredRegs:$src1))), def: Pat<(i64 (anyext (i1 PredRegs:$src1))), (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; -def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), - (i32 32))), - (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), - (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), - (L2_loadri_io AddrFI:$srcLow, 0)))>; - // Multiply 64-bit unsigned and use upper result. def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), (A2_addp @@ -5186,10 +5130,13 @@ let AddedComplexity = 100 in def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), (i32 IntRegs:$src1)>; -def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; -def : Pat<(HexagonWrapperJT tjumptable:$dst), - (i32 (CONST32_set_jt tjumptable:$dst))>; +def: Pat<(HexagonJT tjumptable:$dst), + (CONST32_set_jt tjumptable:$dst)>; +def: Pat<(HexagonCP tconstpool :$dst), + (CONST32_set_jt tconstpool:$dst)>; // XTYPE/SHIFT // @@ -5626,6 +5573,43 @@ let hasNewValue = 1 in { def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>; def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>; + +def SDTHexagonINSERT_ri : SDTypeProfile<1, 4, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>]>; +def SDTHexagonINSERT_rd : SDTypeProfile<1, 4, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>]>; +def SDTHexagonINSERT_riv : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>, + SDTCisVT<3, i64>]>; +def SDTHexagonINSERT_rdv : SDTypeProfile<1, 3, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>, + SDTCisVT<3, i64>]>; +def HexagonINSERT_ri : SDNode<"HexagonISD::INSERT_ri", SDTHexagonINSERT_ri>; +def HexagonINSERT_rd : SDNode<"HexagonISD::INSERT_rd", SDTHexagonINSERT_rd>; +def HexagonINSERT_riv: SDNode<"HexagonISD::INSERT_riv", SDTHexagonINSERT_riv>; +def HexagonINSERT_rdv: SDNode<"HexagonISD::INSERT_rdv", SDTHexagonINSERT_rdv>; + +def: Pat<(HexagonINSERT_ri I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>; + +def: Pat<(HexagonINSERT_rd I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>; + +def: Pat<(HexagonINSERT_riv I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; + +def: Pat<(HexagonINSERT_rdv I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; + + //===----------------------------------------------------------------------===// // Template class for 'extract bitfield' instructions //===----------------------------------------------------------------------===// @@ -5692,6 +5676,37 @@ let hasNewValue = 1 in { def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>; } +def SDTHexagonEXTRACTU_ri : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTU_rd : SDTypeProfile<1, 3, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTU_riv : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, i64>]>; +def SDTHexagonEXTRACTU_rdv : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>]>; +def HexagonEXTRACTU_ri : SDNode<"HexagonISD::EXTRACTU_ri", SDTHexagonEXTRACTU_ri>; +def HexagonEXTRACTU_rd : SDNode<"HexagonISD::EXTRACTU_rd", SDTHexagonEXTRACTU_rd>; +def HexagonEXTRACTU_riv: SDNode<"HexagonISD::EXTRACTU_riv", SDTHexagonEXTRACTU_riv>; +def HexagonEXTRACTU_rdv: SDNode<"HexagonISD::EXTRACTU_rdv", SDTHexagonEXTRACTU_rdv>; + +def: Pat<(HexagonEXTRACTU_ri I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3), + (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>; + +def: Pat<(HexagonEXTRACTU_rd I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3), + (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>; + +def: Pat<(HexagonEXTRACTU_riv I32:$src1, I64:$src2), + (S2_extractu_rp I32:$src1, I64:$src2)>; + +def: Pat<(HexagonEXTRACTU_rdv I64:$src1, I64:$src2), + (S2_extractup_rp I64:$src1, I64:$src2)>; + // Change the sign of the immediate for Rd=-mpyi(Rs,#u8) def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>; @@ -5728,6 +5743,22 @@ def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>; def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>; //===----------------------------------------------------------------------===// +// Template class for 'table index' instructions which are assembler mapped +// to their :raw format. +//===----------------------------------------------------------------------===// +let isPseudo = 1 in +class tableidx_goodsyntax <string mnemonic> + : SInst <(outs IntRegs:$Rx), + (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5), + "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)", + [], "$Rx = $_dst_" >; + +def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">; +def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">; +def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">; +def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">; + +//===----------------------------------------------------------------------===// // V3 Instructions + //===----------------------------------------------------------------------===// @@ -5761,4 +5792,4 @@ include "HexagonInstrInfoV5.td" // ALU32/64/Vector + //===----------------------------------------------------------------------===/// -include "HexagonInstrInfoVector.td"
\ No newline at end of file +include "HexagonInstrInfoVector.td" diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 0e4dde3..918b482 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -11,6 +11,25 @@ // //===----------------------------------------------------------------------===// +def DuplexIClass0: InstDuplex < 0 >; +def DuplexIClass1: InstDuplex < 1 >; +def DuplexIClass2: InstDuplex < 2 >; +let isExtendable = 1 in { + def DuplexIClass3: InstDuplex < 3 >; + def DuplexIClass4: InstDuplex < 4 >; + def DuplexIClass5: InstDuplex < 5 >; + def DuplexIClass6: InstDuplex < 6 >; + def DuplexIClass7: InstDuplex < 7 >; +} +def DuplexIClass8: InstDuplex < 8 >; +def DuplexIClass9: InstDuplex < 9 >; +def DuplexIClassA: InstDuplex < 0xA >; +def DuplexIClassB: InstDuplex < 0xB >; +def DuplexIClassC: InstDuplex < 0xC >; +def DuplexIClassD: InstDuplex < 0xD >; +def DuplexIClassE: InstDuplex < 0xE >; +def DuplexIClassF: InstDuplex < 0xF >; + def addrga: PatLeaf<(i32 AddrGA:$Addr)>; def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; @@ -137,6 +156,9 @@ def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>; def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>; def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>; +def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>; + +def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>; class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm> : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -247,10 +269,10 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg> def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>; def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>; -def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))), - (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>; -def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))), - (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>; +def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>; +def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))), + (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>; // Preserve the S2_tstbit_r generation def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), @@ -292,16 +314,15 @@ let opExtendable = 1 in def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs), "$Rdd = combine(#$s8, $Rs)">; -def HexagonWrapperCombineRI_V4 : - SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>; -def HexagonWrapperCombineIR_V4 : - SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>; +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { +def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i), + (A4_combineri IntRegs:$r, s32ImmPred:$i)>; -def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i), - (A4_combineri IntRegs:$r, s8ExtPred:$i)>; - -def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r), - (A4_combineir s8ExtPred:$i, IntRegs:$r)>; +def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r), + (A4_combineir s32ImmPred:$i, IntRegs:$r)>; +} // A4_combineii: Set two small immediates. let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in @@ -322,7 +343,7 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6), // The complexity of the combine with two immediates should be greater than // the complexity of a combine involving a register. let AddedComplexity = 75 in -def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u6ExtPred:$u6), +def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6), (A4_combineii imm:$s8, imm:$u6)>; //===----------------------------------------------------------------------===// @@ -346,20 +367,22 @@ multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, PatLeaf ImmPred, InstHexagon MI> { def: Pat<(VT (Load AddrFI:$fi)), (VT (ValueMod (MI AddrFI:$fi, 0)))>; + def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (ValueMod (MI IntRegs:$Rs, 0)))>; } -defm: Loadxm_pat<extloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; -defm: Loadxm_pat<extloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; -defm: Loadxm_pat<extloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>; -defm: Loadxm_pat<zextloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; -defm: Loadxm_pat<zextloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>; -defm: Loadxm_pat<zextloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>; -defm: Loadxm_pat<sextloadi8, i64, Sext64, s11_0ExtPred, L2_loadrb_io>; -defm: Loadxm_pat<sextloadi16, i64, Sext64, s11_1ExtPred, L2_loadrh_io>; +defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>; +defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>; +defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>; +defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>; // Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; @@ -635,19 +658,6 @@ def: Pat<(i64 (zext (i1 PredRegs:$src1))), def: Pat<(i64 (zext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; -// zext i32->i64 -def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), - (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>; - -let AddedComplexity = 100 in -def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), - (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1, - s11_2ExtPred:$offset)))>; - -// anyext i32->i64 -def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), - (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>; - //===----------------------------------------------------------------------===// // LD - //===----------------------------------------------------------------------===// @@ -768,8 +778,8 @@ multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, PatFrag stOp> { def : Pat<(stOp (VT RC:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3)), - (MI IntRegs:$src1, u2ImmPred:$src2, u0AlwaysExtPred:$src3, RC:$src4)>; + u32ImmPred:$src3)), + (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>; def : Pat<(stOp (VT RC:$src4), (add (shl IntRegs:$src1, u2ImmPred:$src2), @@ -1157,17 +1167,17 @@ let AddedComplexity = 40 in { // is not extendable. This could cause problems during removing the frame // indices, since the offset with respect to R29/R30 may not fit in the // u6 field. - def: Storexm_add_pat<truncstorei8, s8ExtPred, u6_0ImmPred, ToImmByte, + def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte, S4_storeirb_io>; - def: Storexm_add_pat<truncstorei16, s8ExtPred, u6_1ImmPred, ToImmHalf, + def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf, S4_storeirh_io>; - def: Storexm_add_pat<store, s8ExtPred, u6_2ImmPred, ToImmWord, + def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord, S4_storeiri_io>; } -def: Storexm_simple_pat<truncstorei8, s8ExtPred, ToImmByte, S4_storeirb_io>; -def: Storexm_simple_pat<truncstorei16, s8ExtPred, ToImmHalf, S4_storeirh_io>; -def: Storexm_simple_pat<store, s8ExtPred, ToImmWord, S4_storeiri_io>; +def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>; +def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>; +def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>; // memb(Rx++#s4:0:circ(Mu))=Rt // memb(Rx++I:circ(Mu))=Rt @@ -1798,6 +1808,49 @@ def: LogLogNot_pat<or, and, C4_or_andn>; def: LogLogNot_pat<or, or, C4_or_orn>; //===----------------------------------------------------------------------===// +// PIC: Support for PIC compilations. The patterns and SD nodes defined +// below are needed to support code generation for PIC +//===----------------------------------------------------------------------===// + +def SDT_HexagonPICAdd + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDT_HexagonGOTAdd + : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; + +def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>; +def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>; +def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternal>; +def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternalJT>; +def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL", + SDT_HexagonGOTAddInternalBA>; + +// PIC: Map from a block address computation to a PC-relative add +def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from the computation to generate a GOT pointer to a PC-relative add +def: Pat<(Hexagonpic_add texternalsym:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from a jump table address computation to a PC-relative add +def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1), + (C4_addipc u32ImmPred:$src1)>; + +// PIC: Map from a GOT-relative symbol reference to a load +def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2), + (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>; + +// PIC: Map from a static symbol reference to a PC-relative add +def: Pat<(Hexagongat_pcrel tglobaladdr:$src1), + (C4_addipc u32ImmPred:$src1)>; + +//===----------------------------------------------------------------------===// // CR - //===----------------------------------------------------------------------===// @@ -1836,7 +1889,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), "$Rd = add($Rs, add($Ru, #$s6))" , [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), - (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))], + (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))], "", ALU64_tc_2_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -1877,19 +1930,19 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), } // Rd=add(Rs,sub(#s6,Ru)) -def: Pat<(add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2, +def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2, (i32 IntRegs:$src3))), - (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>; + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; // Rd=sub(add(Rs,#s6),Ru) -def: Pat<(sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2), +def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2), (i32 IntRegs:$src3)), - (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>; + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; // Rd=add(sub(Rs,Ru),#s6) def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), - (s6_10ExtPred:$src2)), - (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>; + (s32ImmPred:$src2)), + (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>; // Add or subtract doublewords with carry. @@ -2042,7 +2095,7 @@ def S4_or_andix: (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10), "$Rx = or($Ru, and($_src_, #$s10))" , [(set (i32 IntRegs:$Rx), - (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] , + (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] , "$_src_ = $Rx", ALU64_tc_2_SLOT23> { bits<5> Rx; bits<5> Ru; @@ -2187,7 +2240,7 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode> (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10), "$Rx |= "#mnemonic#"($Rs, #$s10)", [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1), - (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))], + (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))], "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel { bits<5> Rx; bits<5> Rs; @@ -2349,7 +2402,7 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [(set (i32 IntRegs:$Rd), (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6), - u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { bits<5> Rd; bits<6> u6; bits<5> Rs; @@ -2374,7 +2427,7 @@ def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [(set (i32 IntRegs:$Rd), - (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))], + (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))], "", ALU64_tc_3x_SLOT23>, ImmRegRel { bits<5> Rd; bits<6> u6; @@ -2424,7 +2477,7 @@ def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred, let isExtendable = 1, opExtentBits = 6, opExtendable = 3, CextOpcode = "ADD_MPY", InputType = "imm" in -def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred, +def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred, (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel; // Rx=add(Ru,mpyi(Rx,Rs)) @@ -2447,17 +2500,6 @@ def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), let Inst{20-16} = Rs; } -// Rd=add(##,mpyi(Rs,#U6)) -def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), - (HexagonCONST32 tglobaladdr:$src1)), - (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2, - u6ImmPred:$src3))>; - -// Rd=add(##,mpyi(Rs,Rt)) -def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - (HexagonCONST32 tglobaladdr:$src1)), - (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2, - IntRegs:$src3))>; // Vector reduce multiply word by signed half (32x16) //Rdd=vrmpyweh(Rss,Rtt)[:<<1] @@ -2569,7 +2611,7 @@ class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh, : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5), "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))", [(set (i32 IntRegs:$Rd), - (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))], + (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))], "$Rd = $Rx", Itin> { bits<5> Rd; @@ -2904,7 +2946,7 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { // mem[bh](Rs+#u6) += #U5 //===----------------------------------------------------------------------===// -multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, +multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, InstHexagon MI, SDNode OpNode> { let AddedComplexity = 180 in def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), @@ -2912,24 +2954,24 @@ multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, (MI IntRegs:$addr, 0, u5ImmPred:$addend)>; let AddedComplexity = 190 in - def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)), + def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)), u5ImmPred:$addend), - (add IntRegs:$base, ExtPred:$offset)), - (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>; + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>; } -multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, +multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, InstHexagon addMI, InstHexagon subMI> { - defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>; - defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>; + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>; + defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>; } multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, L4_iadd_memoph_io, L4_isub_memoph_io>; // Byte - defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred, + defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred, L4_iadd_memopb_io, L4_isub_memopb_io>; } @@ -2939,7 +2981,7 @@ let Predicates = [UseMEMOP] in { defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend // Word - defm: MemOpi_u5ALUOp <load, store, u6_2ExtPred, L4_iadd_memopw_io, + defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io, L4_isub_memopw_io>; } @@ -2950,7 +2992,7 @@ let Predicates = [UseMEMOP] in { // mem[bh](Rs+#u6) += #m5 //===----------------------------------------------------------------------===// -multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, +multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred, PatLeaf immPred, SDNodeXForm xformFunc, InstHexagon MI> { let AddedComplexity = 190 in @@ -2958,18 +3000,18 @@ multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>; let AddedComplexity = 195 in - def: Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)), + def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)), immPred:$subend), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>; + (add IntRegs:$base, ImmPred:$offset)), + (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>; } multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred, + defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred, MEMOPIMM_HALF, L4_isub_memoph_io>; // Byte - defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred, + defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred, MEMOPIMM_BYTE, L4_isub_memopb_io>; } @@ -2979,7 +3021,7 @@ let Predicates = [UseMEMOP] in { defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend // Word - defm: MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred, + defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred, MEMOPIMM, L4_isub_memopw_io>; } @@ -3008,16 +3050,16 @@ multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> { // Byte - clrbit - defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred, + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred, CLRMEMIMM_BYTE, L4_iand_memopb_io, and>; // Byte - setbit - defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred, + defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred, SETMEMIMM_BYTE, L4_ior_memopb_io, or>; // Half Word - clrbit - defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred, + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred, CLRMEMIMM_SHORT, L4_iand_memoph_io, and>; // Half Word - setbit - defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred, + defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred, SETMEMIMM_SHORT, L4_ior_memoph_io, or>; } @@ -3030,9 +3072,9 @@ let Predicates = [UseMEMOP] in { // memw(Rs+#0) = [clrbit|setbit](#U5) // memw(Rs+#u6:2) = [clrbit|setbit](#U5) - defm: MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, CLRMEMIMM, + defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM, L4_iand_memopw_io, and>; - defm: MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, SETMEMIMM, + defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM, L4_ior_memopw_io, or>; } @@ -3070,11 +3112,11 @@ multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred, multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { // Half Word - defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred, L4_add_memoph_io, L4_sub_memoph_io, L4_and_memoph_io, L4_or_memoph_io>; // Byte - defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u6ExtPred, + defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred, L4_add_memopb_io, L4_sub_memopb_io, L4_and_memopb_io, L4_or_memopb_io>; } @@ -3086,7 +3128,7 @@ let Predicates = [UseMEMOP] in { defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend // Word - defm: MemOPr_ALUOp <load, store, u6_2ExtPred, L4_add_memopw_io, + defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io, L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>; } @@ -3110,23 +3152,23 @@ def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>; def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>; def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>; -def : T_CMP_pat <C4_cmpneqi, setne, s10ExtPred>; -def : T_CMP_pat <C4_cmpltei, setle, s10ExtPred>; +def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>; +def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>; def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>; // rs <= rt -> !(rs > rt). /* -def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>; -// (C4_cmpltei IntRegs:$src1, s10ExtPred:$src2)>; +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>; +// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>; */ // Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). -def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), - (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>; +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>; // rs != rt -> !(rs == rt). -def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), - (C4_cmpneqi IntRegs:$src1, s10ExtPred:$src2)>; +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)), + (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>; // SDNode for converting immediate C to C-1. def DEC_CONST_BYTE : SDNodeXForm<imm, [{ @@ -3136,168 +3178,6 @@ def DEC_CONST_BYTE : SDNodeXForm<imm, [{ }]>; // For the sequence -// zext( seteq ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.eq(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>; - -// For the sequence -// zext( setne ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.eq(Rs, #u8) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 0, 1))>; - -// For the sequence -// zext( seteq (Rs, and(Rt, 255))) -// Generate -// Pd=cmpb.eq(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt), - (i32 (and (i32 IntRegs:$Rs), 255)))))), - (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 1, 0))>; - -// For the sequence -// zext( setne (Rs, and(Rt, 255))) -// Generate -// Pd=cmpb.eq(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt), - (i32 (and (i32 IntRegs:$Rs), 255)))))), - (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>; - -// For the sequence -// zext( setugt ( and(Rs, 255), u8)) -// Generate -// Pd=cmpb.gtu(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>; - -// For the sequence -// zext( setugt ( and(Rs, 254), u8)) -// Generate -// Pd=cmpb.gtu(Rs, #u8) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)), - u8ExtPred:$u8)))), - (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs), - (u8ExtPred:$u8))), - 1, 0))>; - -// For the sequence -// zext( setult ( Rs, Rt)) -// Generate -// Pd=cmp.ltu(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) -def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 1, 0))>; - -// For the sequence -// zext( setlt ( Rs, Rt)) -// Generate -// Pd=cmp.lt(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) -def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 1, 0))>; - -// For the sequence -// zext( setugt ( Rs, Rt)) -// Generate -// Pd=cmp.gtu(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 -def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 1, 0))>; - -// This pattern interefers with coremark performance, not implementing at this -// time. -// For the sequence -// zext( setgt ( Rs, Rt)) -// Generate -// Pd=cmp.gt(Rs, Rt) -// if (Pd.new) Rd=#1 -// if (!Pd.new) Rd=#0 - -// For the sequence -// zext( setuge ( Rs, Rt)) -// Generate -// Pd=cmp.ltu(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) -def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 0, 1))>; - -// For the sequence -// zext( setge ( Rs, Rt)) -// Generate -// Pd=cmp.lt(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) -def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt), - (i32 IntRegs:$Rs))), - 0, 1))>; - -// For the sequence -// zext( setule ( Rs, Rt)) -// Generate -// Pd=cmp.gtu(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>; - -// For the sequence -// zext( setle ( Rs, Rt)) -// Generate -// Pd=cmp.gt(Rs, Rt) -// if (Pd.new) Rd=#0 -// if (!Pd.new) Rd=#1 -def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), - (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs), - (i32 IntRegs:$Rt))), - 0, 1))>; - -// For the sequence // zext( setult ( and(Rs, 255), u8)) // Use the isdigit transformation below @@ -3381,26 +3261,17 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; // Restore registers and dealloc return function call. let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { - def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs), - (ins calltarget:$dst), - "jump $dst", - []>; + def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; } // Restore registers and dealloc frame before a tail call. let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs), - (ins calltarget:$dst), - "call $dst", - []>; + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; } // Save registers function call. let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { - def SAVE_REGISTERS_CALL_V4 : JInst<(outs), - (ins calltarget:$dst), - "call $dst // Save_calle_saved_registers", - []>; + def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; } //===----------------------------------------------------------------------===// @@ -3472,7 +3343,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp, //===----------------------------------------------------------------------===// class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp, bits<2> MajOp, bit isHalf> - : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>, + : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3513,7 +3384,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC, let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1, isNewValue = 1, opNewValue = 1 in class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs> - : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src), + : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src), mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new", [], "", V2LDST_tc_st_SLOT0> { bits<19> addr; @@ -3743,7 +3614,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp, class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp, bits<3> MajOp> - : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel { + : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel { string ImmOpStr = !cast<string>(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3903,17 +3774,17 @@ def: Pat<(i64 (ctlz I64:$src1)), (Zext64 (S2_cl0p I64:$src1))>; def: Pat<(i64 (cttz I64:$src1)), (Zext64 (S2_ct0p I64:$src1))>; let AddedComplexity = 30 in { - def: Storea_pat<truncstorei8, I32, u0AlwaysExtPred, S2_storerbabs>; - def: Storea_pat<truncstorei16, I32, u0AlwaysExtPred, S2_storerhabs>; - def: Storea_pat<store, I32, u0AlwaysExtPred, S2_storeriabs>; + def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>; + def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>; + def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>; } let AddedComplexity = 30 in { - def: Loada_pat<load, i32, u0AlwaysExtPred, L4_loadri_abs>; - def: Loada_pat<sextloadi8, i32, u0AlwaysExtPred, L4_loadrb_abs>; - def: Loada_pat<zextloadi8, i32, u0AlwaysExtPred, L4_loadrub_abs>; - def: Loada_pat<sextloadi16, i32, u0AlwaysExtPred, L4_loadrh_abs>; - def: Loada_pat<zextloadi16, i32, u0AlwaysExtPred, L4_loadruh_abs>; + def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>; + def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>; + def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>; + def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>; + def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>; } // Indexed store word - global address. @@ -4012,6 +3883,18 @@ def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>; def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>; def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>; +let Constraints = "@earlyclobber $dst" in +def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b, + IntRegs:$c, IntRegs:$d), + ".error \"Should never try to emit Insert4\"", + [(set (i64 DoubleRegs:$dst), + (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))), + (i32 16)), + (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))), + (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))), + (i32 32))), + (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>; + //===----------------------------------------------------------------------===// // :raw for of boundscheck:hi:lo insns //===----------------------------------------------------------------------===// @@ -4116,7 +3999,7 @@ class CJInst_tstbit_R0<string px, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), ""#px#" = tstbit($Rs, #0); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<11> r9_2; @@ -4162,7 +4045,7 @@ class CJInst_RR<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, $Rt); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<4> Rt; bits<11> r9_2; @@ -4216,7 +4099,7 @@ class CJInst_RU5<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, #$U5); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<5> U5; bits<11> r9_2; @@ -4271,7 +4154,7 @@ class CJInst_Rn1<string px, string op, bit np, string tnt> : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs,#-1); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND> { + [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<11> r9_2; diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td index 19b0935..337f4ea 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -139,11 +139,11 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), let Inst{20-16} = Rss; } -defm: Loadx_pat<load, f32, s11_2ExtPred, L2_loadri_io>; -defm: Loadx_pat<load, f64, s11_3ExtPred, L2_loadrd_io>; +defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>; +defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>; -defm: Storex_pat<store, F32, s11_2ExtPred, S2_storeri_io>; -defm: Storex_pat<store, F64, s11_3ExtPred, S2_storerd_io>; +defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>; +defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>; def: Storex_simple_pat<store, F32, S2_storeri_io>; def: Storex_simple_pat<store, F64, S2_storerd_io>; diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td index 6e67b6e..f4fb946 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoVector.td +++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -20,6 +20,34 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + +multiclass bitconvert_32<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a IntRegs:$src))), + (b IntRegs:$src)>; + def : Pat <(a (bitconvert (b IntRegs:$src))), + (a IntRegs:$src)>; +} + +multiclass bitconvert_64<ValueType a, ValueType b> { + def : Pat <(b (bitconvert (a DoubleRegs:$src))), + (b DoubleRegs:$src)>; + def : Pat <(a (bitconvert (b DoubleRegs:$src))), + (a DoubleRegs:$src)>; +} + +// Bit convert vector types. +defm : bitconvert_32<v4i8, i32>; +defm : bitconvert_32<v2i16, i32>; +defm : bitconvert_32<v2i16, v4i8>; + +defm : bitconvert_64<v8i8, i64>; +defm : bitconvert_64<v4i16, i64>; +defm : bitconvert_64<v2i32, i64>; +defm : bitconvert_64<v8i8, v4i16>; +defm : bitconvert_64<v8i8, v2i32>; +defm : bitconvert_64<v4i16, v2i32>; + + // Vector shift support. Vector shifting in Hexagon is rather different // from internal representation of LLVM. // LLVM assumes all shifts (in vector case) will have the form @@ -44,6 +72,12 @@ class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp> let Inst{12-8} = src2; } +def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; + +def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; + def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>; def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>; def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>; @@ -52,6 +86,87 @@ def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>; def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>; def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>; + +def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; +def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; + +// Replicate the low 8-bits from 32-bits input register into each of the +// four bytes of 32-bits destination register. +def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; + +// Replicate the low 16-bits from 32-bits input register into each of the +// four halfwords of 64-bits destination register. +def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; + + +class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type> + : Pat <(Op Type:$Rss, Type:$Rtt), + (MI Type:$Rss, Type:$Rtt)>; + +def: VArith_pat <A2_vaddub, add, V8I8>; +def: VArith_pat <A2_vaddh, add, V4I16>; +def: VArith_pat <A2_vaddw, add, V2I32>; +def: VArith_pat <A2_vsubub, sub, V8I8>; +def: VArith_pat <A2_vsubh, sub, V4I16>; +def: VArith_pat <A2_vsubw, sub, V2I32>; + +def: VArith_pat <A2_and, and, V2I16>; +def: VArith_pat <A2_xor, xor, V2I16>; +def: VArith_pat <A2_or, or, V2I16>; + +def: VArith_pat <A2_andp, and, V8I8>; +def: VArith_pat <A2_andp, and, V4I16>; +def: VArith_pat <A2_andp, and, V2I32>; +def: VArith_pat <A2_orp, or, V8I8>; +def: VArith_pat <A2_orp, or, V4I16>; +def: VArith_pat <A2_orp, or, V2I32>; +def: VArith_pat <A2_xorp, xor, V8I8>; +def: VArith_pat <A2_xorp, xor, V4I16>; +def: VArith_pat <A2_xorp, xor, V2I32>; + +def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_asr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_lsr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c), + (i32 u5ImmPred:$c))))), + (S2_asl_i_vw V2I32:$b, imm:$c)>; + +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_asr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_lsr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))), + (S2_asl_i_vh V4I16:$b, imm:$c)>; + + +def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; +def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; + +def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; +def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; + +def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)), + (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)), + (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)), + (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)), + (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)), + (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)), + (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; + // Vector shift words by register def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>; def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>; @@ -63,3 +178,306 @@ def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>; def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>; def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>; def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>; + +class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value> + : Pat <(Op Value:$Rs, I32:$Rt), + (MI Value:$Rs, I32:$Rt)>; + +def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>; +def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>; +def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>; +def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>; +def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>; +def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>; + + +def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; +def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; +def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, + [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; + +def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; +def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; +def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; +def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; + + +class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value> + : Pat <(i1 (Op Value:$Rs, Value:$Rt)), + (MI Value:$Rs, Value:$Rt)>; + +def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>; +def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>; +def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>; + +def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>; +def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>; +def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>; + +def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>; +def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>; + + +class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy> + : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)), + (MI InVal:$Rs, InVal:$Rt)>; + +def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>; +def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>; + +def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>; +def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>; + + +// Hexagon doesn't have a vector multiply with C semantics. +// Instead, generate a pseudo instruction that gets expaneded into two +// scalar MPYI instructions. +// This is expanded by ExpandPostRAPseudos. +let isPseudo = 1 in +def VMULW : PseudoM<(outs DoubleRegs:$Rd), + (ins DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"Should never try to emit VMULW\"", + [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>; + +let isPseudo = 1 in +def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd), + (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), + ".error \"Should never try to emit VMULW_ACC\"", + [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))], + "$Rd = $Rx">; + +// Adds two v4i8: Hexagon does not have an insn for this one, so we +// use the double add v8i8, and use only the low part of the result. +def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>; + +// Subtract two v4i8: Hexagon does not have an insn for this one, so we +// use the double sub v8i8, and use only the low part of the result. +def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), + (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>; + +// +// No 32 bit vector mux. +// +def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)), + (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; +def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)), + (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; + +// +// 64-bit vector mux. +// +def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)), + (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; +def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)), + (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; +def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)), + (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; + +// +// No 32 bit vector compare. +// +def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), + (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>; + +def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), + (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>; +def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>; + + +class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value, + ValueType CmpTy> + : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)), + (InvMI Value:$Rt, Value:$Rs)>; + +// Map from a compare operation to the corresponding instruction with the +// order of operands reversed, e.g. x > y --> cmp.lt(y,x). +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>; +def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>; + +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>; +def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>; +def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>; +def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>; + +// Map from vcmpne(Rss) -> !vcmpew(Rss). +// rs != rt -> !(rs == rt). +def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), + (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; + + +// Truncate: from vector B copy all 'E'ven 'B'yte elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; +def: Pat<(v4i8 (trunc V4I16:$Rs)), + (S2_vtrunehb V4I16:$Rs)>; + +// Truncate: from vector B copy all 'O'dd 'B'yte elements: +// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; +// S2_vtrunohb + +// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; +// S2_vtruneh + +def: Pat<(v2i16 (trunc V2I32:$Rs)), + (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; + + +def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; +def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; + +def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; +def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; + +def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; + +// Sign extends a v2i8 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), + (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; + +// Sign extends a v2i16 into a v2i32. +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), + (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; + + +// Multiplies two v2i16 and returns a v2i32. We are using here the +// saturating multiply, as hexagon does not provide a non saturating +// vector multiply, and saturation does not impact the result that is +// in double precision of the operands. + +// Multiplies two v2i16 vectors: as Hexagon does not have a multiply +// with the C semantics for this one, this pattern uses the half word +// multiply vmpyh that takes two v2i16 and returns a v2i32. This is +// then truncated to fit this back into a v2i16 and to simulate the +// wrap around semantics for unsigned in C. +def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), + (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; + +def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), + (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)), + (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>; + +// Multiplies two v4i16 vectors. +def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), + (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), + (vmpyh (LoReg $Rs), (LoReg $Rt)))>; + +def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), + (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), + (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; + +// Multiplies two v4i8 vectors. +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, + Requires<[HasV5T]>; + +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; + +// Multiplies two v8i8 vectors. +def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, + Requires<[HasV5T]>; + +def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; + + +class shuffler<SDNode Op, string Str> + : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c), + "$a = " # Str # "($b, $c)", + [(set (i64 DoubleRegs:$a), + (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))], + "", S_3op_tc_1_SLOT23>; + +def SDTHexagonBinOp64 : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; + +def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; +def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; +def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; +def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; + +class ShufflePat<InstHexagon MI, SDNode Op> + : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), + (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b +def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; + +// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b +def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; + +// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h +def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; + +// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h +def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; + + +// Truncated store from v4i16 to v4i8. +def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>; + +// Truncated store from v2i32 to v2i16. +def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), + [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>; + +def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), + (LoReg $Rs))))>; + +def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), + (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; + + +// Zero and sign extended load from v2i8 into v2i16. +def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), + [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>; + +def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; + +def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), + (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; + +def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), + (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; + +def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), + (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index c0551e8..4275230 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -690,16 +690,15 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>; def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>; def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>; -def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>; +def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>; -def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), - (I32:$Rt))), +def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; // Mux -def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>; -def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>; -def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>; +def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>; +def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>; +def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>; // Shift halfword def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; @@ -720,17 +719,17 @@ def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>; def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>; def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>; -def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>; -def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>; -def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>; +def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>; +def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>; +def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>; -def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), (i32 (C2_cmpgti (I32:$src1), - (DEC_CONST_SIGNED s8ExtPred:$src2)))>; + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; -def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), (i32 (C2_cmpgtui (I32:$src1), - (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; // The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), @@ -1258,6 +1257,30 @@ def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))), def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))), (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>; +/******************************************************************** +* ST +*********************************************************************/ + +class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val> + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru), + (MI I32:$Rs, Val:$Rt, I32:$Ru)>; + +def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>; +def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>; +def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>; +def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>; +def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>; + +class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val> + : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), + (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>; + +def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>; +def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>; +def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>; +def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>; +def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>; + include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td index 8d068eb..c80a188 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -234,17 +234,17 @@ def: T_RR_pat<A4_orn, int_hexagon_A4_orn>; *********************************************************************/ // Combine Words Into Doublewords. -def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s8ExtPred>; -def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s8ExtPred>; +def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>; +def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>; /******************************************************************** * ALU32/PRED * *********************************************************************/ // Compare -def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s10ExtPred>; -def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s10ExtPred>; -def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u9ExtPred>; +def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>; +def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>; +def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>; def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>; def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>; diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 806d448..81af4db 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -199,10 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // of registers by individual passes in the backend. At this time, // we don't know the scope of usage and definitions of these // instructions. - if (MII->getOpcode() == Hexagon::TFR_condset_ii || - MII->getOpcode() == Hexagon::TFR_condset_ri || - MII->getOpcode() == Hexagon::TFR_condset_ir || - MII->getOpcode() == Hexagon::LDriw_pred || + if (MII->getOpcode() == Hexagon::LDriw_pred || MII->getOpcode() == Hexagon::STriw_pred) return false; } diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index 318ca72..450f594 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -66,162 +66,131 @@ def nOneImm : Operand<i32>; // Immediate predicates // def s32ImmPred : PatLeaf<(i32 imm), [{ - // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<32>(v); }]>; -def s32_24ImmPred : PatLeaf<(i32 imm), [{ - // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign - // extended field that is a multiple of 0x1000000. +def s32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isInt<32>(v); +}]>; + +def s31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<31,1>(v); +}]>; + +def s30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<31,1>(v); +}]>; + +def s29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<31,1>(v); +}]>; + +def s22_10ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<22,10>(v); +}]>; + +def s8_24ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<32,24>(v); + return isShiftedInt<8,24>(v); }]>; -def s32_16s8ImmPred : PatLeaf<(i32 imm), [{ - // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign - // extended field that is a multiple of 0x10000. +def s16_16ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<24,16>(v); + return isShiftedInt<16,16>(v); }]>; def s26_6ImmPred : PatLeaf<(i32 imm), [{ - // s26_6ImmPred predicate - True if the immediate fits in a 32-bit - // sign extended field. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<26,6>(v); }]>; - def s16ImmPred : PatLeaf<(i32 imm), [{ - // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<16>(v); }]>; - def s13ImmPred : PatLeaf<(i32 imm), [{ - // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<13>(v); }]>; - def s12ImmPred : PatLeaf<(i32 imm), [{ - // s12ImmPred predicate - True if the immediate fits in a 12-bit - // sign extended field. int64_t v = (int64_t)N->getSExtValue(); return isInt<12>(v); }]>; def s11_0ImmPred : PatLeaf<(i32 imm), [{ - // s11_0ImmPred predicate - True if the immediate fits in a 11-bit - // sign extended field. int64_t v = (int64_t)N->getSExtValue(); return isInt<11>(v); }]>; - def s11_1ImmPred : PatLeaf<(i32 imm), [{ - // s11_1ImmPred predicate - True if the immediate fits in a 12-bit - // sign extended field and is a multiple of 2. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,1>(v); }]>; - def s11_2ImmPred : PatLeaf<(i32 imm), [{ - // s11_2ImmPred predicate - True if the immediate fits in a 13-bit - // sign extended field and is a multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,2>(v); }]>; - def s11_3ImmPred : PatLeaf<(i32 imm), [{ - // s11_3ImmPred predicate - True if the immediate fits in a 14-bit - // sign extended field and is a multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<11,3>(v); }]>; - def s10ImmPred : PatLeaf<(i32 imm), [{ - // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<10>(v); }]>; - def s9ImmPred : PatLeaf<(i32 imm), [{ - // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<9>(v); }]>; def m9ImmPred : PatLeaf<(i32 imm), [{ - // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude - // field. The range of m9 is -255 to 255. int64_t v = (int64_t)N->getSExtValue(); return isInt<9>(v) && (v != -256); }]>; def s8ImmPred : PatLeaf<(i32 imm), [{ - // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); }]>; - def s8Imm64Pred : PatLeaf<(i64 imm), [{ - // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); }]>; - def s6ImmPred : PatLeaf<(i32 imm), [{ - // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<6>(v); }]>; - def s4_0ImmPred : PatLeaf<(i32 imm), [{ - // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field. int64_t v = (int64_t)N->getSExtValue(); return isInt<4>(v); }]>; - def s4_1ImmPred : PatLeaf<(i32 imm), [{ - // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field of 2. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,1>(v); }]>; - def s4_2ImmPred : PatLeaf<(i32 imm), [{ - // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field that is a multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,2>(v); }]>; - def s4_3ImmPred : PatLeaf<(i32 imm), [{ - // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended - // field that is a multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedInt<4,3>(v); }]>; @@ -233,56 +202,61 @@ def u64ImmPred : PatLeaf<(i64 imm), [{ }]>; def u32ImmPred : PatLeaf<(i32 imm), [{ - // u32ImmPred predicate - True if the immediate fits in a 32-bit field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<32>(v); }]>; +def u32_0ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<32>(v); +}]>; + +def u31_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<31,1>(v); +}]>; + +def u30_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<30,2>(v); +}]>; + +def u29_3ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<29,3>(v); +}]>; + def u26_6ImmPred : PatLeaf<(i32 imm), [{ - // u26_6ImmPred - True if the immediate fits in a 32-bit field and - // is a multiple of 64. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<26,6>(v); }]>; def u16ImmPred : PatLeaf<(i32 imm), [{ - // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<16>(v); }]>; def u16_s8ImmPred : PatLeaf<(i32 imm), [{ - // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign - // extended s8 field. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<16,8>(v); }]>; def u16_0ImmPred : PatLeaf<(i32 imm), [{ - // True if the immediate fits in a 16-bit unsigned field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<16>(v); }]>; def u11_3ImmPred : PatLeaf<(i32 imm), [{ - // True if the immediate fits in a 14-bit unsigned field, and the lowest - // three bits are 0. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<11,3>(v); }]>; def u9ImmPred : PatLeaf<(i32 imm), [{ - // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<9>(v); }]>; - def u8ImmPred : PatLeaf<(i32 imm), [{ - // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<8>(v); }]>; @@ -294,81 +268,56 @@ def u7StrictPosImmPred : ImmLeaf<i32, [{ }]>; def u7ImmPred : PatLeaf<(i32 imm), [{ - // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<7>(v); }]>; - def u6ImmPred : PatLeaf<(i32 imm), [{ - // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<6>(v); }]>; def u6_0ImmPred : PatLeaf<(i32 imm), [{ - // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned - // field. Same as u6ImmPred. int64_t v = (int64_t)N->getSExtValue(); return isUInt<6>(v); }]>; def u6_1ImmPred : PatLeaf<(i32 imm), [{ - // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned - // field that is 1 bit alinged - multiple of 2. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,1>(v); }]>; def u6_2ImmPred : PatLeaf<(i32 imm), [{ - // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned - // field that is 2 bits alinged - multiple of 4. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,2>(v); }]>; def u6_3ImmPred : PatLeaf<(i32 imm), [{ - // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned - // field that is 3 bits alinged - multiple of 8. int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<6,3>(v); }]>; def u5ImmPred : PatLeaf<(i32 imm), [{ - // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<5>(v); }]>; def u4ImmPred : PatLeaf<(i32 imm), [{ - // u4ImmPred predicate - True if the immediate fits in a 4-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<4>(v); }]>; def u3ImmPred : PatLeaf<(i32 imm), [{ - // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<3>(v); }]>; - def u2ImmPred : PatLeaf<(i32 imm), [{ - // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<2>(v); }]>; - def u1ImmPred : PatLeaf<(i1 imm), [{ - // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned - // field. int64_t v = (int64_t)N->getSExtValue(); return isUInt<1>(v); }]>; @@ -511,212 +460,6 @@ let PrintMethod = "printExtOperand" in { def u6_3Ext : Operand<i32>; } -let PrintMethod = "printImmOperand" in -def u0AlwaysExt : Operand<i32>; - -// Predicates for constant extendable operands -def s16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<16>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s10ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<10>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s9ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<9>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s8ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s8_16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 16-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<16>(v); -}]>; - -def s6ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s6_16ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 16-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<16>(v); -}]>; - -def s6_10ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can't fit in a 10-bit signed field. This is required to avoid - // unnecessary constant extenders. - return isConstExtProfitable(Node) && !isInt<10>(v); -}]>; - -def s11_0ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<11>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); -}]>; - -def s11_1ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<12>(v)) - return isShiftedInt<11,1>(v); - - // Return true if extending this immediate is profitable and the low 1 bit - // is zero (2-byte aligned). - return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0); -}]>; - -def s11_2ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<13>(v)) - return isShiftedInt<11,2>(v); - - // Return true if extending this immediate is profitable and the low 2-bits - // are zero (4-byte aligned). - return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0); -}]>; - -def s11_3ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<14>(v)) - return isShiftedInt<11,3>(v); - - // Return true if extending this immediate is profitable and the low 3-bits - // are zero (8-byte aligned). - return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0); -}]>; - -def u0AlwaysExtPred : PatLeaf<(i32 imm), [{ - // Predicate for an unsigned 32-bit value that always needs to be extended. - if (isConstExtProfitable(Node)) { - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<32>(v); - } - return false; -}]>; - -def u6ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<6>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); -}]>; - -def u7ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<7>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); -}]>; - -def u8ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); -}]>; - -def u9ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<9>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); -}]>; - -def u6_1ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<7>(v)) - return isShiftedUInt<6,1>(v); - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0); -}]>; - -def u6_2ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<8>(v)) - return isShiftedUInt<6,2>(v); - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0); -}]>; - -def u6_3ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<9>(v)) - return isShiftedUInt<6,3>(v); - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0); -}]>; - // This complex pattern exists only to create a machine instruction operand // of type "frame index". There doesn't seem to be a way to do that directly @@ -729,41 +472,8 @@ def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>; def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>; def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>; -// Addressing modes. - -def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>; -def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>; -def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>; -def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>; -def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>; -def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>; -def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>; -def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>; - // Address operands. -def MEMrr : Operand<i32> { - let PrintMethod = "printMEMrrOperand"; - let MIOperandInfo = (ops IntRegs, IntRegs); -} - -def MEMri : Operand<i32> { - let PrintMethod = "printMEMriOperand"; - let MIOperandInfo = (ops IntRegs, IntRegs); -} - -def MEMri_s11_2 : Operand<i32>, - ComplexPattern<i32, 2, "SelectMEMriS11_2", []> { - let PrintMethod = "printMEMriOperand"; - let MIOperandInfo = (ops IntRegs, s11Imm); -} - -def FrameIndex : Operand<i32> { - let PrintMethod = "printFrameIndexOperand"; - let MIOperandInfo = (ops IntRegs, s11Imm); -} - let PrintMethod = "printGlobalOperand" in { def globaladdress : Operand<i32>; def globaladdressExt : Operand<i32>; diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index afd3a17..503bfdb 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -271,15 +271,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { switch (Op) { case Hexagon::C2_mux: case Hexagon::C2_muxii: - case Hexagon::TFR_condset_ii: NewOp = Op; break; - case Hexagon::TFR_condset_ri: - NewOp = Hexagon::TFR_condset_ir; - break; - case Hexagon::TFR_condset_ir: - NewOp = Hexagon::TFR_condset_ri; - break; case Hexagon::C2_muxri: NewOp = Hexagon::C2_muxir; break; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 3df98d6..86eaee8 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -37,11 +37,8 @@ using namespace llvm; - -HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st) - : HexagonGenRegisterInfo(Hexagon::R31), - Subtarget(st) { -} +HexagonRegisterInfo::HexagonRegisterInfo() + : HexagonGenRegisterInfo(Hexagon::R31) {} const MCPhysReg * HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -51,7 +48,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 }; - switch(Subtarget.getHexagonArchVersion()) { + switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: return CalleeSavedRegsV3; @@ -89,7 +86,7 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, }; - switch(Subtarget.getHexagonArchVersion()) { + switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: return CalleeSavedRegClassesV3; @@ -122,7 +119,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= 2 * Hexagon_WordSize; } - const unsigned FrameSize = MFI.getStackSize(); + unsigned FrameSize = MFI.getStackSize(); + if (MI.getOpcode() == Hexagon::TFR_FI) + MI.setDesc(TII.get(Hexagon::A2_addi)); if (!MFI.hasVarSizedObjects() && TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) && diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index a83b502..dc6dd2a 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -37,19 +37,11 @@ #define HEXAGON_RESERVED_REG_2 Hexagon::R11 namespace llvm { - -class HexagonSubtarget; -class HexagonInstrInfo; -class Type; - struct HexagonRegisterInfo : public HexagonGenRegisterInfo { - HexagonSubtarget &Subtarget; - - HexagonRegisterInfo(HexagonSubtarget &st); + HexagonRegisterInfo(); /// Code Generation virtual methods... - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const TargetRegisterClass* const* getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const; diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index ce6a39a..1a4c7ae 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -71,6 +71,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { return true; const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); // Loop over all of the basic blocks for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); @@ -82,82 +83,78 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { while (MII != MIE) { MachineInstr *MI = MII; int Opc = MI->getOpcode(); - if (Opc == Hexagon::CONST32_set) { + if (Opc == Hexagon::CONST32_set_jt) { int DestReg = MI->getOperand(0).getReg(); MachineOperand &Symbol = MI->getOperand (1); - - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LO), DestReg).addOperand(Symbol); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HI), DestReg).addOperand(Symbol); - // MBB->erase returns the iterator to the next instruction, which is the - // one we want to process next - MII = MBB->erase (MI); - continue; - } - else if (Opc == Hexagon::CONST32_set_jt) { - int DestReg = MI->getOperand(0).getReg(); - MachineOperand &Symbol = MI->getOperand (1); + TII->get(Hexagon::A2_tfrsi), DestReg).addOperand(Symbol); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LO_jt), DestReg).addOperand(Symbol); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HI_jt), DestReg).addOperand(Symbol); // MBB->erase returns the iterator to the next instruction, which is the // one we want to process next MII = MBB->erase (MI); continue; } - else if (Opc == Hexagon::CONST32_Label) { + else if (Opc == Hexagon::CONST32_Int_Real && + MI->getOperand(1).isBlockAddress()) { int DestReg = MI->getOperand(0).getReg(); MachineOperand &Symbol = MI->getOperand (1); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LO_PIC), DestReg).addOperand(Symbol); + TII->get(Hexagon::LO), DestReg).addOperand(Symbol); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HI_PIC), DestReg).addOperand(Symbol); + TII->get(Hexagon::HI), DestReg).addOperand(Symbol); // MBB->erase returns the iterator to the next instruction, which is the // one we want to process next MII = MBB->erase (MI); continue; } - else if (Opc == Hexagon::CONST32_Int_Real) { + + else if (Opc == Hexagon::CONST32_Int_Real || + Opc == Hexagon::CONST32_Float_Real) { int DestReg = MI->getOperand(0).getReg(); - int64_t ImmValue = MI->getOperand(1).getImm (); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LOi), DestReg).addImm(ImmValue); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HIi), DestReg).addImm(ImmValue); + // We have to convert an FP immediate into its corresponding integer + // representation + int64_t ImmValue; + if (Opc == Hexagon::CONST32_Float_Real) { + APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + ImmValue = *Val.bitcastToAPInt().getRawData(); + } + else + ImmValue = MI->getOperand(1).getImm(); + + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue); MII = MBB->erase (MI); continue; } - else if (Opc == Hexagon::CONST64_Int_Real) { + else if (Opc == Hexagon::CONST64_Int_Real || + Opc == Hexagon::CONST64_Float_Real) { int DestReg = MI->getOperand(0).getReg(); - int64_t ImmValue = MI->getOperand(1).getImm (); - unsigned DestLo = Fn.getSubtarget().getRegisterInfo()->getSubReg( - DestReg, Hexagon::subreg_loreg); - unsigned DestHi = Fn.getSubtarget().getRegisterInfo()->getSubReg( - DestReg, Hexagon::subreg_hireg); + + // We have to convert an FP immediate into its corresponding integer + // representation + int64_t ImmValue; + if (Opc == Hexagon::CONST64_Float_Real) { + APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + ImmValue = *Val.bitcastToAPInt().getRawData(); + } + else + ImmValue = MI->getOperand(1).getImm(); + + unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg); + unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg); int32_t LowWord = (ImmValue & 0xFFFFFFFF); int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF; - // Lower Registers Lower Half - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LOi), DestLo).addImm(LowWord); - // Lower Registers Higher Half + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord); BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HIi), DestLo).addImm(LowWord); - // Higher Registers Lower Half - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LOi), DestHi).addImm(HighWord); - // Higher Registers Higher Half. - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HIi), DestHi).addImm(HighWord); + TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord); MII = MBB->erase (MI); continue; - } + } ++MII; } } diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp deleted file mode 100644 index 8873bb9..0000000 --- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp +++ /dev/null @@ -1,172 +0,0 @@ -//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -// -//===----------------------------------------------------------------------===// -// This pass tries to provide opportunities for better optimization of muxes. -// The default code generated for something like: flag = (a == b) ? 1 : 3; -// would be: -// -// {p0 = cmp.eq(r0,r1)} -// {r3 = mux(p0,#1,#3)} -// -// This requires two packets. If we use .new predicated immediate transfers, -// then we can do this in a single packet, e.g.: -// -// {p0 = cmp.eq(r0,r1) -// if (p0.new) r3 = #1 -// if (!p0.new) r3 = #3} -// -// Note that the conditional assignments are not generated in .new form here. -// We assume opptimisically that they will be formed later. -// -//===----------------------------------------------------------------------===// - -#include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" -#include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "xfer" - -namespace llvm { - void initializeHexagonSplitTFRCondSetsPass(PassRegistry&); -} - - -namespace { - -class HexagonSplitTFRCondSets : public MachineFunctionPass { - public: - static char ID; - HexagonSplitTFRCondSets() : MachineFunctionPass(ID) { - initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry()); - } - - const char *getPassName() const override { - return "Hexagon Split TFRCondSets"; - } - bool runOnMachineFunction(MachineFunction &Fn) override; -}; - - -char HexagonSplitTFRCondSets::ID = 0; - - -bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { - - const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); - - // Loop over all of the basic blocks. - for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); - MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; - // Traverse the basic block. - for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); - ++MII) { - MachineInstr *MI = MII; - switch(MI->getOpcode()) { - case Hexagon::TFR_condset_ri: { - int DestReg = MI->getOperand(0).getReg(); - int SrcReg1 = MI->getOperand(2).getReg(); - - // Do not emit the predicated copy if the source and the destination - // is the same register. - if (DestReg != SrcReg1) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::A2_tfrt), DestReg). - addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); - } - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::C2_cmoveif), DestReg). - addReg(MI->getOperand(1).getReg()). - addImm(MI->getOperand(3).getImm()); - - MII = MBB->erase(MI); - --MII; - break; - } - case Hexagon::TFR_condset_ir: { - int DestReg = MI->getOperand(0).getReg(); - int SrcReg2 = MI->getOperand(3).getReg(); - - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::C2_cmoveit), DestReg). - addReg(MI->getOperand(1).getReg()). - addImm(MI->getOperand(2).getImm()); - - // Do not emit the predicated copy if the source and - // the destination is the same register. - if (DestReg != SrcReg2) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::A2_tfrf), DestReg). - addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); - } - MII = MBB->erase(MI); - --MII; - break; - } - case Hexagon::TFR_condset_ii: { - int DestReg = MI->getOperand(0).getReg(); - int SrcReg1 = MI->getOperand(1).getReg(); - - int Immed1 = MI->getOperand(2).getImm(); - int Immed2 = MI->getOperand(3).getImm(); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::C2_cmoveit), - DestReg).addReg(SrcReg1).addImm(Immed1); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::C2_cmoveif), - DestReg).addReg(SrcReg1).addImm(Immed2); - MII = MBB->erase(MI); - --MII; - break; - } - } - } - } - return true; -} - -} - -//===----------------------------------------------------------------------===// -// Public Constructor Functions -//===----------------------------------------------------------------------===// - -static void initializePassOnce(PassRegistry &Registry) { - const char *Name = "Hexagon Split TFRCondSets"; - PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr", - &HexagonSplitTFRCondSets::ID, nullptr, false, - false); - Registry.registerPass(*PI, true); -} - -void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializePassOnce) -} - -FunctionPass *llvm::createHexagonSplitTFRCondSets() { - return new HexagonSplitTFRCondSets(); -} diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 380f023..1717ae3 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -48,6 +48,10 @@ EnableIEEERndNear( cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Generate non-chopped conversion from fp to int.")); +static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { // If the programmer has not specified a Hexagon version, default to -mv4. @@ -91,3 +95,9 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS, // Pin the vtable to this file. void HexagonSubtarget::anchor() {} + +bool HexagonSubtarget::enableMachineScheduler() const { + if (DisableHexagonMISched.getNumOccurrences()) + return !DisableHexagonMISched; + return true; +} diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 57de546..780567b 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -85,6 +85,11 @@ public: bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } bool modeIEEERndNear() const { return ModeIEEERndNear; } + bool enableMachineScheduler() const override; + // Always use the TargetLowering default scheduler. + // FIXME: This will use the vliw scheduler which is probably just hurting + // compiler time and will be removed eventually anyway. + bool enableMachineSchedDefaultSched() const override { return false; } const std::string &getCPUString () const { return CPUString; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 64f75a3..48b0bc8 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -29,10 +29,6 @@ using namespace llvm; static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); -static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon MI Scheduling")); - static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon CFG Optimization")); @@ -69,9 +65,10 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS, + Options, RM, CM, OL), TLOF(make_unique<HexagonTargetObjectFile>()), - DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"), Subtarget(TT, CPU, FS, *this) { + Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } @@ -82,16 +79,7 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) { - // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define - // HexagonSubtarget::enableMachineScheduler() { return true; }. - // That will bypass the SelectionDAG VLIW scheduler, which is probably just - // hurting compile time and will be removed eventually anyway. - if (DisableHexagonMISched) - disablePass(&MachineSchedulerID); - else - enablePass(&MachineSchedulerID); - } + : TargetPassConfig(TM, PM) {} HexagonTargetMachine &getHexagonTargetMachine() const { return getTM<HexagonTargetMachine>(); @@ -159,9 +147,6 @@ void HexagonPassConfig::addPreEmitPass() { // Expand Spill code for predicate registers. addPass(createHexagonExpandPredSpillCode(), false); - // Split up TFRcondsets into conditional transfers. - addPass(createHexagonSplitTFRCondSets(), false); - // Create Packets. if (!NoOpt) { if (!DisableHardwareLoops) diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index e0b3a9b..5774f7e 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -24,7 +24,6 @@ class Module; class HexagonTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; // Calculates type size & alignment. HexagonSubtarget Subtarget; public: @@ -33,8 +32,7 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~HexagonTargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const HexagonSubtarget *getSubtargetImpl() const override { + const HexagonSubtarget *getSubtargetImpl(const Function &) const override { return &Subtarget; } static unsigned getModuleMatchQuality(const Module &M); diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c123640..4ca628e 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -389,7 +389,9 @@ static bool IsLoopN(MachineInstr *MI) { /// callee-saved register. static bool DoesModifyCalleeSavedReg(MachineInstr *MI, const TargetRegisterInfo *TRI) { - for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) { + for (const MCPhysReg *CSR = + TRI->getCalleeSavedRegs(MI->getParent()->getParent()); + *CSR; ++CSR) { unsigned CalleeSavedReg = *CSR; if (MI->modifiesRegister(CalleeSavedReg, TRI)) return true; @@ -401,10 +403,7 @@ static bool DoesModifyCalleeSavedReg(MachineInstr *MI, // or new-value store. bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - if ( isCondInst(MI) || QII->mayBeNewStore(MI)) - return true; - else - return false; + return isCondInst(MI) || QII->mayBeNewStore(MI); } bool HexagonPacketizerList::isCondInst (MachineInstr* MI) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 56c9dc7..4a3ac8c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "hexagon-elf-writer" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index a5a09ba..eac7d6d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -49,9 +49,8 @@ void emitLittleEndian(uint64_t Binary, raw_ostream &OS) { } HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, - MCSubtargetInfo const &aMST, MCContext &aMCT) - : MST(aMST), MCT(aMCT), MCII (aMII) {} + : MCT(aMCT), MCII(aMII) {} void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, @@ -75,15 +74,10 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, llvm_unreachable("Only Immediates and Registers implemented right now"); } -MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const { - return MST; -} - MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII, MCRegisterInfo const &MRI, - MCSubtargetInfo const &MST, MCContext &MCT) { - return new HexagonMCCodeEmitter(MII, MST, MCT); + return new HexagonMCCodeEmitter(MII, MCT); } #include "HexagonGenMCCodeEmitter.inc" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index db1d707..768c10e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -26,13 +26,11 @@ namespace llvm { class HexagonMCCodeEmitter : public MCCodeEmitter { - MCSubtargetInfo const &MST; MCContext &MCT; MCInstrInfo const &MCII; public: - HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST, - MCContext &aMCT); + HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT); MCSubtargetInfo const &getSubtargetInfo() const; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 09a305b..c63bf32 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -47,15 +47,6 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) { return X; } -static MCStreamer * -createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *CE, - bool RelaxAll) { - MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE); - return ES; -} - - static MCSubtargetInfo * createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); @@ -75,16 +66,6 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCStreamer *createMCStreamer(Target const &T, StringRef TT, - MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - MCSubtargetInfo const &STI, bool RelaxAll) { - MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll); - new MCTargetStreamer(*ES); - return ES; -} - - static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { @@ -135,7 +116,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() { // Register the asm backend TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget, createHexagonAsmBackend); - - // Register the obj streamer - TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index f074b65..17072d9 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -34,7 +34,6 @@ MCInstrInfo *createHexagonMCInstrInfo(); MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII, MCRegisterInfo const &MRI, - MCSubtargetInfo const &MST, MCContext &MCT); MCAsmBackend *createHexagonAsmBackend(Target const &T, |