From dab366bf25ef00a37e5d9306c6dcbbb7c1d3e19a Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 12 Jun 2013 19:20:32 +0000 Subject: [PowerPC] Fix switch warnings from r183841. Introducing PRED_BAD caused some unexpected warnings that are now suppressed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183854 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 1 + lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 432167e..f0a57c0 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -90,6 +90,7 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { + default: llvm_unreachable("Bad predicate!"); case PPC::PRED_LT: O << "lt"; return; case PPC::PRED_LE: O << "le"; return; case PPC::PRED_EQ: O << "eq"; return; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 853e505..1c69a0b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -18,6 +18,7 @@ using namespace llvm; PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { switch (Opcode) { + default: llvm_unreachable("Unknown PPC branch opcode!"); case PPC::PRED_EQ: return PPC::PRED_NE; case PPC::PRED_NE: return PPC::PRED_EQ; case PPC::PRED_LT: return PPC::PRED_GE; @@ -27,11 +28,11 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_NU: return PPC::PRED_UN; case PPC::PRED_UN: return PPC::PRED_NU; } - llvm_unreachable("Unknown PPC branch opcode!"); } PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { switch (Opcode) { + default: llvm_unreachable("Unknown PPC branch opcode!"); case PPC::PRED_EQ: return PPC::PRED_EQ; case PPC::PRED_NE: return PPC::PRED_NE; case PPC::PRED_LT: return PPC::PRED_GT; @@ -41,6 +42,5 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { case PPC::PRED_NU: return PPC::PRED_NU; case PPC::PRED_UN: return PPC::PRED_UN; } - llvm_unreachable("Unknown PPC branch opcode!"); } -- cgit v1.1 From 628953385396cfa4f59b6ccb56ee3b10d6b5f865 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 12 Jun 2013 20:22:24 +0000 Subject: [PowerPC] Remove PRED_BAD from PPC::Predicate enumeration. I'm taking David Blaikie's suggestion to use an Optional return value instead. That's the right solution for this problem. Thanks for pointing out that possibility! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183858 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 3ab9005..444758c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -32,8 +32,7 @@ namespace PPC { PRED_GT = (1 << 5) | 12, PRED_NE = (2 << 5) | 4, PRED_UN = (3 << 5) | 12, - PRED_NU = (3 << 5) | 4, - PRED_BAD = 0 + PRED_NU = (3 << 5) | 4 }; /// Invert the specified predicate. != -> ==, < -> >=. -- cgit v1.1 From 7e17024400941889b6fe1b178e5374f75c34d9ab Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 12 Jun 2013 20:57:32 +0000 Subject: Revert r183854 (PPC: Fix switch warnings from r183841) Now that the PRED_BAD has been removed, this is failing the Clang -Werror build due to -Wcovered-switch-default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183863 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 1 - lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index f0a57c0..432167e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -90,7 +90,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { - default: llvm_unreachable("Bad predicate!"); case PPC::PRED_LT: O << "lt"; return; case PPC::PRED_LE: O << "le"; return; case PPC::PRED_EQ: O << "eq"; return; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 1c69a0b..853e505 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -18,7 +18,6 @@ using namespace llvm; PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { switch (Opcode) { - default: llvm_unreachable("Unknown PPC branch opcode!"); case PPC::PRED_EQ: return PPC::PRED_NE; case PPC::PRED_NE: return PPC::PRED_EQ; case PPC::PRED_LT: return PPC::PRED_GE; @@ -28,11 +27,11 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_NU: return PPC::PRED_UN; case PPC::PRED_UN: return PPC::PRED_NU; } + llvm_unreachable("Unknown PPC branch opcode!"); } PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { switch (Opcode) { - default: llvm_unreachable("Unknown PPC branch opcode!"); case PPC::PRED_EQ: return PPC::PRED_EQ; case PPC::PRED_NE: return PPC::PRED_NE; case PPC::PRED_LT: return PPC::PRED_GT; @@ -42,5 +41,6 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { case PPC::PRED_NU: return PPC::PRED_NU; case PPC::PRED_UN: return PPC::PRED_UN; } + llvm_unreachable("Unknown PPC branch opcode!"); } -- cgit v1.1 From 19b30d56b224ab3507f7a93743eac2b01c5861dd Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 13 Jun 2013 15:45:24 +0000 Subject: X86: Make the cmov aliases work with intel syntax too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183907 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 46 +++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 817bd6c..d67203e 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1921,29 +1921,31 @@ def : MnemonicAlias<"fucomip", "fucompi", "att">; def : MnemonicAlias<"fwait", "wait", "att">; -class CondCodeAlias +class CondCodeAlias : MnemonicAlias; + !strconcat(Prefix, NewCond, Suffix), VariantName>; /// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of /// MnemonicAlias's that canonicalize the condition code in a mnemonic, for /// example "setz" -> "sete". -multiclass IntegerCondCodeMnemonicAlias { - def C : CondCodeAlias; // setc -> setb - def Z : CondCodeAlias; // setz -> sete - def NA : CondCodeAlias; // setna -> setbe - def NB : CondCodeAlias; // setnb -> setae - def NC : CondCodeAlias; // setnc -> setae - def NG : CondCodeAlias; // setng -> setle - def NL : CondCodeAlias; // setnl -> setge - def NZ : CondCodeAlias; // setnz -> setne - def PE : CondCodeAlias; // setpe -> setp - def PO : CondCodeAlias; // setpo -> setnp - - def NAE : CondCodeAlias; // setnae -> setb - def NBE : CondCodeAlias; // setnbe -> seta - def NGE : CondCodeAlias; // setnge -> setl - def NLE : CondCodeAlias; // setnle -> setg +multiclass IntegerCondCodeMnemonicAlias { + def C : CondCodeAlias; // setc -> setb + def Z : CondCodeAlias; // setz -> sete + def NA : CondCodeAlias; // setna -> setbe + def NB : CondCodeAlias; // setnb -> setae + def NC : CondCodeAlias; // setnc -> setae + def NG : CondCodeAlias; // setng -> setle + def NL : CondCodeAlias; // setnl -> setge + def NZ : CondCodeAlias; // setnz -> setne + def PE : CondCodeAlias; // setpe -> setp + def PO : CondCodeAlias; // setpo -> setnp + + def NAE : CondCodeAlias; // setnae -> setb + def NBE : CondCodeAlias; // setnbe -> seta + def NGE : CondCodeAlias; // setnge -> setl + def NLE : CondCodeAlias; // setnle -> setg } // Aliases for set @@ -1951,9 +1953,11 @@ defm : IntegerCondCodeMnemonicAlias<"set", "">; // Aliases for j defm : IntegerCondCodeMnemonicAlias<"j", "">; // Aliases for cmov{w,l,q} -defm : IntegerCondCodeMnemonicAlias<"cmov", "w">; -defm : IntegerCondCodeMnemonicAlias<"cmov", "l">; -defm : IntegerCondCodeMnemonicAlias<"cmov", "q">; +defm : IntegerCondCodeMnemonicAlias<"cmov", "w", "att">; +defm : IntegerCondCodeMnemonicAlias<"cmov", "l", "att">; +defm : IntegerCondCodeMnemonicAlias<"cmov", "q", "att">; +// No size suffix for intel-style asm. +defm : IntegerCondCodeMnemonicAlias<"cmov", "", "intel">; //===----------------------------------------------------------------------===// -- cgit v1.1 From 1290ce00a372f10fa1667d3566477f86ede04c73 Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Thu, 13 Jun 2013 16:40:51 +0000 Subject: ARM: fix t2am_imm8_offset operand printing for imm=#-0 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183913 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 7fef795..0b3d266 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1148,7 +1148,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); O << ", " << markup(" Date: Thu, 13 Jun 2013 16:41:55 +0000 Subject: ARM: fix B decoding git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183914 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index a6eab33..4086f36 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2104,7 +2104,7 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn, unsigned imm10 = fieldFromInstruction(Insn, 16, 10); unsigned imm11 = fieldFromInstruction(Insn, 0, 11); unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11; - int imm32 = SignExtend32<24>(tmp << 1); + int imm32 = SignExtend32<25>(tmp << 1); if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(imm32)); -- cgit v1.1 From 90cd06e90be1db06bc4812ae9ec96b6638847285 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 13 Jun 2013 19:06:52 +0000 Subject: Mips: Remove global set. Backends shouldn't retain any global state. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183927 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips16ISelLowering.cpp | 123 ++++++++++++++++++--------------- lib/Target/Mips/Mips16ISelLowering.h | 2 - 2 files changed, 68 insertions(+), 57 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index d8dd88c..1ec0f2f 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" -#include using namespace llvm; @@ -30,9 +29,54 @@ static cl::opt DontExpandCondPseudos16( cl::Hidden); namespace { - std::set NoHelperNeeded; +struct Mips16Libcall { + RTLIB::Libcall Libcall; + const char *Name; + + bool operator<(const Mips16Libcall &RHS) const { + return std::strcmp(Name, RHS.Name) < 0; + } +}; } +// Libcalls for which no helper is generated. Sorted by name for binary search. +static const Mips16Libcall HardFloatLibCalls[] = { + { RTLIB::ADD_F64, "__mips16_adddf3" }, + { RTLIB::ADD_F32, "__mips16_addsf3" }, + { RTLIB::DIV_F64, "__mips16_divdf3" }, + { RTLIB::DIV_F32, "__mips16_divsf3" }, + { RTLIB::OEQ_F64, "__mips16_eqdf2" }, + { RTLIB::OEQ_F32, "__mips16_eqsf2" }, + { RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2" }, + { RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi" }, + { RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi" }, + { RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf" }, + { RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf" }, + { RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf" }, + { RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf" }, + { RTLIB::OGE_F64, "__mips16_gedf2" }, + { RTLIB::OGE_F32, "__mips16_gesf2" }, + { RTLIB::OGT_F64, "__mips16_gtdf2" }, + { RTLIB::OGT_F32, "__mips16_gtsf2" }, + { RTLIB::OLE_F64, "__mips16_ledf2" }, + { RTLIB::OLE_F32, "__mips16_lesf2" }, + { RTLIB::OLT_F64, "__mips16_ltdf2" }, + { RTLIB::OLT_F32, "__mips16_ltsf2" }, + { RTLIB::MUL_F64, "__mips16_muldf3" }, + { RTLIB::MUL_F32, "__mips16_mulsf3" }, + { RTLIB::UNE_F64, "__mips16_nedf2" }, + { RTLIB::UNE_F32, "__mips16_nesf2" }, + { RTLIB::UNKNOWN_LIBCALL, "__mips16_ret_dc" }, // No associated libcall. + { RTLIB::UNKNOWN_LIBCALL, "__mips16_ret_df" }, // No associated libcall. + { RTLIB::UNKNOWN_LIBCALL, "__mips16_ret_sc" }, // No associated libcall. + { RTLIB::UNKNOWN_LIBCALL, "__mips16_ret_sf" }, // No associated libcall. + { RTLIB::SUB_F64, "__mips16_subdf3" }, + { RTLIB::SUB_F32, "__mips16_subsf3" }, + { RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2" }, + { RTLIB::UO_F64, "__mips16_unorddf2" }, + { RTLIB::UO_F32, "__mips16_unordsf2" } +}; + Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) : MipsTargetLowering(TM) { // @@ -46,13 +90,9 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) // Set up the register classes addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); - if (Subtarget->inMips16HardFloat()) { + if (Subtarget->inMips16HardFloat()) setMips16HardFloatLibCalls(); - NoHelperNeeded.insert("__mips16_ret_sf"); - NoHelperNeeded.insert("__mips16_ret_df"); - NoHelperNeeded.insert("__mips16_ret_sc"); - NoHelperNeeded.insert("__mips16_ret_dc"); - } + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); @@ -166,47 +206,17 @@ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, return false; } -void Mips16TargetLowering::setMips16LibcallName - (RTLIB::Libcall L, const char *Name) { - setLibcallName(L, Name); - NoHelperNeeded.insert(Name); -} - void Mips16TargetLowering::setMips16HardFloatLibCalls() { - setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); - setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); - setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); - setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); - setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); - setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); - setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); - setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); - setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); - setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); - setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); - setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); - setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); - setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); - setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); - setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); - setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); - setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); - setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); - setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); - setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); - setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); - setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); - setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); - setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); - setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); -} + for (unsigned I = 0; I != array_lengthof(HardFloatLibCalls); ++I) { + assert((I == 0 || HardFloatLibCalls[I - 1] < HardFloatLibCalls[I]) && + "Array not sorted!"); + if (HardFloatLibCalls[I].Libcall != RTLIB::UNKNOWN_LIBCALL) + setLibcallName(HardFloatLibCalls[I].Libcall, HardFloatLibCalls[I].Name); + } + setLibcallName(RTLIB::O_F64, "__mips16_unorddf2"); + setLibcallName(RTLIB::O_F32, "__mips16_unordsf2"); +} // // The Mips16 hard float is a crazy quilt inherited from gcc. I have a much @@ -383,16 +393,19 @@ getOpndList(SmallVectorImpl &Ops, // bool LookupHelper = true; if (ExternalSymbolSDNode *S = dyn_cast(CLI.Callee)) { - if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { + Mips16Libcall Find = { RTLIB::UNKNOWN_LIBCALL, S->getSymbol() }; + + if (std::binary_search(HardFloatLibCalls, array_endof(HardFloatLibCalls), + Find)) LookupHelper = false; - } - } - else if (GlobalAddressSDNode *G = - dyn_cast(CLI.Callee)) { - if (NoHelperNeeded.find(G->getGlobal()->getName().data()) != - NoHelperNeeded.end()) { + } else if (GlobalAddressSDNode *G = + dyn_cast(CLI.Callee)) { + Mips16Libcall Find = { RTLIB::UNKNOWN_LIBCALL, + G->getGlobal()->getName().data() }; + + if (std::binary_search(HardFloatLibCalls, array_endof(HardFloatLibCalls), + Find)) LookupHelper = false; - } } if (LookupHelper) Mips16HelperFunction = getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h index d3c7028..33b953f 100644 --- a/lib/Target/Mips/Mips16ISelLowering.h +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -32,8 +32,6 @@ namespace llvm { unsigned NextStackOffset, const MipsFunctionInfo& FI) const; - void setMips16LibcallName(RTLIB::Libcall, const char *Name); - void setMips16HardFloatLibCalls(); unsigned int -- cgit v1.1 From 2948e693cb9ed0f99e8ba2448459d8a34f091075 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 13 Jun 2013 20:14:00 +0000 Subject: R600: Don't try to fix reg class when copying IMPLICIT_DEF to a register The test case for this is way too complex to be useful as a lit test, and I was unable to reduce it. https://bugs.freedesktop.org/show_bug.cgi?id=65438 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183937 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 93432a2..9a5e9e6 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -766,7 +766,8 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() { continue; } - if (!Val.getNode()->isMachineOpcode()) { + if (!Val.getNode()->isMachineOpcode() || + Val.getNode()->getMachineOpcode() == AMDGPU::IMPLICIT_DEF) { continue; } -- cgit v1.1 From fe532ad6d6643219669056dc268d63fb29a8d1ee Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Fri, 14 Jun 2013 02:49:43 +0000 Subject: Enable FastISel on ARM for Linux and NaCl, not MCJIT This is a resubmit of r182877, which was reverted because it broken MCJIT tests on ARM. The patch leaves MCJIT on ARM as it was before: only enabled for iOS. I've CC'ed people from the original review and revert. FastISel was only enabled for iOS ARM and Thumb2, this patch enables it for ARM (not Thumb2) on Linux and NaCl, but not MCJIT. Thumb2 support needs a bit more work, mainly around register class restrictions. The patch punts to SelectionDAG when doing TLS relocation on non-Darwin targets. I will fix this and other FastISel-to-SelectionDAG failures in a separate patch. The patch also forces FastISel to retain frame pointers: iOS always keeps them for backtracking (so emitted code won't change because of this), but Linux was getting much worse code that was incorrect when using big frames (such as test-suite's lencod). I'll also fix this in a later patch, it will probably require a peephole so that FastISel doesn't rematerialize frame pointers back-to-back. The test changes are straightforward, similar to: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130513/174279.html They also add a vararg test that got dropped in that change. I ran all of lnt test-suite on A15 hardware with --optimize-option=-O0 and all the tests pass. All the tests also pass on x86 make check-all. I also re-ran the check-all tests that failed on ARM, and they all seem to pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183966 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a4de941..97dc63f 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -630,6 +630,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); + // FastISel TLS support on non-Darwin is broken, punt to SelectionDAG. + const GlobalVariable *GVar = dyn_cast(GV); + bool IsThreadLocal = GVar && GVar->isThreadLocal(); + if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0; + // Use movw+movt when possible, it avoids constant pool entries. // Darwin targets don't support movt with Reloc::Static, see // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support @@ -3044,13 +3049,23 @@ bool ARMFastISel::FastLowerArguments() { namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { - // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); - // Darwin and thumb1 only for now. const ARMSubtarget *Subtarget = &TM.getSubtarget(); - if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) + // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. + bool UseFastISel = false; + UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only(); + UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb(); + UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb(); + + if (UseFastISel) { + // iOS always has a FP for backtracking, force other targets + // to keep their FP when doing FastISel. The emitted code is + // currently superior, and in cases like test-suite's lencod + // FastISel isn't quite correct when FP is eliminated. + TM.Options.NoFramePointerElim = true; return new ARMFastISel(funcInfo, libInfo); + } return 0; } } -- cgit v1.1 From d25ec760cbf93e8c8493eaab2265c8bb8cf1d233 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 14 Jun 2013 09:31:41 +0000 Subject: X86: cvtpi2ps is just an SSE instruction with MMX operands. It has no AVX equivalent. Give it the right register format so we can also emit it when AVX is enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183971 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrMMX.td | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 07314a0..cb12956 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -189,13 +189,14 @@ multiclass sse12_cvt_pint opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_pint_3addr opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { - def irr : PI; - def irm : PI; + def irr : MMXPI; + def irm : MMXPI; } //===----------------------------------------------------------------------===// -- cgit v1.1 From a768a4954818456fa6fe2077a3cbe75979025c15 Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Fri, 14 Jun 2013 11:21:35 +0000 Subject: ARM: fix thumb coprocessor instruction with pre-writeback disassembly was stc2 p0, c0, [r0]! instead of stc2 p0, c0, [r0,#0]! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183975 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ff21bf7..8b114a8 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3632,7 +3632,7 @@ multiclass t2LdStCop op31_28, bit load, bit Dbit, string asm> { let DecoderMethod = "DecodeCopMemInstruction"; } def _PRE : T2CI { bits<13> addr; bits<4> cop; -- cgit v1.1 From 2def95fc1e5537e5c156bd12027d41212e0b2fc2 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 14 Jun 2013 22:12:09 +0000 Subject: R600: Move instruction encoding definitions into a separate .td file Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184013 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrFormats.td | 392 ++++++++++++++++++++++++++++++++++++ lib/Target/R600/R600Instructions.td | 363 +-------------------------------- 2 files changed, 393 insertions(+), 362 deletions(-) create mode 100644 lib/Target/R600/R600InstrFormats.td (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td new file mode 100644 index 0000000..7cc48f4 --- /dev/null +++ b/lib/Target/R600/R600InstrFormats.td @@ -0,0 +1,392 @@ +//===-- R600InstrFormats.td - R600 Instruction Encodings ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// R600 Instruction format definitions. +// +//===----------------------------------------------------------------------===// + +class InstR600 pattern, + InstrItinClass itin> + : AMDGPUInst { + + field bits<64> Inst; + bit TransOnly = 0; + bit Trig = 0; + bit Op3 = 0; + bit isVector = 0; + bits<2> FlagOperandIdx = 0; + bit Op1 = 0; + bit Op2 = 0; + bit HasNativeOperands = 0; + bit VTXInst = 0; + bit TEXInst = 0; + + let Namespace = "AMDGPU"; + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asm; + let Pattern = pattern; + let Itinerary = itin; + + let TSFlags{0} = TransOnly; + let TSFlags{4} = Trig; + let TSFlags{5} = Op3; + + // Vector instructions are instructions that must fill all slots in an + // instruction group + let TSFlags{6} = isVector; + let TSFlags{8-7} = FlagOperandIdx; + let TSFlags{9} = HasNativeOperands; + let TSFlags{10} = Op1; + let TSFlags{11} = Op2; + let TSFlags{12} = VTXInst; + let TSFlags{13} = TEXInst; +} + +//===----------------------------------------------------------------------===// +// ALU instructions +//===----------------------------------------------------------------------===// + +class R600ALU_Word0 { + field bits<32> Word0; + + bits<11> src0; + bits<1> src0_neg; + bits<1> src0_rel; + bits<11> src1; + bits<1> src1_rel; + bits<1> src1_neg; + bits<3> index_mode = 0; + bits<2> pred_sel; + bits<1> last; + + bits<9> src0_sel = src0{8-0}; + bits<2> src0_chan = src0{10-9}; + bits<9> src1_sel = src1{8-0}; + bits<2> src1_chan = src1{10-9}; + + let Word0{8-0} = src0_sel; + let Word0{9} = src0_rel; + let Word0{11-10} = src0_chan; + let Word0{12} = src0_neg; + let Word0{21-13} = src1_sel; + let Word0{22} = src1_rel; + let Word0{24-23} = src1_chan; + let Word0{25} = src1_neg; + let Word0{28-26} = index_mode; + let Word0{30-29} = pred_sel; + let Word0{31} = last; +} + +class R600ALU_Word1 { + field bits<32> Word1; + + bits<11> dst; + bits<3> bank_swizzle; + bits<1> dst_rel; + bits<1> clamp; + + bits<7> dst_sel = dst{6-0}; + bits<2> dst_chan = dst{10-9}; + + let Word1{20-18} = bank_swizzle; + let Word1{27-21} = dst_sel; + let Word1{28} = dst_rel; + let Word1{30-29} = dst_chan; + let Word1{31} = clamp; +} + +class R600ALU_Word1_OP2 alu_inst> : R600ALU_Word1{ + + bits<1> src0_abs; + bits<1> src1_abs; + bits<1> update_exec_mask; + bits<1> update_pred; + bits<1> write; + bits<2> omod; + + let Word1{0} = src0_abs; + let Word1{1} = src1_abs; + let Word1{2} = update_exec_mask; + let Word1{3} = update_pred; + let Word1{4} = write; + let Word1{6-5} = omod; + let Word1{17-7} = alu_inst; +} + +class R600ALU_Word1_OP3 alu_inst> : R600ALU_Word1{ + + bits<11> src2; + bits<1> src2_rel; + bits<1> src2_neg; + + bits<9> src2_sel = src2{8-0}; + bits<2> src2_chan = src2{10-9}; + + let Word1{8-0} = src2_sel; + let Word1{9} = src2_rel; + let Word1{11-10} = src2_chan; + let Word1{12} = src2_neg; + let Word1{17-13} = alu_inst; +} + +/* +XXX: R600 subtarget uses a slightly different encoding than the other +subtargets. We currently handle this in R600MCCodeEmitter, but we may +want to use these instruction classes in the future. + +class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 { + + bits<1> fog_merge; + bits<10> alu_inst; + + let Inst{37} = fog_merge; + let Inst{39-38} = omod; + let Inst{49-40} = alu_inst; +} + +class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { + + bits<11> alu_inst; + + let Inst{38-37} = omod; + let Inst{49-39} = alu_inst; +} +*/ + +//===----------------------------------------------------------------------===// +// Vertex Fetch instructions +//===----------------------------------------------------------------------===// + +class VTX_WORD0 { + field bits<32> Word0; + bits<7> SRC_GPR; + bits<5> VC_INST; + bits<2> FETCH_TYPE; + bits<1> FETCH_WHOLE_QUAD; + bits<8> BUFFER_ID; + bits<1> SRC_REL; + bits<2> SRC_SEL_X; + bits<6> MEGA_FETCH_COUNT; + + let Word0{4-0} = VC_INST; + let Word0{6-5} = FETCH_TYPE; + let Word0{7} = FETCH_WHOLE_QUAD; + let Word0{15-8} = BUFFER_ID; + let Word0{22-16} = SRC_GPR; + let Word0{23} = SRC_REL; + let Word0{25-24} = SRC_SEL_X; + let Word0{31-26} = MEGA_FETCH_COUNT; +} + +class VTX_WORD1_GPR { + field bits<32> Word1; + bits<7> DST_GPR; + bits<1> DST_REL; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + bits<1> USE_CONST_FIELDS; + bits<6> DATA_FORMAT; + bits<2> NUM_FORMAT_ALL; + bits<1> FORMAT_COMP_ALL; + bits<1> SRF_MODE_ALL; + + let Word1{6-0} = DST_GPR; + let Word1{7} = DST_REL; + let Word1{8} = 0; // Reserved + let Word1{11-9} = DST_SEL_X; + let Word1{14-12} = DST_SEL_Y; + let Word1{17-15} = DST_SEL_Z; + let Word1{20-18} = DST_SEL_W; + let Word1{21} = USE_CONST_FIELDS; + let Word1{27-22} = DATA_FORMAT; + let Word1{29-28} = NUM_FORMAT_ALL; + let Word1{30} = FORMAT_COMP_ALL; + let Word1{31} = SRF_MODE_ALL; +} + +//===----------------------------------------------------------------------===// +// Texture fetch instructions +//===----------------------------------------------------------------------===// + +class TEX_WORD0 { + field bits<32> Word0; + + bits<5> TEX_INST; + bits<2> INST_MOD; + bits<1> FETCH_WHOLE_QUAD; + bits<8> RESOURCE_ID; + bits<7> SRC_GPR; + bits<1> SRC_REL; + bits<1> ALT_CONST; + bits<2> RESOURCE_INDEX_MODE; + bits<2> SAMPLER_INDEX_MODE; + + let Word0{4-0} = TEX_INST; + let Word0{6-5} = INST_MOD; + let Word0{7} = FETCH_WHOLE_QUAD; + let Word0{15-8} = RESOURCE_ID; + let Word0{22-16} = SRC_GPR; + let Word0{23} = SRC_REL; + let Word0{24} = ALT_CONST; + let Word0{26-25} = RESOURCE_INDEX_MODE; + let Word0{28-27} = SAMPLER_INDEX_MODE; +} + +class TEX_WORD1 { + field bits<32> Word1; + + bits<7> DST_GPR; + bits<1> DST_REL; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + bits<7> LOD_BIAS; + bits<1> COORD_TYPE_X; + bits<1> COORD_TYPE_Y; + bits<1> COORD_TYPE_Z; + bits<1> COORD_TYPE_W; + + let Word1{6-0} = DST_GPR; + let Word1{7} = DST_REL; + let Word1{11-9} = DST_SEL_X; + let Word1{14-12} = DST_SEL_Y; + let Word1{17-15} = DST_SEL_Z; + let Word1{20-18} = DST_SEL_W; + let Word1{27-21} = LOD_BIAS; + let Word1{28} = COORD_TYPE_X; + let Word1{29} = COORD_TYPE_Y; + let Word1{30} = COORD_TYPE_Z; + let Word1{31} = COORD_TYPE_W; +} + +class TEX_WORD2 { + field bits<32> Word2; + + bits<5> OFFSET_X; + bits<5> OFFSET_Y; + bits<5> OFFSET_Z; + bits<5> SAMPLER_ID; + bits<3> SRC_SEL_X; + bits<3> SRC_SEL_Y; + bits<3> SRC_SEL_Z; + bits<3> SRC_SEL_W; + + let Word2{4-0} = OFFSET_X; + let Word2{9-5} = OFFSET_Y; + let Word2{14-10} = OFFSET_Z; + let Word2{19-15} = SAMPLER_ID; + let Word2{22-20} = SRC_SEL_X; + let Word2{25-23} = SRC_SEL_Y; + let Word2{28-26} = SRC_SEL_Z; + let Word2{31-29} = SRC_SEL_W; +} + +//===----------------------------------------------------------------------===// +// Control Flow Instructions +//===----------------------------------------------------------------------===// + +class CF_WORD1_R600 { + field bits<32> Word1; + + bits<3> POP_COUNT; + bits<5> CF_CONST; + bits<2> COND; + bits<3> COUNT; + bits<6> CALL_COUNT; + bits<1> COUNT_3; + bits<1> END_OF_PROGRAM; + bits<1> VALID_PIXEL_MODE; + bits<7> CF_INST; + bits<1> WHOLE_QUAD_MODE; + bits<1> BARRIER; + + let Word1{2-0} = POP_COUNT; + let Word1{7-3} = CF_CONST; + let Word1{9-8} = COND; + let Word1{12-10} = COUNT; + let Word1{18-13} = CALL_COUNT; + let Word1{19} = COUNT_3; + let Word1{21} = END_OF_PROGRAM; + let Word1{22} = VALID_PIXEL_MODE; + let Word1{29-23} = CF_INST; + let Word1{30} = WHOLE_QUAD_MODE; + let Word1{31} = BARRIER; +} + +class CF_WORD0_EG { + field bits<32> Word0; + + bits<24> ADDR; + bits<3> JUMPTABLE_SEL; + + let Word0{23-0} = ADDR; + let Word0{26-24} = JUMPTABLE_SEL; +} + +class CF_WORD1_EG { + field bits<32> Word1; + + bits<3> POP_COUNT; + bits<5> CF_CONST; + bits<2> COND; + bits<6> COUNT; + bits<1> VALID_PIXEL_MODE; + bits<1> END_OF_PROGRAM; + bits<8> CF_INST; + bits<1> BARRIER; + + let Word1{2-0} = POP_COUNT; + let Word1{7-3} = CF_CONST; + let Word1{9-8} = COND; + let Word1{15-10} = COUNT; + let Word1{20} = VALID_PIXEL_MODE; + let Word1{21} = END_OF_PROGRAM; + let Word1{29-22} = CF_INST; + let Word1{31} = BARRIER; +} + +class CF_ALU_WORD0 { + field bits<32> Word0; + + bits<22> ADDR; + bits<4> KCACHE_BANK0; + bits<4> KCACHE_BANK1; + bits<2> KCACHE_MODE0; + + let Word0{21-0} = ADDR; + let Word0{25-22} = KCACHE_BANK0; + let Word0{29-26} = KCACHE_BANK1; + let Word0{31-30} = KCACHE_MODE0; +} + +class CF_ALU_WORD1 { + field bits<32> Word1; + + bits<2> KCACHE_MODE1; + bits<8> KCACHE_ADDR0; + bits<8> KCACHE_ADDR1; + bits<7> COUNT; + bits<1> ALT_CONST; + bits<4> CF_INST; + bits<1> WHOLE_QUAD_MODE; + bits<1> BARRIER; + + let Word1{1-0} = KCACHE_MODE1; + let Word1{9-2} = KCACHE_ADDR0; + let Word1{17-10} = KCACHE_ADDR1; + let Word1{24-18} = COUNT; + let Word1{25} = ALT_CONST; + let Word1{29-26} = CF_INST; + let Word1{30} = WHOLE_QUAD_MODE; + let Word1{31} = BARRIER; +} diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index b4131be..7512cf4 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -12,44 +12,7 @@ //===----------------------------------------------------------------------===// include "R600Intrinsics.td" - -class InstR600 pattern, - InstrItinClass itin> - : AMDGPUInst { - - field bits<64> Inst; - bit TransOnly = 0; - bit Trig = 0; - bit Op3 = 0; - bit isVector = 0; - bits<2> FlagOperandIdx = 0; - bit Op1 = 0; - bit Op2 = 0; - bit HasNativeOperands = 0; - bit VTXInst = 0; - bit TEXInst = 0; - - let Namespace = "AMDGPU"; - let OutOperandList = outs; - let InOperandList = ins; - let AsmString = asm; - let Pattern = pattern; - let Itinerary = itin; - - let TSFlags{0} = TransOnly; - let TSFlags{4} = Trig; - let TSFlags{5} = Op3; - - // Vector instructions are instructions that must fill all slots in an - // instruction group - let TSFlags{6} = isVector; - let TSFlags{8-7} = FlagOperandIdx; - let TSFlags{9} = HasNativeOperands; - let TSFlags{10} = Op1; - let TSFlags{11} = Op2; - let TSFlags{12} = VTXInst; - let TSFlags{13} = TEXInst; -} +include "R600InstrFormats.td" class InstR600ISA pattern> : InstR600 { @@ -114,235 +77,6 @@ def ADDRGA_CONST_OFFSET : ComplexPattern; def ADDRIndirect : ComplexPattern; -class R600ALU_Word0 { - field bits<32> Word0; - - bits<11> src0; - bits<1> src0_neg; - bits<1> src0_rel; - bits<11> src1; - bits<1> src1_rel; - bits<1> src1_neg; - bits<3> index_mode = 0; - bits<2> pred_sel; - bits<1> last; - - bits<9> src0_sel = src0{8-0}; - bits<2> src0_chan = src0{10-9}; - bits<9> src1_sel = src1{8-0}; - bits<2> src1_chan = src1{10-9}; - - let Word0{8-0} = src0_sel; - let Word0{9} = src0_rel; - let Word0{11-10} = src0_chan; - let Word0{12} = src0_neg; - let Word0{21-13} = src1_sel; - let Word0{22} = src1_rel; - let Word0{24-23} = src1_chan; - let Word0{25} = src1_neg; - let Word0{28-26} = index_mode; - let Word0{30-29} = pred_sel; - let Word0{31} = last; -} - -class R600ALU_Word1 { - field bits<32> Word1; - - bits<11> dst; - bits<3> bank_swizzle; - bits<1> dst_rel; - bits<1> clamp; - - bits<7> dst_sel = dst{6-0}; - bits<2> dst_chan = dst{10-9}; - - let Word1{20-18} = bank_swizzle; - let Word1{27-21} = dst_sel; - let Word1{28} = dst_rel; - let Word1{30-29} = dst_chan; - let Word1{31} = clamp; -} - -class R600ALU_Word1_OP2 alu_inst> : R600ALU_Word1{ - - bits<1> src0_abs; - bits<1> src1_abs; - bits<1> update_exec_mask; - bits<1> update_pred; - bits<1> write; - bits<2> omod; - - let Word1{0} = src0_abs; - let Word1{1} = src1_abs; - let Word1{2} = update_exec_mask; - let Word1{3} = update_pred; - let Word1{4} = write; - let Word1{6-5} = omod; - let Word1{17-7} = alu_inst; -} - -class R600ALU_Word1_OP3 alu_inst> : R600ALU_Word1{ - - bits<11> src2; - bits<1> src2_rel; - bits<1> src2_neg; - - bits<9> src2_sel = src2{8-0}; - bits<2> src2_chan = src2{10-9}; - - let Word1{8-0} = src2_sel; - let Word1{9} = src2_rel; - let Word1{11-10} = src2_chan; - let Word1{12} = src2_neg; - let Word1{17-13} = alu_inst; -} - -class VTX_WORD0 { - field bits<32> Word0; - bits<7> SRC_GPR; - bits<5> VC_INST; - bits<2> FETCH_TYPE; - bits<1> FETCH_WHOLE_QUAD; - bits<8> BUFFER_ID; - bits<1> SRC_REL; - bits<2> SRC_SEL_X; - bits<6> MEGA_FETCH_COUNT; - - let Word0{4-0} = VC_INST; - let Word0{6-5} = FETCH_TYPE; - let Word0{7} = FETCH_WHOLE_QUAD; - let Word0{15-8} = BUFFER_ID; - let Word0{22-16} = SRC_GPR; - let Word0{23} = SRC_REL; - let Word0{25-24} = SRC_SEL_X; - let Word0{31-26} = MEGA_FETCH_COUNT; -} - -class VTX_WORD1_GPR { - field bits<32> Word1; - bits<7> DST_GPR; - bits<1> DST_REL; - bits<3> DST_SEL_X; - bits<3> DST_SEL_Y; - bits<3> DST_SEL_Z; - bits<3> DST_SEL_W; - bits<1> USE_CONST_FIELDS; - bits<6> DATA_FORMAT; - bits<2> NUM_FORMAT_ALL; - bits<1> FORMAT_COMP_ALL; - bits<1> SRF_MODE_ALL; - - let Word1{6-0} = DST_GPR; - let Word1{7} = DST_REL; - let Word1{8} = 0; // Reserved - let Word1{11-9} = DST_SEL_X; - let Word1{14-12} = DST_SEL_Y; - let Word1{17-15} = DST_SEL_Z; - let Word1{20-18} = DST_SEL_W; - let Word1{21} = USE_CONST_FIELDS; - let Word1{27-22} = DATA_FORMAT; - let Word1{29-28} = NUM_FORMAT_ALL; - let Word1{30} = FORMAT_COMP_ALL; - let Word1{31} = SRF_MODE_ALL; -} - -class TEX_WORD0 { - field bits<32> Word0; - - bits<5> TEX_INST; - bits<2> INST_MOD; - bits<1> FETCH_WHOLE_QUAD; - bits<8> RESOURCE_ID; - bits<7> SRC_GPR; - bits<1> SRC_REL; - bits<1> ALT_CONST; - bits<2> RESOURCE_INDEX_MODE; - bits<2> SAMPLER_INDEX_MODE; - - let Word0{4-0} = TEX_INST; - let Word0{6-5} = INST_MOD; - let Word0{7} = FETCH_WHOLE_QUAD; - let Word0{15-8} = RESOURCE_ID; - let Word0{22-16} = SRC_GPR; - let Word0{23} = SRC_REL; - let Word0{24} = ALT_CONST; - let Word0{26-25} = RESOURCE_INDEX_MODE; - let Word0{28-27} = SAMPLER_INDEX_MODE; -} - -class TEX_WORD1 { - field bits<32> Word1; - - bits<7> DST_GPR; - bits<1> DST_REL; - bits<3> DST_SEL_X; - bits<3> DST_SEL_Y; - bits<3> DST_SEL_Z; - bits<3> DST_SEL_W; - bits<7> LOD_BIAS; - bits<1> COORD_TYPE_X; - bits<1> COORD_TYPE_Y; - bits<1> COORD_TYPE_Z; - bits<1> COORD_TYPE_W; - - let Word1{6-0} = DST_GPR; - let Word1{7} = DST_REL; - let Word1{11-9} = DST_SEL_X; - let Word1{14-12} = DST_SEL_Y; - let Word1{17-15} = DST_SEL_Z; - let Word1{20-18} = DST_SEL_W; - let Word1{27-21} = LOD_BIAS; - let Word1{28} = COORD_TYPE_X; - let Word1{29} = COORD_TYPE_Y; - let Word1{30} = COORD_TYPE_Z; - let Word1{31} = COORD_TYPE_W; -} - -class TEX_WORD2 { - field bits<32> Word2; - - bits<5> OFFSET_X; - bits<5> OFFSET_Y; - bits<5> OFFSET_Z; - bits<5> SAMPLER_ID; - bits<3> SRC_SEL_X; - bits<3> SRC_SEL_Y; - bits<3> SRC_SEL_Z; - bits<3> SRC_SEL_W; - - let Word2{4-0} = OFFSET_X; - let Word2{9-5} = OFFSET_Y; - let Word2{14-10} = OFFSET_Z; - let Word2{19-15} = SAMPLER_ID; - let Word2{22-20} = SRC_SEL_X; - let Word2{25-23} = SRC_SEL_Y; - let Word2{28-26} = SRC_SEL_Z; - let Word2{31-29} = SRC_SEL_W; -} - -/* -XXX: R600 subtarget uses a slightly different encoding than the other -subtargets. We currently handle this in R600MCCodeEmitter, but we may -want to use these instruction classes in the future. - -class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 { - - bits<1> fog_merge; - bits<10> alu_inst; - - let Inst{37} = fog_merge; - let Inst{39-38} = omod; - let Inst{49-40} = alu_inst; -} - -class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { - - bits<11> alu_inst; - - let Inst{38-37} = omod; - let Inst{49-39} = alu_inst; -} -*/ def R600_Pred : PredicateOperand; @@ -779,41 +513,6 @@ class ExportBufInst : InstR600ISA<( // Control Flow Instructions //===----------------------------------------------------------------------===// -class CF_ALU_WORD0 { - field bits<32> Word0; - - bits<22> ADDR; - bits<4> KCACHE_BANK0; - bits<4> KCACHE_BANK1; - bits<2> KCACHE_MODE0; - - let Word0{21-0} = ADDR; - let Word0{25-22} = KCACHE_BANK0; - let Word0{29-26} = KCACHE_BANK1; - let Word0{31-30} = KCACHE_MODE0; -} - -class CF_ALU_WORD1 { - field bits<32> Word1; - - bits<2> KCACHE_MODE1; - bits<8> KCACHE_ADDR0; - bits<8> KCACHE_ADDR1; - bits<7> COUNT; - bits<1> ALT_CONST; - bits<4> CF_INST; - bits<1> WHOLE_QUAD_MODE; - bits<1> BARRIER; - - let Word1{1-0} = KCACHE_MODE1; - let Word1{9-2} = KCACHE_ADDR0; - let Word1{17-10} = KCACHE_ADDR1; - let Word1{24-18} = COUNT; - let Word1{25} = ALT_CONST; - let Word1{29-26} = CF_INST; - let Word1{30} = WHOLE_QUAD_MODE; - let Word1{31} = BARRIER; -} def KCACHE : InstFlag<"printKCache">; @@ -844,34 +543,6 @@ class CF_WORD0_R600 { let Word0 = ADDR; } -class CF_WORD1_R600 { - field bits<32> Word1; - - bits<3> POP_COUNT; - bits<5> CF_CONST; - bits<2> COND; - bits<3> COUNT; - bits<6> CALL_COUNT; - bits<1> COUNT_3; - bits<1> END_OF_PROGRAM; - bits<1> VALID_PIXEL_MODE; - bits<7> CF_INST; - bits<1> WHOLE_QUAD_MODE; - bits<1> BARRIER; - - let Word1{2-0} = POP_COUNT; - let Word1{7-3} = CF_CONST; - let Word1{9-8} = COND; - let Word1{12-10} = COUNT; - let Word1{18-13} = CALL_COUNT; - let Word1{19} = COUNT_3; - let Word1{21} = END_OF_PROGRAM; - let Word1{22} = VALID_PIXEL_MODE; - let Word1{29-23} = CF_INST; - let Word1{30} = WHOLE_QUAD_MODE; - let Word1{31} = BARRIER; -} - class CF_CLAUSE_R600 inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { field bits<64> Inst; @@ -890,38 +561,6 @@ ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { let Inst{63-32} = Word1; } -class CF_WORD0_EG { - field bits<32> Word0; - - bits<24> ADDR; - bits<3> JUMPTABLE_SEL; - - let Word0{23-0} = ADDR; - let Word0{26-24} = JUMPTABLE_SEL; -} - -class CF_WORD1_EG { - field bits<32> Word1; - - bits<3> POP_COUNT; - bits<5> CF_CONST; - bits<2> COND; - bits<6> COUNT; - bits<1> VALID_PIXEL_MODE; - bits<1> END_OF_PROGRAM; - bits<8> CF_INST; - bits<1> BARRIER; - - let Word1{2-0} = POP_COUNT; - let Word1{7-3} = CF_CONST; - let Word1{9-8} = COND; - let Word1{15-10} = COUNT; - let Word1{20} = VALID_PIXEL_MODE; - let Word1{21} = END_OF_PROGRAM; - let Word1{29-22} = CF_INST; - let Word1{31} = BARRIER; -} - class CF_CLAUSE_EG inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { field bits<64> Inst; -- cgit v1.1 From c30b232349b44dcd158eaa5b4e8599615ae8144e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 14 Jun 2013 22:12:19 +0000 Subject: R600: Factor the instruction encoding out the RAT_WRITE_CACHELESS_eg class Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184014 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrFormats.td | 45 +++++++++++++++++++++++ lib/Target/R600/R600Instructions.td | 73 ++++++++++++------------------------- 2 files changed, 68 insertions(+), 50 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td index 7cc48f4..618004f 100644 --- a/lib/Target/R600/R600InstrFormats.td +++ b/lib/Target/R600/R600InstrFormats.td @@ -390,3 +390,48 @@ class CF_ALU_WORD1 { let Word1{30} = WHOLE_QUAD_MODE; let Word1{31} = BARRIER; } + +class CF_ALLOC_EXPORT_WORD0_RAT { + field bits<32> Word0; + + bits<4> rat_id; + bits<6> rat_inst; + bits<2> rim; + bits<2> type; + bits<7> rw_gpr; + bits<1> rw_rel; + bits<7> index_gpr; + bits<2> elem_size; + + let Word0{3-0} = rat_id; + let Word0{9-4} = rat_inst; + let Word0{10} = 0; // Reserved + let Word0{12-11} = rim; + let Word0{14-13} = type; + let Word0{21-15} = rw_gpr; + let Word0{22} = rw_rel; + let Word0{29-23} = index_gpr; + let Word0{31-30} = elem_size; +} + +class CF_ALLOC_EXPORT_WORD1_BUF { + field bits<32> Word1; + + bits<12> array_size; + bits<4> comp_mask; + bits<4> burst_count; + bits<1> vpm; + bits<1> eop; + bits<8> cf_inst; + bits<1> mark; + bits<1> barrier; + + let Word1{11-0} = array_size; + let Word1{15-12} = comp_mask; + let Word1{19-16} = burst_count; + let Word1{20} = vpm; + let Word1{21} = eop; + let Word1{29-22} = cf_inst; + let Word1{30} = mark; + let Word1{31} = barrier; +} diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 7512cf4..9716fcf 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -235,45 +235,18 @@ def TEX_SHADOW_ARRAY : PatLeaf< }] >; -class EG_CF_RAT cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, +class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> ratid, dag outs, dag ins, string asm, list pattern> : - InstR600ISA { - bits<7> RW_GPR; - bits<7> INDEX_GPR; - - bits<2> RIM; - bits<2> TYPE; - bits<1> RW_REL; - bits<2> ELEM_SIZE; - - bits<12> ARRAY_SIZE; - bits<4> COMP_MASK; - bits<4> BURST_COUNT; - bits<1> VPM; - bits<1> eop; - bits<1> MARK; - bits<1> BARRIER; - - // CF_ALLOC_EXPORT_WORD0_RAT - let Inst{3-0} = rat_id; - let Inst{9-4} = rat_inst; - let Inst{10} = 0; // Reserved - let Inst{12-11} = RIM; - let Inst{14-13} = TYPE; - let Inst{21-15} = RW_GPR; - let Inst{22} = RW_REL; - let Inst{29-23} = INDEX_GPR; - let Inst{31-30} = ELEM_SIZE; - - // CF_ALLOC_EXPORT_WORD1_BUF - let Inst{43-32} = ARRAY_SIZE; - let Inst{47-44} = COMP_MASK; - let Inst{51-48} = BURST_COUNT; - let Inst{52} = VPM; - let Inst{53} = eop; - let Inst{61-54} = cf_inst; - let Inst{62} = MARK; - let Inst{63} = BARRIER; + InstR600ISA , + CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF { + + let cf_inst = cfinst; + let rat_inst = ratinst; + let rat_id = ratid; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + } class LoadParamFrag : PatFrag < @@ -1396,21 +1369,21 @@ let hasSideEffects = 1 in { //===----------------------------------------------------------------------===// let usesCustomInserter = 1 in { -class RAT_WRITE_CACHELESS_eg comp_mask, string name, +class RAT_WRITE_CACHELESS_eg mask, string name, list pattern> : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, name, pattern> { - let RIM = 0; + let rim = 0; // XXX: Have a separate instruction for non-indexed writes. - let TYPE = 1; - let RW_REL = 0; - let ELEM_SIZE = 0; - - let ARRAY_SIZE = 0; - let COMP_MASK = comp_mask; - let BURST_COUNT = 0; - let VPM = 0; - let MARK = 0; - let BARRIER = 1; + let type = 1; + let rw_rel = 0; + let elem_size = 0; + + let array_size = 0; + let comp_mask = mask; + let burst_count = 0; + let vpm = 0; + let mark = 0; + let barrier = 1; } } // End usesCustomInserter = 1 -- cgit v1.1 From 4efccd0fb17c9eca776b37b82790c82d772fafc6 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 14 Jun 2013 22:12:24 +0000 Subject: R600: Use EXPORT_RAT_INST_STORE_DWORD for stores on Cayman We were using RAT_INST_STORE_RAW, which seemed to work, but the docs say this instruction doesn't exist for Cayman, so it's probably safer to use a documented instruction instead. Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184015 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ControlFlowFinalizer.cpp | 1 + lib/Target/R600/R600Instructions.td | 93 +++++++++++++++++----------- 2 files changed, 57 insertions(+), 37 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index ab29d60..887c808 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -370,6 +370,7 @@ public: case AMDGPU::R600_ExportSwz: case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: + case AMDGPU::RAT_STORE_DWORD_cm: DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 9716fcf..86ddd00 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -235,14 +235,26 @@ def TEX_SHADOW_ARRAY : PatLeaf< }] >; -class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> ratid, dag outs, +class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> mask, dag outs, dag ins, string asm, list pattern> : InstR600ISA , CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF { - let cf_inst = cfinst; + let rat_id = 0; let rat_inst = ratinst; - let rat_id = ratid; + let rim = 0; + // XXX: Have a separate instruction for non-indexed writes. + let type = 1; + let rw_rel = 0; + let elem_size = 0; + + let array_size = 0; + let comp_mask = mask; + let burst_count = 0; + let vpm = 0; + let cf_inst = cfinst; + let mark = 0; + let barrier = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -1210,6 +1222,33 @@ def : POW_Common ; def : SIN_PAT ; def : COS_PAT ; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; + +//===----------------------------------------------------------------------===// +// Memory read/write instructions +//===----------------------------------------------------------------------===// +let usesCustomInserter = 1 in { + +class RAT_WRITE_CACHELESS_eg mask, string name, + list pattern> + : EG_CF_RAT <0x57, 0x2, mask, (outs), ins, name, pattern> { +} + +} // End usesCustomInserter = 1 + +// 32-bit store +def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), + 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop", + [(global_store i32:$rw_gpr, i32:$index_gpr)] +>; + +//128-bit store +def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < + (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), + 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop", + [(global_store v4i32:$rw_gpr, i32:$index_gpr)] +>; + } // End Predicates = [isEG] //===----------------------------------------------------------------------===// @@ -1367,40 +1406,6 @@ let hasSideEffects = 1 in { //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// -let usesCustomInserter = 1 in { - -class RAT_WRITE_CACHELESS_eg mask, string name, - list pattern> - : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, name, pattern> { - let rim = 0; - // XXX: Have a separate instruction for non-indexed writes. - let type = 1; - let rw_rel = 0; - let elem_size = 0; - - let array_size = 0; - let comp_mask = mask; - let burst_count = 0; - let vpm = 0; - let mark = 0; - let barrier = 1; -} - -} // End usesCustomInserter = 1 - -// 32-bit store -def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop", - [(global_store i32:$rw_gpr, i32:$index_gpr)] ->; - -//128-bit store -def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < - (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop", - [(global_store v4i32:$rw_gpr, i32:$index_gpr)] ->; class VTX_READ_eg buffer_id, dag outs, list pattern> : InstR600ISA , @@ -1575,6 +1580,10 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, defm R600_ : RegisterLoadStore ; +//===----------------------------------------------------------------------===// +// Cayman Instructions +//===----------------------------------------------------------------------===// + let Predicates = [isCayman] in { let isVector = 1 in { @@ -1616,6 +1625,16 @@ def : Pat < def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; + +def RAT_STORE_DWORD_cm : EG_CF_RAT < + 0x57, 0x14, 0x1, (outs), + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), + "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", + [(global_store i32:$rw_gpr, i32:$index_gpr)] +> { + let eop = 0; // This bit is not used on Cayman. +} + } // End isCayman //===----------------------------------------------------------------------===// -- cgit v1.1 From d6055262d23b1a8f2b5c74ab94fc6c143aca1c45 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 14 Jun 2013 22:12:30 +0000 Subject: R600: Use correct encoding for Vertex Fetch instructions on Cayman Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184016 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 4 +- lib/Target/R600/R600InstrFormats.td | 28 +- lib/Target/R600/R600Instructions.td | 418 +++++++++++++-------- 3 files changed, 294 insertions(+), 156 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 4d6c25c..f470783 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -99,7 +99,9 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, } else if (IS_VTX(Desc)) { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset - InstWord2 |= 1 << 19; + if (!(STI.getFeatureBits() & AMDGPU::FeatureCaymanISA)) { + InstWord2 |= 1 << 19; // Mega-Fetch bit + } Emit(InstWord01, OS); Emit(InstWord2, OS); diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td index 618004f..d31f18c 100644 --- a/lib/Target/R600/R600InstrFormats.td +++ b/lib/Target/R600/R600InstrFormats.td @@ -166,28 +166,46 @@ class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { class VTX_WORD0 { field bits<32> Word0; - bits<7> SRC_GPR; + bits<7> src_gpr; bits<5> VC_INST; bits<2> FETCH_TYPE; bits<1> FETCH_WHOLE_QUAD; bits<8> BUFFER_ID; bits<1> SRC_REL; bits<2> SRC_SEL_X; - bits<6> MEGA_FETCH_COUNT; let Word0{4-0} = VC_INST; let Word0{6-5} = FETCH_TYPE; let Word0{7} = FETCH_WHOLE_QUAD; let Word0{15-8} = BUFFER_ID; - let Word0{22-16} = SRC_GPR; + let Word0{22-16} = src_gpr; let Word0{23} = SRC_REL; let Word0{25-24} = SRC_SEL_X; +} + +class VTX_WORD0_eg : VTX_WORD0 { + + bits<6> MEGA_FETCH_COUNT; + let Word0{31-26} = MEGA_FETCH_COUNT; } +class VTX_WORD0_cm : VTX_WORD0 { + + bits<2> SRC_SEL_Y; + bits<2> STRUCTURED_READ; + bits<1> LDS_REQ; + bits<1> COALESCED_READ; + + let Word0{27-26} = SRC_SEL_Y; + let Word0{29-28} = STRUCTURED_READ; + let Word0{30} = LDS_REQ; + let Word0{31} = COALESCED_READ; +} + class VTX_WORD1_GPR { field bits<32> Word1; - bits<7> DST_GPR; + bits<7> dst_gpr; bits<1> DST_REL; bits<3> DST_SEL_X; bits<3> DST_SEL_Y; @@ -199,7 +217,7 @@ class VTX_WORD1_GPR { bits<1> FORMAT_COMP_ALL; bits<1> SRF_MODE_ALL; - let Word1{6-0} = DST_GPR; + let Word1{6-0} = dst_gpr; let Word1{7} = DST_REL; let Word1{8} = 0; // Reserved let Word1{11-9} = DST_SEL_X; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 86ddd00..27b0214 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -261,6 +261,50 @@ class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> mask, dag outs, } +class VTX_READ buffer_id, dag outs, list pattern> + : InstR600ISA , + VTX_WORD1_GPR { + + // Static fields + let DST_REL = 0; + // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, + // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, + // however, based on my testing if USE_CONST_FIELDS is set, then all + // these fields need to be set to 0. + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 1; + let FORMAT_COMP_ALL = 0; + let SRF_MODE_ALL = 0; + + let Inst{63-32} = Word1; + // LLVM can only encode 64-bit instructions, so these fields are manually + // encoded in R600CodeEmitter + // + // bits<16> OFFSET; + // bits<2> ENDIAN_SWAP = 0; + // bits<1> CONST_BUF_NO_STRIDE = 0; + // bits<1> MEGA_FETCH = 0; + // bits<1> ALT_CONST = 0; + // bits<2> BUFFER_INDEX_MODE = 0; + + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding + // is done in R600CodeEmitter + // + // Inst{79-64} = OFFSET; + // Inst{81-80} = ENDIAN_SWAP; + // Inst{82} = CONST_BUF_NO_STRIDE; + // Inst{83} = MEGA_FETCH; + // Inst{84} = ALT_CONST; + // Inst{86-85} = BUFFER_INDEX_MODE; + // Inst{95-86} = 0; Reserved + + // VTX_WORD3 (Padding) + // + // Inst{127-96} = 0; + + let VTXInst = 1; +} + class LoadParamFrag : PatFrag < (ops node:$ptr), (load_type node:$ptr), [{ return isParamLoad(dyn_cast(N)); }] @@ -1249,6 +1293,133 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; +class VTX_READ_eg buffer_id, dag outs, list pattern> + : VTX_WORD0_eg, VTX_READ { + + // Static fields + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = buffer_id; + let SRC_REL = 0; + // XXX: We can infer this field based on the SRC_GPR. This would allow us + // to store vertex addresses in any channel, not just X. + let SRC_SEL_X = 0; + + let Inst{31-0} = Word0; +} + +class VTX_READ_8_eg buffer_id, list pattern> + : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 1; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 1; // FMT_8 +} + +class VTX_READ_16_eg buffer_id, list pattern> + : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + let MEGA_FETCH_COUNT = 2; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 5; // FMT_16 + +} + +class VTX_READ_32_eg buffer_id, list pattern> + : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 4; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 0xD; // COLOR_32 + + // This is not really necessary, but there were some GPU hangs that appeared + // to be caused by ALU instructions in the next instruction group that wrote + // to the $src_gpr registers of the VTX_READ. + // e.g. + // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 + // %T2_X = MOV %ZERO + //Adding this constraint prevents this from happening. + let Constraints = "$src_gpr.ptr = $dst_gpr"; +} + +class VTX_READ_128_eg buffer_id, list pattern> + : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, + (outs R600_Reg128:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 + + // XXX: Need to force VTX_READ_128 instructions to write to the same register + // that holds its buffer address to avoid potential hangs. We can't use + // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst + // registers are different sizes. +} + +//===----------------------------------------------------------------------===// +// VTX Read from parameter memory space +//===----------------------------------------------------------------------===// + +def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, + [(set i32:$dst_gpr, (load_param_zexti8 ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, + [(set i32:$dst_gpr, (load_param_zexti16 ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, + [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, + [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +//===----------------------------------------------------------------------===// +// VTX Read from global memory space +//===----------------------------------------------------------------------===// + +// 8-bit reads +def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, + [(set i32:$dst_gpr, (zextloadi8_global ADDRVTX_READ:$src_gpr))] +>; + +// 32-bit reads +def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, + [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + +// 128-bit reads +def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, + [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + +//===----------------------------------------------------------------------===// +// Constant Loads +// XXX: We are currently storing all constants in the global address space. +//===----------------------------------------------------------------------===// + +def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, + [(set i32:$dst_gpr, (constant_load ADDRVTX_READ:$src_gpr))] +>; + + } // End Predicates = [isEG] //===----------------------------------------------------------------------===// @@ -1403,13 +1574,71 @@ let hasSideEffects = 1 in { let END_OF_PROGRAM = 1; } +} // End Predicates = [isEGorCayman] + //===----------------------------------------------------------------------===// -// Memory read/write instructions +// Regist loads and stores - for indirect addressing //===----------------------------------------------------------------------===// -class VTX_READ_eg buffer_id, dag outs, list pattern> - : InstR600ISA , - VTX_WORD1_GPR, VTX_WORD0 { +defm R600_ : RegisterLoadStore ; + +//===----------------------------------------------------------------------===// +// Cayman Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isCayman] in { + +let isVector = 1 in { + +def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; + +def MULLO_INT_cm : MULLO_INT_Common<0x8F>; +def MULHI_INT_cm : MULHI_INT_Common<0x90>; +def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; +def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; +def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; +def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; +def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; +def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; +def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; +def SIN_cm : SIN_Common<0x8D>; +def COS_cm : COS_Common<0x8E>; +} // End isVector = 1 + +def : POW_Common ; +def : SIN_PAT ; +def : COS_PAT ; + +defm DIV_cm : DIV_Common; + +// RECIP_UINT emulation for Cayman +// The multiplication scales from [0,1] to the unsigned integer range +def : Pat < + (AMDGPUurecip i32:$src0), + (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), + (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) +>; + + def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { + let ADDR = 0; + let POP_COUNT = 0; + let COUNT = 0; + } + +def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; + + +def RAT_STORE_DWORD_cm : EG_CF_RAT < + 0x57, 0x14, 0x1, (outs), + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), + "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", + [(global_store i32:$rw_gpr, i32:$index_gpr)] +> { + let eop = 0; // This bit is not used on Cayman. +} + +class VTX_READ_cm buffer_id, dag outs, list pattern> + : VTX_WORD0_cm, VTX_READ { // Static fields let VC_INST = 0; @@ -1420,53 +1649,18 @@ class VTX_READ_eg buffer_id, dag outs, list pattern> // XXX: We can infer this field based on the SRC_GPR. This would allow us // to store vertex addresses in any channel, not just X. let SRC_SEL_X = 0; - let DST_REL = 0; - // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, - // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, - // however, based on my testing if USE_CONST_FIELDS is set, then all - // these fields need to be set to 0. - let USE_CONST_FIELDS = 0; - let NUM_FORMAT_ALL = 1; - let FORMAT_COMP_ALL = 0; - let SRF_MODE_ALL = 0; + let SRC_SEL_Y = 0; + let STRUCTURED_READ = 0; + let LDS_REQ = 0; + let COALESCED_READ = 0; let Inst{31-0} = Word0; - let Inst{63-32} = Word1; - // LLVM can only encode 64-bit instructions, so these fields are manually - // encoded in R600CodeEmitter - // - // bits<16> OFFSET; - // bits<2> ENDIAN_SWAP = 0; - // bits<1> CONST_BUF_NO_STRIDE = 0; - // bits<1> MEGA_FETCH = 0; - // bits<1> ALT_CONST = 0; - // bits<2> BUFFER_INDEX_MODE = 0; - - - - // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding - // is done in R600CodeEmitter - // - // Inst{79-64} = OFFSET; - // Inst{81-80} = ENDIAN_SWAP; - // Inst{82} = CONST_BUF_NO_STRIDE; - // Inst{83} = MEGA_FETCH; - // Inst{84} = ALT_CONST; - // Inst{86-85} = BUFFER_INDEX_MODE; - // Inst{95-86} = 0; Reserved - - // VTX_WORD3 (Padding) - // - // Inst{127-96} = 0; - - let VTXInst = 1; } -class VTX_READ_8_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { +class VTX_READ_8_cm buffer_id, list pattern> + : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { - let MEGA_FETCH_COUNT = 1; let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1474,10 +1668,9 @@ class VTX_READ_8_eg buffer_id, list pattern> let DATA_FORMAT = 1; // FMT_8 } -class VTX_READ_16_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { - let MEGA_FETCH_COUNT = 2; +class VTX_READ_16_cm buffer_id, list pattern> + : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1486,11 +1679,10 @@ class VTX_READ_16_eg buffer_id, list pattern> } -class VTX_READ_32_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { +class VTX_READ_32_cm buffer_id, list pattern> + : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { - let MEGA_FETCH_COUNT = 4; let DST_SEL_X = 0; let DST_SEL_Y = 7; // Masked let DST_SEL_Z = 7; // Masked @@ -1499,19 +1691,18 @@ class VTX_READ_32_eg buffer_id, list pattern> // This is not really necessary, but there were some GPU hangs that appeared // to be caused by ALU instructions in the next instruction group that wrote - // to the $ptr registers of the VTX_READ. + // to the $src_gpr registers of the VTX_READ. // e.g. // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 // %T2_X = MOV %ZERO //Adding this constraint prevents this from happening. - let Constraints = "$ptr.ptr = $dst"; + let Constraints = "$src_gpr.ptr = $dst_gpr"; } -class VTX_READ_128_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst), - pattern> { +class VTX_READ_128_cm buffer_id, list pattern> + : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, + (outs R600_Reg128:$dst_gpr), pattern> { - let MEGA_FETCH_COUNT = 16; let DST_SEL_X = 0; let DST_SEL_Y = 1; let DST_SEL_Z = 2; @@ -1520,28 +1711,27 @@ class VTX_READ_128_eg buffer_id, list pattern> // XXX: Need to force VTX_READ_128 instructions to write to the same register // that holds its buffer address to avoid potential hangs. We can't use - // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst + // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst // registers are different sizes. } //===----------------------------------------------------------------------===// // VTX Read from parameter memory space //===----------------------------------------------------------------------===// - -def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, - [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))] +def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0, + [(set i32:$dst_gpr, (load_param_zexti8 ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, - [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))] +def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0, + [(set i32:$dst_gpr, (load_param_zexti16 ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, - [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))] +def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0, + [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, - [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))] +def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0, + [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; //===----------------------------------------------------------------------===// @@ -1549,92 +1739,20 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, //===----------------------------------------------------------------------===// // 8-bit reads -def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, - [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))] +def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1, + [(set i32:$dst_gpr, (zextloadi8_global ADDRVTX_READ:$src_gpr))] >; // 32-bit reads -def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, - [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))] +def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1, + [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] >; // 128-bit reads -def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, - [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))] ->; - -//===----------------------------------------------------------------------===// -// Constant Loads -// XXX: We are currently storing all constants in the global address space. -//===----------------------------------------------------------------------===// - -def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, - [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))] +def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1, + [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] >; -} - -//===----------------------------------------------------------------------===// -// Regist loads and stores - for indirect addressing -//===----------------------------------------------------------------------===// - -defm R600_ : RegisterLoadStore ; - -//===----------------------------------------------------------------------===// -// Cayman Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isCayman] in { - -let isVector = 1 in { - -def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; - -def MULLO_INT_cm : MULLO_INT_Common<0x8F>; -def MULHI_INT_cm : MULHI_INT_Common<0x90>; -def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; -def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; -def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; -def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; -def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; -def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; -def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; -def SIN_cm : SIN_Common<0x8D>; -def COS_cm : COS_Common<0x8E>; -} // End isVector = 1 - -def : POW_Common ; -def : SIN_PAT ; -def : COS_PAT ; - -defm DIV_cm : DIV_Common; - -// RECIP_UINT emulation for Cayman -// The multiplication scales from [0,1] to the unsigned integer range -def : Pat < - (AMDGPUurecip i32:$src0), - (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), - (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) ->; - - def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { - let ADDR = 0; - let POP_COUNT = 0; - let COUNT = 0; - } - -def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - - -def RAT_STORE_DWORD_cm : EG_CF_RAT < - 0x57, 0x14, 0x1, (outs), - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), - "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", - [(global_store i32:$rw_gpr, i32:$index_gpr)] -> { - let eop = 0; // This bit is not used on Cayman. -} - } // End isCayman //===----------------------------------------------------------------------===// @@ -1755,7 +1873,7 @@ def CONST_COPY : Instruction { def TEX_VTX_CONSTBUF : InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr", [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, - VTX_WORD1_GPR, VTX_WORD0 { + VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; let FETCH_TYPE = 2; @@ -1809,7 +1927,7 @@ def TEX_VTX_CONSTBUF : def TEX_VTX_TEXBUF: InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, -VTX_WORD1_GPR, VTX_WORD0 { +VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; let FETCH_TYPE = 2; -- cgit v1.1 From 5aee09da12a1ba8553792d9017785e51199e38c2 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 15 Jun 2013 00:09:31 +0000 Subject: R600: Add SI load support for v[24]i32 and store for v2i32 Also add a seperate vector lit test file, since r600 doesn't seem to handle v2i32 load/store yet, but we can test both for SI. Patch by: Aaron Watry Reviewed-by: Tom Stellard Signed-off-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184021 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e8ed2dd..9c96c08 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1638,6 +1638,10 @@ defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; multiclass MUBUFStore_Pattern { @@ -1654,6 +1658,7 @@ multiclass MUBUFStore_Pattern { defm : MUBUFStore_Pattern ; defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; defm : MUBUFStore_Pattern ; /********** ====================== **********/ -- cgit v1.1 From b86a0cdb674549d8493043331cecd9cbf53b80da Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sat, 15 Jun 2013 04:49:57 +0000 Subject: Machine Model: Add MicroOpBufferSize and resource BufferSize. Replace the ill-defined MinLatency and ILPWindow properties with with straightforward buffer sizes: MCSchedMode::MicroOpBufferSize MCProcResourceDesc::BufferSize These can be used to more precisely model instruction execution if desired. Disabled some misched tests temporarily. They'll be reenabled in a few commits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184032 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 +-- lib/Target/ARM/ARMScheduleA9.td | 5 +---- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 4 ++-- lib/Target/X86/X86SchedHaswell.td | 1 - lib/Target/X86/X86SchedSandyBridge.td | 1 - lib/Target/X86/X86Schedule.td | 5 ----- lib/Target/X86/X86ScheduleAtom.td | 1 - 7 files changed, 4 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ad14475..496bcb2 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3684,8 +3684,7 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, return true; // Hoist VFP / NEON instructions with 4 or higher latency. - int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx, - /*FindMin=*/false); + int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); if (Latency < 0) Latency = getInstrLatency(ItinData, DefMI); if (Latency <= 3) diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index d06ad7d..ce49857 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1887,9 +1887,6 @@ def CortexA9Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. - let ILPWindow = 10; // Don't reschedule small blocks to hide - // latency. Minimum latency requirements are already - // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; @@ -1904,7 +1901,7 @@ def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } def A9UnitAGU : ProcResource<1>; def A9UnitLS : ProcResource<1>; -def A9UnitFP : ProcResource<1> { let Buffered = 0; } +def A9UnitFP : ProcResource<1>; def A9UnitB : ProcResource<1>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 6e966ec..b73e585 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -222,7 +222,7 @@ void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned MinLatency = I->getLatency(); #ifndef NDEBUG Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); #endif @@ -241,7 +241,7 @@ void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned MinLatency = I->getLatency(); #ifndef NDEBUG Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); #endif diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 84c9203..49e81a7 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -18,7 +18,6 @@ def HaswellModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; // 0 = Out-of-order execution. let LoadLatency = 4; - let ILPWindow = 30; let MispredictPenalty = 16; } diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index b36b3ad..c5fa521 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -19,7 +19,6 @@ def SandyBridgeModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; // 0 = Out-of-order execution. let LoadLatency = 4; - let ILPWindow = 20; let MispredictPenalty = 16; } diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 9f2c781..c32d12b 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -559,17 +559,12 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // -// ILPWindow=10 is an arbitrary threshold that approximates cycles of -// latency hidden by instruction buffers. The actual value is not very -// important but should be zero for inorder and nonzero for OOO processors. -// // The GenericModel contains no instruciton itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; let LoadLatency = 4; let HighLatency = 10; - let ILPWindow = 10; } include "X86ScheduleAtom.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index cb0960a..494a690 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -529,7 +529,6 @@ def AtomModel : SchedMachineModel { // OperandCycles may be used for expected latency. let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. let HighLatency = 30;// Expected, may be overriden by OperandCycles. - let ILPWindow = 0; // Always try to hide expected latency. let Itineraries = AtomItineraries; } -- cgit v1.1 From a5ce5f36d3a1e312304e8312ca64a1342f5f55a6 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sat, 15 Jun 2013 04:50:02 +0000 Subject: Update machine models. Specify buffer sizes for OOO processors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184033 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMScheduleA9.td | 4 ++-- lib/Target/ARM/ARMScheduleSwift.td | 2 +- lib/Target/X86/X86SchedHaswell.td | 2 +- lib/Target/X86/X86SchedSandyBridge.td | 2 +- lib/Target/X86/X86Schedule.td | 7 ++++--- lib/Target/X86/X86ScheduleAtom.td | 3 +-- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index ce49857..74ee50b 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1883,7 +1883,7 @@ def CortexA9Itineraries : ProcessorItineraries< // Cortex-A9 machine model for scheduling and other instruction cost heuristics. def CortexA9Model : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let MicroOpBufferSize = 56; // Based on available renamed registers. let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. @@ -1901,7 +1901,7 @@ def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } def A9UnitAGU : ProcResource<1>; def A9UnitLS : ProcResource<1>; -def A9UnitFP : ProcResource<1>; +def A9UnitFP : ProcResource<1> { let BufferSize = 0; } def A9UnitB : ProcResource<1>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index b5cf251..2a41616 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -1076,7 +1076,7 @@ def SwiftItineraries : ProcessorItineraries< // Swift machine model for scheduling and other instruction cost heuristics. def SwiftModel : SchedMachineModel { let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. - let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let MicroOpBufferSize = 45; // Based on NEON renamed registers. let LoadLatency = 3; let MispredictPenalty = 14; // A branch direction mispredict. diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 49e81a7..f98d0cc 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -16,7 +16,7 @@ def HaswellModel : SchedMachineModel { // All x86 instructions are modeled as a single micro-op, and HW can decode 4 // instructions per cycle. let IssueWidth = 4; - let MinLatency = 0; // 0 = Out-of-order execution. + let MicroOpBufferSize = 192; // Based on the reorder buffer. let LoadLatency = 4; let MispredictPenalty = 16; } diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index c5fa521..ecfd3db 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -17,7 +17,7 @@ def SandyBridgeModel : SchedMachineModel { // instructions per cycle. // FIXME: Identify instructions that aren't a single fused micro-op. let IssueWidth = 4; - let MinLatency = 0; // 0 = Out-of-order execution. + let MicroOpBufferSize = 168; // Based on the reorder buffer. let LoadLatency = 4; let MispredictPenalty = 16; } diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index c32d12b..625a05c 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -550,8 +550,9 @@ def IIC_NOP : InstrItinClass; // Resources beyond the decoder operate on micro-ops and are bufferred // so adjacent micro-ops don't directly compete. // -// MinLatency=0 indicates that RAW dependencies can be decoded in the -// same cycle. +// MicroOpBufferSize > 1 indicates that RAW dependencies can be +// decoded in the same cycle. The value 32 is a reasonably arbitrary +// number of in-flight instructions. // // HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef // indicates high latency opcodes. Alternatively, InstrItinData @@ -562,7 +563,7 @@ def IIC_NOP : InstrItinClass; // The GenericModel contains no instruciton itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; - let MinLatency = 0; + let MicroOpBufferSize = 32; let LoadLatency = 4; let HighLatency = 10; } diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 494a690..14a1471 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -525,8 +525,7 @@ def AtomItineraries : ProcessorItineraries< // Atom machine model. def AtomModel : SchedMachineModel { let IssueWidth = 2; // Allows 2 instructions per scheduling group. - let MinLatency = 1; // InstrStage cycles overrides MinLatency. - // OperandCycles may be used for expected latency. + let MicroOpBufferSize = 0; // In-order execution, always hide latency. let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. let HighLatency = 30;// Expected, may be overriden by OperandCycles. -- cgit v1.1 From a3d82ce19fd825cbf3bf85b5969424217fc40b45 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sat, 15 Jun 2013 04:50:06 +0000 Subject: Support BufferSize on ProcResGroup for unified MOp schedulers. And add Sandybridge/Haswell resource buffers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184034 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86SchedHaswell.td | 6 ++++++ lib/Target/X86/X86SchedSandyBridge.td | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index f98d0cc..6770f0a 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -49,6 +49,12 @@ def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>; def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>; def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>; +// 60 Entry Unified Scheduler +def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3, HWPort4, + HWPort5, HWPort6, HWPort7]> { + let BufferSize=60; +} + // Integer division issued on port 0. def HWDivider : ProcResource<1>; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index ecfd3db..e03de14 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -45,6 +45,11 @@ def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>; def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>; def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; +// 54 Entry Unified Scheduler +def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> { + let BufferSize=54; +} + // Integer division issued on port 0. def SBDivider : ProcResource<1>; -- cgit v1.1 From 6d9dbd5526e3161db884fc4fe99c278bb59ccc19 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 16 Jun 2013 20:34:15 +0000 Subject: Debug Info: Simplify Frame Index handling in DBG_VALUE Machine Instructions Rather than using the full power of target-specific addressing modes in DBG_VALUEs with Frame Indicies, simply use Frame Index + Offset. This reduces the complexity of debug info handling down to two representations of values (reg+offset and frame index+offset) rather than three or four. Ideally we could ensure that frame indicies had been eliminated by the time we reached an assembly or dwarf generation, but I haven't spent the time to figure out where the FIs are leaking through into that & whether there's a good place to convert them. Some FI+offset=>reg+offset conversion is done (see PrologEpilogInserter, for example) which is necessary for some SelectionDAG assumptions about registers, I believe, but it might be possible to make this a more thorough conversion & ensure there are no remaining FIs no matter how instruction selection is performed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184066 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7c03055..f8ecc60 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -702,12 +702,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } #endif // NDEBUG - // Special handling of dbg_value instructions. - if (MI.isDebugValue()) { - MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } + assert(!MI.isDebugValue() && "DBG_VALUEs should be handled in target-independent code"); // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; -- cgit v1.1 From 0187e7a9ba5c50b4559e0c2e0afceb6d5cd32190 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 16 Jun 2013 20:34:27 +0000 Subject: DebugInfo: remove target-specific Frame Index handling for DBG_VALUE MachineInstrs Frame index handling is now target-agnostic, so delete the target hooks for creation & asm printing of target-specific addressing in DBG_VALUEs and any related functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184067 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64AsmPrinter.cpp | 40 ------------------------------ lib/Target/AArch64/AArch64AsmPrinter.h | 4 --- lib/Target/AArch64/AArch64InstrInfo.cpp | 11 --------- lib/Target/AArch64/AArch64InstrInfo.h | 4 --- lib/Target/ARM/ARMAsmPrinter.cpp | 40 +----------------------------- lib/Target/ARM/ARMAsmPrinter.h | 5 ---- lib/Target/ARM/ARMBaseInstrInfo.cpp | 10 -------- lib/Target/ARM/ARMBaseInstrInfo.h | 6 ----- lib/Target/Hexagon/HexagonInstrInfo.cpp | 10 -------- lib/Target/Hexagon/HexagonInstrInfo.h | 5 ---- lib/Target/Mips/MipsAsmPrinter.cpp | 10 -------- lib/Target/Mips/MipsAsmPrinter.h | 1 - lib/Target/Mips/MipsInstrInfo.cpp | 9 ------- lib/Target/Mips/MipsInstrInfo.h | 5 ---- lib/Target/PowerPC/PPCAsmPrinter.cpp | 36 ++------------------------- lib/Target/PowerPC/PPCInstrInfo.cpp | 10 -------- lib/Target/PowerPC/PPCInstrInfo.h | 6 ----- lib/Target/PowerPC/PPCRegisterInfo.cpp | 6 ++--- lib/Target/Sparc/SparcAsmPrinter.cpp | 11 --------- lib/Target/Sparc/SparcInstrInfo.cpp | 12 --------- lib/Target/Sparc/SparcInstrInfo.h | 8 ------ lib/Target/X86/X86AsmPrinter.cpp | 42 -------------------------------- lib/Target/X86/X86AsmPrinter.h | 5 ---- lib/Target/X86/X86InstrInfo.cpp | 13 ---------- lib/Target/X86/X86InstrInfo.h | 6 ----- lib/Target/X86/X86MCInstLower.cpp | 8 +----- lib/Target/XCore/XCoreAsmPrinter.cpp | 40 ++---------------------------- lib/Target/XCore/XCoreInstrInfo.cpp | 9 ------- lib/Target/XCore/XCoreInstrInfo.h | 6 ----- 29 files changed, 9 insertions(+), 369 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 47ebb82..03d99c6 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -27,16 +27,6 @@ using namespace llvm; -MachineLocation -AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - // See emitFrameIndexDebugValue in InstrInfo for where this instruction is - // expected to be created. - assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg() - && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE"); - return MachineLocation(MI->getOperand(0).getReg(), - MI->getOperand(1).getImm()); -} - /// Try to print a floating-point register as if it belonged to a specified /// register-class. For example the inline asm operand modifier "b" requires its /// argument to be printed as "bN". @@ -271,24 +261,6 @@ bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps==4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg()); - OS << '+' << MI->getOperand(1).getImm(); - OS << ']'; - OS << "+" << MI->getOperand(NOps - 2).getImm(); -} - - #include "AArch64GenMCPseudoLowering.inc" void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -296,18 +268,6 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(OutStreamer, MI)) return; - switch (MI->getOpcode()) { - case AArch64::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } - } - MCInst TmpInst; LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this); OutStreamer.EmitInstruction(TmpInst); diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h index af0c9fe..824f003 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.h +++ b/lib/Target/AArch64/AArch64AsmPrinter.h @@ -55,8 +55,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { unsigned AsmVariant, const char *ExtraCode, raw_ostream &O); - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - /// printSymbolicAddress - Given some kind of reasonably bare symbolic /// reference, print out the appropriate asm string to represent it. If /// appropriate, a relocation-specifier will be produced, composed of a @@ -67,8 +65,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { bool PrintImmediatePrefix, StringRef Suffix, raw_ostream &O); - MachineLocation getDebugValueLocation(const MachineInstr *MI) const; - virtual const char *getPassName() const { return "AArch64 Assembly Printer"; } diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index f90bcef..d8f45eb 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -116,17 +116,6 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(0); } -MachineInstr * -AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, - uint64_t Offset, const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0) - .addImm(Offset) - .addMetadata(MDPtr); - return &*MIB; -} - /// Does the Opcode represent a conditional branch that we can remove and re-add /// at the end of a basic block? static bool isCondBranch(unsigned Opc) { diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 22a2ab4..620ecc9 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -43,10 +43,6 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, - uint64_t Offset, const MDNode *MDPtr, - DebugLoc DL) const; - void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 13ec208..787daba 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -213,19 +213,6 @@ namespace { } // end of anonymous namespace -MachineLocation ARMAsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; -} - /// EmitDwarfRegOp - Emit dwarf register operation. void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); @@ -1092,23 +1079,6 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } -void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps==4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); - OS << ']'; - OS << "+"; - printOperand(MI, NOps-2, OS); -} - void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(MI->getFlag(MachineInstr::FrameSetup) && "Only instruction which are involved into frame setup code are allowed"); @@ -1272,15 +1242,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned Opc = MI->getOpcode(); switch (Opc) { case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass"); - case ARM::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } + case ARM::DBG_VALUE: llvm_unreachable("Should be handled by generic printing"); case ARM::LEApcrel: case ARM::tLEApcrel: case ARM::t2LEApcrel: { diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index c945e4f..7ce2b83 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -97,11 +97,6 @@ private: const MachineInstr *MI); public: - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - - virtual MachineLocation - getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE; - /// EmitDwarfRegOp - Emit dwarf register operation. virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 496bcb2..5283d7b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1213,16 +1213,6 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ return true; } -MachineInstr* -ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - /// Create a copy of a const pool value. Update CPI to the new index and return /// the label UID. static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 4ca3d7b..96f8637 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -125,12 +125,6 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; - virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3218134..5af645c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -558,16 +558,6 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return(0); } -MachineInstr* -HexagonInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Hexagon::DBG_VALUE)) - .addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { MachineRegisterInfo &RegInfo = MF->getRegInfo(); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 42ffb48..3c28df4 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -148,11 +148,6 @@ public: isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles, const BranchProbability &Probability) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; virtual DFAPacketizer* CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 6e4feda..638001b 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -589,16 +589,6 @@ void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) { MES->emitELFHeaderFlagsCG(*Subtarget); } -MachineLocation -MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue. - assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && - "Unexpected MachineOperand types"); - return MachineLocation(MI->getOperand(0).getReg(), - MI->getOperand(1).getImm()); -} - void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS) { // TODO: implement diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index dbdaf26..4d1d624 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -81,7 +81,6 @@ public: const char *Modifier = 0); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); - virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); }; } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 3144dae..eae05a3 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -61,15 +61,6 @@ MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, MFI.getObjectSize(FI), Align); } -MachineInstr* -MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, - uint64_t Offset, const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 0f075ec..b6480ef 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -67,11 +67,6 @@ public: bool AllowModify, SmallVectorImpl &BranchInstrs) const; - virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; - /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index c43b5c9..6e6d653 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -86,18 +86,6 @@ namespace { bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O); - - MachineLocation getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; - } }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux @@ -340,28 +328,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { default: break; - case TargetOpcode::DBG_VALUE: { - if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return; - - SmallString<32> Str; - raw_svector_ostream O(Str); - unsigned NOps = MI->getNumOperands(); - assert(NOps==4); - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); - O << V.getName(); - O << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O); - O << ']'; - O << "+"; - printOperand(MI, NOps-2, O); - OutStreamer.EmitRawText(O.str()); - return; - } - + case TargetOpcode::DBG_VALUE: + llvm_unreachable("Should be handled target independently"); case PPC::MovePCtoLR: case PPC::MovePCtoLR8: { // Transform %LR = MovePCtoLR diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a3eeb20..1ad879d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -791,16 +791,6 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, NewMIs.back()->addMemOperand(MF, MMO); } -MachineInstr* -PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE)); - addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - bool PPCInstrInfo:: ReverseBranchCondition(SmallVectorImpl &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 34a1a73..bd72a4d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -148,12 +148,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; - virtual bool ReverseBranchCondition(SmallVectorImpl &Cond) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index a4e328e..96b5bb6 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -577,9 +577,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // clear can be encoded. This is extremely uncommon, because normally you // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. - if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm - (!noImmForm && - isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { + assert(OpC != PPC::DBG_VALUE && + "This should be handle in a target independent way"); + if (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); return; } diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index b538d5c..3fe2b44 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -63,8 +63,6 @@ namespace { virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; - - virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -266,15 +264,6 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return I == Pred->end() || !I->isBarrier(); } -MachineLocation SparcAsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - assert(MI->getNumOperands() == 4 && "Invalid number of operands!"); - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && - "Unexpected MachineOperand types"); - return MachineLocation(MI->getOperand(0).getReg(), - MI->getOperand(1).getImm()); -} - // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter X(TheSparcTarget); diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 626bc40..08a13b8 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -114,18 +114,6 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) llvm_unreachable("Invalid cond code"); } -MachineInstr * -SparcInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc dl) const { - MachineInstrBuilder MIB = BuildMI(MF, dl, get(SP::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - - bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index a0a0ffd8..d0b220b 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -62,14 +62,6 @@ public: virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - /// emitFrameIndexDebugValue - Emit a target-dependent form of - /// DBG_VALUE encoding the address of a frame index. - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc dl) const; - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 6b228b0..9e0ab82 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -702,48 +702,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } -MachineLocation -X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - - if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; -} - -void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &O) { - // Only the target-dependent form of DBG_VALUE should get here. - // Referencing the offset and metadata as NOps-2 and NOps-1 is - // probably portable to other targets; frame pointer location is not. - unsigned NOps = MI->getNumOperands(); - assert(NOps==7); - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); - if (V.getContext().isSubprogram()) - O << DISubprogram(V.getContext()).getDisplayName() << ":"; - O << V.getName(); - O << " <- "; - // Frame address. Currently handles register +- offset only. - O << '['; - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) - printOperand(MI, 0, O); - else - O << "undef"; - O << '+'; printOperand(MI, 3, O); - O << ']'; - O << "+"; - printOperand(MI, NOps-2, O); -} - - - //===----------------------------------------------------------------------===// // Target Registry Stuff //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index bc7496b..6eed5ce 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -67,11 +67,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { unsigned AsmVariant = 1); virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE; - - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - - virtual MachineLocation - getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE; }; } // end namespace llvm diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index df7b721..0688c9b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3733,19 +3733,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return false; } -MachineInstr* -X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - X86AddressMode AM; - AM.BaseType = X86AddressMode::FrameIndexBase; - AM.Base.FrameIndex = FrameIx; - MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE)); - addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, const SmallVectorImpl &MOs, MachineInstr *MI, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 332874f..f2f47ef 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -275,12 +275,6 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual - MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; - /// foldMemoryOperand - If this target supports it, fold a load or store of /// the specified stack slot into the specified machine instruction for the /// specified operand(s). If this is possible, the target should perform the diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index a453245..3fea4cb 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -654,13 +654,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(Mang, *MF, *this); switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - std::string TmpStr; - raw_string_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; + llvm_unreachable("Should be handled target independently"); // Emit nothing here but a comment if we can. case X86::Int_MemBarrier: diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index e177ad3..e802c1b 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -49,7 +49,6 @@ namespace { class XCoreAsmPrinter : public AsmPrinter { const XCoreSubtarget &Subtarget; XCoreMCInstLower MCInstLowering; - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); public: explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget()), @@ -76,7 +75,6 @@ namespace { void EmitInstruction(const MachineInstr *MI); void EmitFunctionBodyStart(); void EmitFunctionBodyEnd(); - virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -256,47 +254,13 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } -void XCoreAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps == 4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); - OS << ']'; - OS << "+"; - printOperand(MI, NOps-2, OS); -} - -MachineLocation XCoreAsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - // Handles frame addresses emitted in XCoreInstrInfo::emitFrameIndexDebugValue. - assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && - "Unexpected MachineOperand types"); - return MachineLocation(MI->getOperand(0).getReg(), - MI->getOperand(1).getImm()); -} - void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream O(Str); switch (MI->getOpcode()) { - case XCore::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } + case XCore::DBG_VALUE: + llvm_unreachable("Should be handled target independently"); case XCore::ADD_2rus: if (MI->getOperand(2).getImm() == 0) { O << "\tmov " diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index eb7a936..d6b8c2d 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -386,15 +386,6 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0); } -MachineInstr* -XCoreInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, - uint64_t Offset, const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(XCore::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - /// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. bool XCoreInstrInfo:: diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 42eeed8..51d66a1 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -78,12 +78,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; - virtual bool ReverseBranchCondition( SmallVectorImpl &Cond) const; }; -- cgit v1.1 From 98f5cf8000bd67ab97605f3454ae374fff5389c6 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 17 Jun 2013 20:16:26 +0000 Subject: R600: Properly set COUNT_3 bit in TEX clause initiating inst for pre EG gen. Fixes rv7x0 bug in Heaven reported here: https://bugs.freedesktop.org/show_bug.cgi?id=64257 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184116 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 27b0214..83d735f 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -575,14 +575,16 @@ class CF_WORD0_R600 { class CF_CLAUSE_R600 inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { field bits<64> Inst; + bits<4> CNT; let CF_INST = inst; let BARRIER = 1; let CF_CONST = 0; let VALID_PIXEL_MODE = 0; let COND = 0; + let COUNT = CNT{2-0}; let CALL_COUNT = 0; - let COUNT_3 = 0; + let COUNT_3 = CNT{3}; let END_OF_PROGRAM = 0; let WHOLE_QUAD_MODE = 0; @@ -1162,52 +1164,52 @@ let Predicates = [isR600] in { } defm : SteamOutputExportPattern; - def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT), - "TEX $COUNT @$ADDR"> { + def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT), + "TEX $CNT @$ADDR"> { let POP_COUNT = 0; } - def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT), - "VTX $COUNT @$ADDR"> { + def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT), + "VTX $CNT @$ADDR"> { let POP_COUNT = 0; } def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> { let POP_COUNT = 0; - let COUNT = 0; + let CNT = 0; } def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { let POP_COUNT = 0; - let COUNT = 0; + let CNT = 0; } def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> { let POP_COUNT = 0; - let COUNT = 0; + let CNT = 0; } def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> { let POP_COUNT = 0; - let COUNT = 0; + let CNT = 0; } def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let CNT = 0; } def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let CNT = 0; } def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> { let ADDR = 0; - let COUNT = 0; + let CNT = 0; let POP_COUNT = 0; } def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let CNT = 0; } def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> { - let COUNT = 0; + let CNT = 0; let POP_COUNT = 0; let ADDR = 0; let END_OF_PROGRAM = 1; -- cgit v1.1 From f7c9b95f94b18d1c8ae15a59bf28c5c2cafa5ad8 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 17 Jun 2013 20:16:40 +0000 Subject: R600: PV stores Reg id, not index git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184117 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 4f5cfcd..018583d 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -239,7 +239,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI, Result.push_back(DummyPair); continue; } - if (PV.find(Index) != PV.end()) { + if (PV.find(Reg) != PV.end()) { Result.push_back(DummyPair); continue; } -- cgit v1.1 From b44193dde11836b2f1bfb00bc99f899f0de757e4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 18 Jun 2013 06:24:14 +0000 Subject: Remove dead prototype. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184173 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUTargetMachine.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h index bb26ed9..26e95d3 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.h +++ b/lib/Target/R600/AMDGPUTargetMachine.h @@ -25,8 +25,6 @@ namespace llvm { -MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT); - class AMDGPUTargetMachine : public LLVMTargetMachine { AMDGPUSubtarget Subtarget; -- cgit v1.1 From 99cb622041a0839c7dfcf0263c5102a305a0fdb5 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 18 Jun 2013 07:20:20 +0000 Subject: Use pointers to the MCAsmInfo and MCRegInfo. Someone may want to do something crazy, like replace these objects if they change or something. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184175 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FrameLowering.cpp | 10 ++-- .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 2 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 6 +-- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 8 +-- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 62 +++++++++++----------- lib/Target/Mangler.cpp | 30 +++++------ lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2 +- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 2 +- lib/Target/Mips/Mips16FrameLowering.cpp | 8 +-- lib/Target/Mips/MipsSEFrameLowering.cpp | 12 ++--- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 6 +-- lib/Target/PowerPC/PPCFrameLowering.cpp | 12 ++--- .../SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 2 +- lib/Target/SystemZ/SystemZFrameLowering.cpp | 8 +-- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 2 +- lib/Target/X86/X86FrameLowering.cpp | 4 +- 16 files changed, 88 insertions(+), 88 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 8b907b2..d571765 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -54,7 +54,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); bool NeedsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); @@ -97,7 +97,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { .addSym(SPLabel); MachineLocation Dst(MachineLocation::VirtualFP); - unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true); + unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); MMI.addFrameInst( MCCFIInstruction::createDefCfa(SPLabel, Reg, -NumInitialBytes)); } @@ -132,7 +132,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(FPLabel); - unsigned Reg = MRI.getDwarfRegNum(AArch64::X29, true); + unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true); unsigned Offset = MFI->getObjectOffset(X29FrameIdx); MMI.addFrameInst(MCCFIInstruction::createDefCfa(FPLabel, Reg, Offset)); } @@ -165,7 +165,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { .addSym(CSLabel); MachineLocation Dst(MachineLocation::VirtualFP); - unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true); + unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); unsigned Offset = NumResidualBytes + NumInitialBytes; MMI.addFrameInst(MCCFIInstruction::createDefCfa(CSLabel, Reg, -Offset)); } @@ -183,7 +183,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = MRI.getDwarfRegNum(I->getReg(), true); + unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true); MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, Reg, Offset)); } } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index a5c591e..8cf374f 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -346,7 +346,7 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) const { if (MO.isReg()) { - return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); } else if (MO.isImm()) { return static_cast(MO.getImm()); } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c59ca64..e315d16 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -277,7 +277,7 @@ public: MCAsmParserExtension::Initialize(_Parser); // Cache the MCRegisterInfo. - MRI = &getContext().getRegisterInfo(); + MRI = getContext().getRegisterInfo(); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -7851,8 +7851,8 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) { /// parseDirectiveThumbFunc /// ::= .thumbfunc symbol_name bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { - const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI.hasSubsectionsViaSymbols(); + const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); + bool isMachO = MAI->hasSubsectionsViaSymbols(); StringRef Name; bool needFuncName = true; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 679d3c4..dc3d945 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -380,10 +380,10 @@ void ARMELFStreamer::FlushPendingOffset() { void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) { // Emit the unwind opcode to restore $sp. if (UsedFP) { - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); int64_t LastRegSaveSPOffset = SPOffset - PendingOffset; UnwindOpAsm.EmitSPOffset(LastRegSaveSPOffset - FPOffset); - UnwindOpAsm.EmitSetSP(MRI.getEncodingValue(FPReg)); + UnwindOpAsm.EmitSetSP(MRI->getEncodingValue(FPReg)); } else { FlushPendingOffset(); } @@ -458,9 +458,9 @@ void ARMELFStreamer::EmitRegSave(const SmallVectorImpl &RegList, // Collect the registers in the register list unsigned Count = 0; uint32_t Mask = 0; - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); for (size_t i = 0; i < RegList.size(); ++i) { - unsigned Reg = MRI.getEncodingValue(RegList[i]); + unsigned Reg = MRI->getEncodingValue(RegList[i]); assert(Reg < (IsVector ? 32U : 16U) && "Register out of range"); unsigned Bit = (1u << Reg); if ((Mask & Bit) == 0) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 2aa1010..f324bc2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -407,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) const { if (MO.isReg()) { unsigned Reg = MO.getReg(); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg); // Q registers are encoded as 2x their register number. switch (Reg) { @@ -436,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); int32_t SImm = MO1.getImm(); bool isAdd = true; @@ -724,8 +724,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO1 = MI.getOperand(OpIdx); const MCOperand &MO2 = MI.getOperand(OpIdx + 1); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); - unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); + unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO2.getReg()); return (Rm << 3) | Rn; } @@ -741,7 +741,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm12 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -821,7 +821,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -857,7 +857,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // {7-0} = imm8 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm8 = MO1.getImm(); return (Reg << 8) | Imm8; } @@ -940,8 +940,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); - unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); + unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()); bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add; ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); @@ -975,7 +975,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, // {12} isAdd // {11-0} imm12/Rm const MCOperand &MO = MI.getOperand(OpIdx); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups); Binary |= Rn << 14; return Binary; @@ -998,7 +998,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm); Binary <<= 7; // Shift amount is bits [11:7] Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5] - Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0] + Binary |= CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Rm is bits [3:0] } return Binary | (isAdd << 12) | (isReg << 13); } @@ -1011,7 +1011,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); bool isAdd = MO1.getImm() != 0; - return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4); + return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()) | (isAdd << 4); } uint32_t ARMMCCodeEmitter:: @@ -1029,7 +1029,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); return Imm8 | (isAdd << 8) | (isImm << 9); } @@ -1047,7 +1047,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. if (!MO.isReg()) { - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); @@ -1057,14 +1057,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; return (Rn << 9) | (1 << 13); } - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; bool isImm = MO1.getReg() == 0; uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13); } @@ -1092,7 +1092,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm5 = MO1.getImm(); return ((Imm5 & 0x1f) << 3) | Rn; } @@ -1119,7 +1119,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false; // 'U' bit is handled as part of the fixup. @@ -1165,7 +1165,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1190,7 +1190,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, // Encode the shift operation Rs. // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); + return Binary | (CTX.getRegisterInfo()->getEncodingValue(Rs) << ARMII::RegRsShift); } unsigned ARMMCCodeEmitter:: @@ -1209,7 +1209,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1248,9 +1248,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, // Encoded as [Rn, Rm, imm]. // FIXME: Needs fixup support. - unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); Value <<= 4; - Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); + Value |= CTX.getRegisterInfo()->getEncodingValue(MO2.getReg()); Value <<= 2; Value |= MO3.getImm(); @@ -1264,7 +1264,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, const MCOperand &MO2 = MI.getOperand(OpNum+1); // FIXME: Needs fixup support. - unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); // Even though the immediate is 8 bits long, we need 9 bits in order // to represent the (inverse of the) sign bit. @@ -1326,7 +1326,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1382,7 +1382,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, if (SPRRegs || DPRRegs) { // VLDM/VSTM - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg); unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff; Binary |= (RegNo & 0x1f) << 8; if (SPRRegs) @@ -1391,7 +1391,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } @@ -1407,7 +1407,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1430,7 +1430,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1456,7 +1456,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1475,7 +1475,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 2269b73..dc70259 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -47,18 +47,18 @@ static void MangleLetter(SmallVectorImpl &OutName, unsigned char C) { /// NameNeedsEscaping - Return true if the identifier \p Str needs quotes /// for this assembler. -static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { +static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo *MAI) { assert(!Str.empty() && "Cannot create an empty MCSymbol"); // If the first character is a number and the target does not allow this, we // need quotes. - if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') + if (!MAI->doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') return true; // If any of the characters in the string is an unacceptable character, force // quotes. - bool AllowPeriod = MAI.doesAllowPeriodsInName(); - bool AllowUTF8 = MAI.doesAllowUTF8(); + bool AllowPeriod = MAI->doesAllowPeriodsInName(); + bool AllowUTF8 = MAI->doesAllowUTF8(); for (unsigned i = 0, e = Str.size(); i != e; ++i) if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8)) return true; @@ -68,16 +68,16 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { /// appendMangledName - Add the specified string in mangled form if it uses /// any unusual characters. static void appendMangledName(SmallVectorImpl &OutName, StringRef Str, - const MCAsmInfo &MAI) { + const MCAsmInfo *MAI) { // The first character is not allowed to be a number unless the target // explicitly allows it. - if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') { + if (!MAI->doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') { MangleLetter(OutName, Str[0]); Str = Str.substr(1); } - bool AllowPeriod = MAI.doesAllowPeriodsInName(); - bool AllowUTF8 = MAI.doesAllowUTF8(); + bool AllowPeriod = MAI->doesAllowPeriodsInName(); + bool AllowUTF8 = MAI->doesAllowUTF8(); for (unsigned i = 0, e = Str.size(); i != e; ++i) { if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8)) MangleLetter(OutName, Str[i]); @@ -110,21 +110,21 @@ void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, StringRef Name = GVName.toStringRef(TmpData); assert(!Name.empty() && "getNameWithPrefix requires non-empty name"); - const MCAsmInfo &MAI = Context.getAsmInfo(); + const MCAsmInfo *MAI = Context.getAsmInfo(); // If the global name is not led with \1, add the appropriate prefixes. if (Name[0] == '\1') { Name = Name.substr(1); } else { if (PrefixTy == Mangler::Private) { - const char *Prefix = MAI.getPrivateGlobalPrefix(); + const char *Prefix = MAI->getPrivateGlobalPrefix(); OutName.append(Prefix, Prefix+strlen(Prefix)); } else if (PrefixTy == Mangler::LinkerPrivate) { - const char *Prefix = MAI.getLinkerPrivateGlobalPrefix(); + const char *Prefix = MAI->getLinkerPrivateGlobalPrefix(); OutName.append(Prefix, Prefix+strlen(Prefix)); } - const char *Prefix = MAI.getGlobalPrefix(); + const char *Prefix = MAI->getGlobalPrefix(); if (Prefix[0] == 0) ; // Common noop, no prefix. else if (Prefix[1] == 0) @@ -137,7 +137,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, if (!NameNeedsEscaping(Name, MAI) || // If quotes are supported, they can be used unless the string contains // a quote or newline. - (MAI.doesAllowQuotesInName() && + (MAI->doesAllowQuotesInName() && Name.find_first_of("\n\"") == StringRef::npos)) { OutName.append(Name.begin(), Name.end()); return; @@ -145,7 +145,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, // On systems that do not allow quoted names, we need to mangle most // strange characters. - if (!MAI.doesAllowQuotesInName()) + if (!MAI->doesAllowQuotesInName()) return appendMangledName(OutName, Name, MAI); // Okay, the system allows quoted strings. We can quote most anything, the @@ -207,7 +207,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, // If we are supposed to add a microsoft-style suffix for stdcall/fastcall, // add it. - if (Context.getAsmInfo().hasMicrosoftFastStdCallMangling()) { + if (Context.getAsmInfo()->hasMicrosoftFastStdCallMangling()) { if (const Function *F = dyn_cast(GV)) { CallingConv::ID CC = F->getCallingConv(); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index d1d69d8..ab23d9f 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -880,7 +880,7 @@ int MipsAsmParser::getATReg() { } unsigned MipsAsmParser::getReg(int RC, int RegNo) { - return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); + return *(getContext().getRegisterInfo()->getRegClass(RC).begin() + RegNo); } int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) { diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index a464dfe..4dc6917 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -380,7 +380,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) const { if (MO.isReg()) { unsigned Reg = MO.getReg(); - unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg); + unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); return RegNo; } else if (MO.isImm()) { return static_cast(MO.getImm()); diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index e180c49..9fde614 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -40,7 +40,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { if (StackSize == 0 && !MFI->adjustsStack()) return; MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); MachineLocation DstML, SrcML; // Adjust stack. @@ -56,13 +56,13 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); - unsigned S1 = MRI.getDwarfRegNum(Mips::S1, true); + unsigned S1 = MRI->getDwarfRegNum(Mips::S1, true); MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, S1, -8)); - unsigned S0 = MRI.getDwarfRegNum(Mips::S0, true); + unsigned S0 = MRI->getDwarfRegNum(Mips::S0, true); MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, S0, -12)); - unsigned RA = MRI.getDwarfRegNum(Mips::RA, true); + unsigned RA = MRI->getDwarfRegNum(Mips::RA, true); MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, RA, -4)); if (hasFP(MF)) diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 91ffb94..c8d8388 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -285,7 +285,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { if (StackSize == 0 && !MFI->adjustsStack()) return; MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); MachineLocation DstML, SrcML; // Adjust stack. @@ -321,9 +321,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { // one for each of the paired single precision registers. if (Mips::AFGR64RegClass.contains(Reg)) { unsigned Reg0 = - MRI.getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpeven), true); + MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpeven), true); unsigned Reg1 = - MRI.getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpodd), true); + MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpodd), true); if (!STI.isLittle()) std::swap(Reg0, Reg1); @@ -335,7 +335,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { } else { // Reg is either in CPURegs or FGR32. MMI.addFrameInst(MCCFIInstruction::createOffset( - CSLabel, MRI.getDwarfRegNum(Reg, 1), Offset)); + CSLabel, MRI->getDwarfRegNum(Reg, 1), Offset)); } } } @@ -358,7 +358,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel2); for (int I = 0; I < 4; ++I) { int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I)); - unsigned Reg = MRI.getDwarfRegNum(ehDataReg(I), true); + unsigned Reg = MRI->getDwarfRegNum(ehDataReg(I), true); MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel2, Reg, Offset)); } } @@ -373,7 +373,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( - SetFPLabel, MRI.getDwarfRegNum(FP, true))); + SetFPLabel, MRI->getDwarfRegNum(FP, true))); } } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 31c73ae..420c01b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -191,7 +191,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, // Return the thread-pointer register's encoding. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_tlsreg)); - return CTX.getRegisterInfo().getEncodingValue(PPC::X13); + return CTX.getRegisterInfo()->getEncodingValue(PPC::X13); } unsigned PPCMCCodeEmitter:: @@ -202,7 +202,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MTCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } @@ -214,7 +214,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } assert(MO.isImm() && diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index dabe613..dc734d1 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -334,7 +334,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { *static_cast(MF.getTarget().getInstrInfo()); MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); DebugLoc dl; bool needsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); @@ -530,14 +530,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (HasFP) { unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31; - Reg = MRI.getDwarfRegNum(Reg, true); + Reg = MRI->getDwarfRegNum(Reg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset)); } if (MustSaveLR) { unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR; - Reg = MRI.getDwarfRegNum(Reg, true); + Reg = MRI->getDwarfRegNum(Reg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset)); } @@ -565,7 +565,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) : (isPPC64 ? PPC::X1 : PPC::R1); - Reg = MRI.getDwarfRegNum(Reg, true); + Reg = MRI->getDwarfRegNum(Reg, true); MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg)); } } @@ -597,13 +597,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { && Subtarget.isPPC64() && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { MMI.addFrameInst(MCCFIInstruction::createOffset( - Label, MRI.getDwarfRegNum(PPC::CR2, true), 8)); + Label, MRI->getDwarfRegNum(PPC::CR2, true), 8)); continue; } int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); MMI.addFrameInst(MCCFIInstruction::createOffset( - Label, MRI.getDwarfRegNum(Reg, true), Offset)); + Label, MRI->getDwarfRegNum(Reg, true), Offset)); } } } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index 7721b1f..f8f8998 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -112,7 +112,7 @@ uint64_t SystemZMCCodeEmitter:: getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) const { if (MO.isReg()) - return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); if (MO.isImm()) return static_cast(MO.getImm()); llvm_unreachable("Unexpected operand type!"); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index c0d72c3..43f1e47 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -297,7 +297,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { SystemZMachineFunctionInfo *ZFI = MF.getInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const std::vector &CSI = MFFrame->getCalleeSavedInfo(); bool HasFP = hasFP(MF); DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -322,7 +322,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { if (SystemZ::GR64BitRegClass.contains(Reg)) { int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; MMI.addFrameInst(MCCFIInstruction::createOffset( - GPRSaveLabel, MRI.getDwarfRegNum(Reg, true), Offset)); + GPRSaveLabel, MRI->getDwarfRegNum(Reg, true), Offset)); } } } @@ -351,7 +351,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) .addSym(SetFPLabel); - unsigned HardFP = MRI.getDwarfRegNum(SystemZ::R11D, true); + unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true); MMI.addFrameInst( MCCFIInstruction::createDefCfaRegister(SetFPLabel, HardFP)); @@ -379,7 +379,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { // Add CFI for the this save. if (!FPRSaveLabel) FPRSaveLabel = MMI.getContext().CreateTempSymbol(); - unsigned Reg = MRI.getDwarfRegNum(I->getReg(), true); + unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true); int64_t Offset = getFrameIndexOffset(MF, I->getFrameIdx()); MMI.addFrameInst(MCCFIInstruction::createOffset( FPRSaveLabel, Reg, SPOffsetFromCFA + Offset)); diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 016af71..3e450fd 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -53,7 +53,7 @@ public: } unsigned GetX86RegNum(const MCOperand &MO) const { - return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()) & 0x7; + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7; } // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 3061117..9d66bfd 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -307,7 +307,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, unsigned FramePtr) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); // Add callee saved registers to move list. const std::vector &CSI = MFI->getCalleeSavedInfo(); @@ -360,7 +360,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, if (HasFP && FramePtr == Reg) continue; - unsigned DwarfReg = MRI.getDwarfRegNum(Reg, true); + unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); MMI.addFrameInst(MCCFIInstruction::createOffset(Label, DwarfReg, Offset)); } } -- cgit v1.1 From cea0032f73a56a62b692b25ca4084850cd51763b Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Tue, 18 Jun 2013 08:02:56 +0000 Subject: ARM: thumb stores cannot use PC as dest register git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184179 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4086f36..196fc32 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -3164,6 +3164,17 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, unsigned Rm = fieldFromInstruction(Val, 2, 4); unsigned imm = fieldFromInstruction(Val, 0, 2); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRHs: + case ARM::t2STRBs: + case ARM::t2STRs: + if (Rn == 15) + return MCDisassembler::Fail; + default: + break; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) @@ -3292,6 +3303,21 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, unsigned Rn = fieldFromInstruction(Val, 9, 4); unsigned imm = fieldFromInstruction(Val, 0, 9); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRT: + case ARM::t2STRBT: + case ARM::t2STRHT: + case ARM::t2STRi8: + case ARM::t2STRHi8: + case ARM::t2STRBi8: + if (Rn == 15) + return MCDisassembler::Fail; + break; + default: + break; + } + // Some instructions always use an additive offset. switch (Inst.getOpcode()) { case ARM::t2LDRT: @@ -3353,6 +3379,17 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, unsigned Rn = fieldFromInstruction(Val, 13, 4); unsigned imm = fieldFromInstruction(Val, 0, 12); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRi12: + case ARM::t2STRBi12: + case ARM::t2STRHi12: + if (Rn == 15) + return MCDisassembler::Fail; + default: + break; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(imm)); -- cgit v1.1 From ce046b98ed6c351779fc43599a80d588752bc1ca Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Tue, 18 Jun 2013 08:03:06 +0000 Subject: ARM: fix thumb literal loads decoding This fixes two previous issues: - Negative offsets were not correctly disassembled - The decoded opcodes were not the right one git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184180 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 16 +- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 223 ++++++++++++++++++++++-- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 28 ++- 3 files changed, 238 insertions(+), 29 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 8b114a8..da296dc 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -959,6 +959,8 @@ multiclass T2I_ld opcod, string opc, let Inst{19-16} = addr{16-13}; // Rn let Inst{15-12} = Rt; let Inst{11-0} = addr{11-0}; // imm + + let DecoderMethod = "DecodeT2LoadImm12"; } def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", @@ -979,6 +981,8 @@ multiclass T2I_ld opcod, string opc, let Inst{9} = addr{8}; // U let Inst{8} = 0; // The W bit. let Inst{7-0} = addr{7-0}; // imm + + let DecoderMethod = "DecodeT2LoadImm8"; } def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", @@ -1019,6 +1023,8 @@ multiclass T2I_ld opcod, string opc, bits<12> addr; let Inst{15-12} = Rt{3-0}; let Inst{11-0} = addr{11-0}; + + let DecoderMethod = "DecodeT2LoadLabel"; } } @@ -1228,15 +1234,15 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(zextloadi16 node:$Src)>>; + GPR, UnOpFrag<(zextloadi16 node:$Src)>>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(zextloadi8 node:$Src)>>; + GPR, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(sextloadi16 node:$Src)>>; + GPR, UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(sextloadi8 node:$Src)>>; + GPR, UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword @@ -1373,6 +1379,8 @@ class T2IldT type, string opc, InstrItinClass ii> let Inst{11} = 1; let Inst{10-8} = 0b110; // PUW. let Inst{7-0} = addr{7-0}; + + let DecoderMethod = "DecodeT2LoadT"; } def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 196fc32..39a5af9 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -347,6 +347,14 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); +static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); +static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, @@ -3188,19 +3196,9 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - switch (Inst.getOpcode()) { - case ARM::t2PLDs: - case ARM::t2PLDWs: - case ARM::t2PLIs: - break; - default: { - unsigned Rt = fieldFromInstruction(Insn, 12, 4); - if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - } - } - + unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rn = fieldFromInstruction(Insn, 16, 4); + if (Rn == 0xF) { switch (Inst.getOpcode()) { case ARM::t2LDRBs: @@ -3215,19 +3213,32 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, case ARM::t2LDRSBs: Inst.setOpcode(ARM::t2LDRSBpci); break; - case ARM::t2PLDs: + case ARM::t2LDRs: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2PLDs: { Inst.setOpcode(ARM::t2PLDi12); Inst.addOperand(MCOperand::CreateReg(ARM::PC)); - break; + int imm = fieldFromInstruction(Insn, 0, 12); + if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm)); + return S; + } default: return MCDisassembler::Fail; } - int imm = fieldFromInstruction(Insn, 0, 12); - if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm)); + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } - return S; + switch (Inst.getOpcode()) { + case ARM::t2PLDs: + case ARM::t2PLDWs: + case ARM::t2PLIs: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; } unsigned addrmode = fieldFromInstruction(Insn, 4, 2); @@ -3239,6 +3250,154 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned U = fieldFromInstruction(Insn, 9, 1); + unsigned imm = fieldFromInstruction(Insn, 0, 8); + imm |= (U << 8); + imm |= (Rn << 9); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRi8: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRBi8: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRSBi8: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRHi8: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHi8: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= (Rn << 13); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRi12: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRHi12: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHi12: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2LDRBi12: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRSBi12: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2AddrModeImm12(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 8); + imm |= (Rn << 9); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRT: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRBT: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRHT: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSBT: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRSHT: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned U = fieldFromInstruction(Insn, 23, 1); + int imm = fieldFromInstruction(Insn, 0, 12); + + // FIXME: detect and decode PLD properly + if (Inst.getOpcode() == ARM::t2LDRBpci && Rt == 15) { + Inst.setOpcode(ARM::t2PLDi12); + Inst.addOperand(MCOperand::CreateReg(ARM::PC)); + } else { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!U) { + // Special case for #-0. + if (imm == 0) + imm = INT32_MIN; + else + imm = -imm; + } + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0) @@ -3353,6 +3512,34 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, addr |= Rn << 9; unsigned load = fieldFromInstruction(Insn, 20, 1); + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDR_PRE: + case ARM::t2LDR_POST: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRB_PRE: + case ARM::t2LDRB_POST: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRH_PRE: + case ARM::t2LDRH_POST: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSB_PRE: + case ARM::t2LDRSB_POST: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRSH_PRE: + case ARM::t2LDRSH_POST: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + if (!load) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 0b3d266..0931e59 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -315,15 +315,29 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - if (MO1.isExpr()) + if (MO1.isExpr()) { O << *MO1.getExpr(); - else if (MO1.isImm()) { - O << markup("]>", "]"); + return; } - else - llvm_unreachable("Unknown LDR label operand?"); + + O << markup(""); + } else { + O << markup(""); + } + O << "]" << markup(">"); } // so_reg is a 4-operand unit corresponding to register forms of the A5.1 -- cgit v1.1 From f8b60d6f30a8f25c84a71d36ff3a86fe1f52f671 Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Tue, 18 Jun 2013 08:12:51 +0000 Subject: ARM: add operands pre-writeback variants when needed git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184181 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 49 ++++++++++++++++---------- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 50 +++++++++++++++------------ lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 2 ++ 3 files changed, 61 insertions(+), 40 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index da296dc..8e19d32 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -176,11 +176,10 @@ def t2adrlabel : Operand { let PrintMethod = "printAdrLabelOperand"; } - // t2addrmode_posimm8 := reg + imm8 def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";} def t2addrmode_posimm8 : Operand { - let PrintMethod = "printT2AddrModeImm8Operand"; + let PrintMethod = "printT2AddrModeImm8Operand"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemPosImm8OffsetAsmOperand; @@ -191,7 +190,7 @@ def t2addrmode_posimm8 : Operand { def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";} def t2addrmode_negimm8 : Operand, ComplexPattern { - let PrintMethod = "printT2AddrModeImm8Operand"; + let PrintMethod = "printT2AddrModeImm8Operand"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemNegImm8OffsetAsmOperand; @@ -200,15 +199,22 @@ def t2addrmode_negimm8 : Operand, // t2addrmode_imm8 := reg +/- imm8 def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; } -def t2addrmode_imm8 : Operand, - ComplexPattern { - let PrintMethod = "printT2AddrModeImm8Operand"; +class T2AddrMode_Imm8 : Operand, + ComplexPattern { let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemImm8OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } +def t2addrmode_imm8 : T2AddrMode_Imm8 { + let PrintMethod = "printT2AddrModeImm8Operand"; +} + +def t2addrmode_imm8_pre : T2AddrMode_Imm8 { + let PrintMethod = "printT2AddrModeImm8Operand"; +} + def t2am_imm8_offset : Operand, ComplexPattern { @@ -219,14 +225,21 @@ def t2am_imm8_offset : Operand, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -def t2addrmode_imm8s4 : Operand { - let PrintMethod = "printT2AddrModeImm8s4Operand"; +class T2AddrMode_Imm8s4 : Operand { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } +def t2addrmode_imm8s4 : T2AddrMode_Imm8s4 { + let PrintMethod = "printT2AddrModeImm8s4Operand"; +} + +def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 { + let PrintMethod = "printT2AddrModeImm8s4Operand"; +} + def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; } def t2am_imm8s4_offset : Operand { let PrintMethod = "printT2AddrModeImm8s4OffsetOperand"; @@ -1300,7 +1313,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { @@ -1313,7 +1326,7 @@ def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { @@ -1325,7 +1338,7 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { @@ -1337,7 +1350,7 @@ def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { @@ -1349,7 +1362,7 @@ def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { @@ -1407,14 +1420,14 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), let mayStore = 1, neverHasSideEffects = 1 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), - (ins GPRnopc:$Rt, t2addrmode_imm8:$addr), + (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; } def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { @@ -1422,7 +1435,7 @@ def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), } def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { @@ -1514,7 +1527,7 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // For disassembly only. def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), - (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru, + (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { let AsmMatchConverter = "cvtT2LdrdPre"; let DecoderMethod = "DecodeT2LDRDPreInstruction"; @@ -1526,7 +1539,7 @@ def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), "$addr.base = $wb", []>; def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), - (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { let AsmMatchConverter = "cvtT2StrdPre"; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 0931e59..62394fa 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1079,6 +1079,7 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, O << "]" << markup(">"); } +template void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -1089,22 +1090,25 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); + bool isSub = OffImm < 0; // Don't print +0. - if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << " 0) - O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + OffImm = 0; + if (isSub) { + O << ", " + << markup(""); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " + << markup(""); + } O << "]" << markup(">"); } +template void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -1120,22 +1124,24 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); + bool isSub = OffImm < 0; assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); // Don't print +0. - if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << " 0) - O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + OffImm = 0; + if (isSub) { + O << ", " + << markup(""); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " + << markup(""); + } O << "]" << markup(">"); } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 5a64348..a3ea640 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -97,8 +97,10 @@ public: template void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum, -- cgit v1.1 From beb920fce6ccc89b4735f280f94cb8c227f4ef5e Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Tue, 18 Jun 2013 08:13:05 +0000 Subject: ARM: fix literal load with positive offset encoding When using a positive offset, literal loads where encoded as if it was negative, because: - The sign bit was not assigned to an operand - The addrmode_imm12 operand was not encoding the sign bit correctly This patch also makes the assembler look at the .w/.n specifier for loads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184182 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 6 +++--- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 4 +++- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 8e19d32..2693f32 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1024,16 +1024,16 @@ multiclass T2I_ld opcod, string opc, def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { + bits<4> Rt; + bits<13> addr; let isReMaterializable = 1; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; - let Inst{23} = ?; // add = (U == '1') + let Inst{23} = addr{12}; // add = (U == '1') let Inst{22-21} = opcod; let Inst{20} = 1; // load let Inst{19-16} = 0b1111; // Rn - bits<4> Rt; - bits<12> addr; let Inst{15-12} = Rt{3-0}; let Inst{11-0} = addr{11-0}; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e315d16..170d434 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5862,7 +5862,9 @@ processInstruction(MCInst &Inst, case ARM::t2LDRpcrel: // Select the narrow version if the immediate will fit. if (Inst.getOperand(1).getImm() > 0 && - Inst.getOperand(1).getImm() <= 0xff) + Inst.getOperand(1).getImm() <= 0xff && + !(static_cast(Operands[2])->isToken() && + static_cast(Operands[2])->getToken() == ".w")) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index f324bc2..8631d81 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -743,10 +743,10 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, if (!MO.isReg()) { Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm12 = 0; - isAdd = false ; // 'U' bit is set as part of the fixup. if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); + isAdd = false ; // 'U' bit is set as part of the fixup. MCFixupKind Kind; if (isThumb2()) -- cgit v1.1 From 23306deb92e2424165f2145895e21e223c3887eb Mon Sep 17 00:00:00 2001 From: Stefanus Du Toit Date: Tue, 18 Jun 2013 17:08:10 +0000 Subject: Add support for encoding the HLE XACQUIRE and XRELEASE prefixes. For decoding, keep the current behavior of always decoding these as their REP versions. In the future, this could be improved to recognize the cases where these behave as XACQUIRE and XRELEASE and decode them as such. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184207 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrTSX.td | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td index 363a190..59a6f1e 100644 --- a/lib/Target/X86/X86InstrTSX.td +++ b/lib/Target/X86/X86InstrTSX.td @@ -37,3 +37,10 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins), def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), "xabort\t$imm", [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>; + +// HLE prefixes + +def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>, Requires<[HasHLE]>; + +def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>, Requires<[HasHLE]>; + -- cgit v1.1 From 13769fa725b03a937335cf04d2b9cc1ca426060f Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 18 Jun 2013 18:03:17 +0000 Subject: Reduce indentation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184213 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 108 ++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 53 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 787daba..d917009 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -216,61 +216,63 @@ namespace { /// EmitDwarfRegOp - Emit dwarf register operation. void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) { AsmPrinter::EmitDwarfRegOp(MLoc); - else { - unsigned Reg = MLoc.getReg(); - if (Reg >= ARM::S0 && Reg <= ARM::S31) { - assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); - // S registers are described as bit-pieces of a register - // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) - // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) - - unsigned SReg = Reg - ARM::S0; - bool odd = SReg & 0x1; - unsigned Rx = 256 + (SReg >> 1); - - OutStreamer.AddComment("DW_OP_regx for S register"); - EmitInt8(dwarf::DW_OP_regx); - - OutStreamer.AddComment(Twine(SReg)); - EmitULEB128(Rx); - - if (odd) { - OutStreamer.AddComment("DW_OP_bit_piece 32 32"); - EmitInt8(dwarf::DW_OP_bit_piece); - EmitULEB128(32); - EmitULEB128(32); - } else { - OutStreamer.AddComment("DW_OP_bit_piece 32 0"); - EmitInt8(dwarf::DW_OP_bit_piece); - EmitULEB128(32); - EmitULEB128(0); - } - } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { - assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); - // Q registers Q0-Q15 are described by composing two D registers together. - // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) - // DW_OP_piece(8) - - unsigned QReg = Reg - ARM::Q0; - unsigned D1 = 256 + 2 * QReg; - unsigned D2 = D1 + 1; - - OutStreamer.AddComment("DW_OP_regx for Q register: D1"); - EmitInt8(dwarf::DW_OP_regx); - EmitULEB128(D1); - OutStreamer.AddComment("DW_OP_piece 8"); - EmitInt8(dwarf::DW_OP_piece); - EmitULEB128(8); - - OutStreamer.AddComment("DW_OP_regx for Q register: D2"); - EmitInt8(dwarf::DW_OP_regx); - EmitULEB128(D2); - OutStreamer.AddComment("DW_OP_piece 8"); - EmitInt8(dwarf::DW_OP_piece); - EmitULEB128(8); + return; + } + assert(MLoc.isReg() && + "This doesn't support offset/indirection - implement it if needed"); + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + bool odd = SReg & 0x1; + unsigned Rx = 256 + (SReg >> 1); + + OutStreamer.AddComment("DW_OP_regx for S register"); + EmitInt8(dwarf::DW_OP_regx); + + OutStreamer.AddComment(Twine(SReg)); + EmitULEB128(Rx); + + if (odd) { + OutStreamer.AddComment("DW_OP_bit_piece 32 32"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(32); + } else { + OutStreamer.AddComment("DW_OP_bit_piece 32 0"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(0); } + } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. + // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) + // DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("DW_OP_regx for Q register: D1"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D1); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + + OutStreamer.AddComment("DW_OP_regx for Q register: D2"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D2); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); } } -- cgit v1.1 From 571dd98ea4d6bf911c3b46a20ca3b5e3b341b21f Mon Sep 17 00:00:00 2001 From: Jack Carter Date: Tue, 18 Jun 2013 19:47:15 +0000 Subject: Mips ELF: Mark object file as ABI compliant When producing objects that are abi compliant we are marking neither the object file nor the assembly file correctly and thus generate warnings. We need to set the EF_CPIC flag in the ELF header when generating direct object. Note that the warning is only generated when compiling without PIC. When compiling with clang the warning will be suppressed by supplying: -Wa,-mno-shared -Wa,-call_nonpic Also the following directive should also be added: .option pic0 when compiling without PIC, This eliminates the need for supplying: -mno-shared -call_nonpic on the assembler command line. Patch by Douglas Gilmore git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184220 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 4 ++++ lib/Target/Mips/MipsAsmPrinter.cpp | 9 +++++++++ 2 files changed, 13 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index c33bc9a..cfcb877 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -36,6 +36,10 @@ namespace llvm { MCAssembler& MCA = getAssembler(); unsigned EFlags = MCA.getELFHeaderEFlags(); + // TODO: Need to add -mabicalls and -mno-abicalls flags. + // Currently we assume that -mabicalls is the default. + EFlags |= ELF::EF_MIPS_CPIC; + if (Subtarget.inMips16Mode()) EFlags |= ELF::EF_MIPS_ARCH_ASE_M16; else diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 638001b..bab70af 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -557,6 +557,15 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: Use SwitchSection. + // TODO: Need to add -mabicalls and -mno-abicalls flags. + // Currently we assume that -mabicalls is the default. + if (OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText(StringRef("\t.abicalls")); + Reloc::Model RM = Subtarget->getRelocationModel(); + if (RM == Reloc::Static) + OutStreamer.EmitRawText(StringRef("\t.option\tpic0")); + } + // Tell the assembler which ABI we are using if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText("\t.section .mdebug." + -- cgit v1.1 From bf811d602d1d81b93846c6cbbd1cec85f2f153cb Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 18 Jun 2013 20:19:24 +0000 Subject: Change the arm assembler to support this from the v7c spec: "When assembling to the ARM instruction set, the .N qualifier produces an assembler error and the .W qualifier has no effect." In the pre-matcher handler in the asm parser the ".w" (wide) qualifier when in ARM mode is now discarded. And an error message is now produced when the ".n" (narrow) qualifier is used in ARM mode. Test cases for these were added. rdar://14064574 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184224 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 170d434..647fdb3 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5266,7 +5266,17 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken)) continue; - if (ExtraToken != ".n") { + // For for ARM mode generate an error if the .n qualifier is used. + if (ExtraToken == ".n" && !isThumb()) { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); + return Error(Loc, "instruction with .n (narrow) qualifier not allowed in " + "arm mode"); + } + + // The .n qualifier is always discarded as that is what the tables + // and matcher expect. In ARM mode the .w qualifier has no effect, + // so discard it to avoid errors that can be caused by the matcher. + if (ExtraToken != ".n" && (isThumb() || ExtraToken != ".w")) { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); } -- cgit v1.1 From ef2d9e31940fc3121646e15effdfcc8f7f5e239b Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 18 Jun 2013 20:41:52 +0000 Subject: Fix 80 col violation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184228 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index eba9d78..df6f37b 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -467,19 +467,22 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, }; if (ST->hasAVX2()) { - int Idx = CostTableLookup(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + int Idx = CostTableLookup(AVX2CostTbl, array_lengthof(AVX2CostTbl), + ISD, MTy); if (Idx != -1) return LT.first * AVX2CostTbl[Idx].Cost; } if (ST->hasAVX()) { - int Idx = CostTableLookup(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + int Idx = CostTableLookup(AVX1CostTbl, array_lengthof(AVX1CostTbl), + ISD, MTy); if (Idx != -1) return LT.first * AVX1CostTbl[Idx].Cost; } if (ST->hasSSE42()) { - int Idx = CostTableLookup(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + int Idx = CostTableLookup(SSE42CostTbl, array_lengthof(SSE42CostTbl), + ISD, MTy); if (Idx != -1) return LT.first * SSE42CostTbl[Idx].Cost; } -- cgit v1.1 From 8493edfb4b61e5c63669fc19a55b640e1ad7aee1 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 18 Jun 2013 20:49:40 +0000 Subject: Converted an overly aggressive assert to a conditional check in AddCombineTo64bitMLAL. Said assert assumes that ADDC will always have a glue node as its second argument and is checked before we even know that we are actually performing the relevant MLAL optimization. This is incorrect since on ARM we *CAN* codegen ADDC with a use list based second argument. Thus to have both effects, I converted the assert to a conditional check which if it fails we do not perform the optimization. In terms of tests I can not produce an ADDC from the IR level until I get in my multiprecision optimization patch which is forthcoming. The tests for said patch would cause this assert to fail implying that said tests will provide the relevant tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184230 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ec0e9c2..015b023 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -7948,8 +7948,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, assert(AddcNode->getNumValues() == 2 && AddcNode->getValueType(0) == MVT::i32 && - AddcNode->getValueType(1) == MVT::Glue && - "Expect ADDC with two result values: i32, glue"); + "Expect ADDC with two result values. First: i32"); + + // Check that we have a glued ADDC node. + if (AddcNode->getValueType(1) != MVT::Glue) + return SDValue(); // Check that the ADDC adds the low result of the S/UMUL_LOHI. if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && -- cgit v1.1 From 41502e1af77443c31138cee309bd89898f23e33a Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 18 Jun 2013 20:49:45 +0000 Subject: [ARMTargetLowering] ARMISD::{SUB,ADD}{C,E} second result is a boolean implying that upper bits are always 0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184231 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 015b023..a63cb27 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -10184,9 +10184,19 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); + unsigned BitWidth = KnownOne.getBitWidth(); + KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { default: break; + case ARMISD::ADDC: + case ARMISD::ADDE: + case ARMISD::SUBC: + case ARMISD::SUBE: + // These nodes' second result is a boolean + if (Op.getResNo() == 0) + break; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); -- cgit v1.1 From a95e3091eb9ba5010f6c5f6f51958b01ca3a85f6 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 18 Jun 2013 21:49:21 +0000 Subject: ARM: Add optional datatype suffix to NEON mvn asm syntax. rdar://14194152 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184244 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 9d1a8ea..32b8d67 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -6698,12 +6698,17 @@ def VST4qWB_register_Asm_32 : (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -// VMOV takes an optional datatype suffix +// VMOV/VMVN takes an optional datatype suffix defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; + // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. // D-register versions. def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", -- cgit v1.1 From 70a3dc1bc2b16748a8a79d1d11d2b1603c38c032 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 18 Jun 2013 23:37:58 +0000 Subject: Use GetUnderlyingObject instead of custom function git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184261 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILISelDAGToDAG.cpp | 78 +++++++++-------------------------- 1 file changed, 20 insertions(+), 58 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 9a5e9e6..0f64496 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -17,6 +17,7 @@ #include "R600InstrInfo.h" #include "SIISelLowering.h" #include "llvm/ADT/ValueMap.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -59,20 +60,19 @@ private: bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); static bool checkType(const Value *ptr, unsigned int addrspace); - static const Value *getBasePointerValue(const Value *V); static bool isGlobalStore(const StoreSDNode *N); static bool isPrivateStore(const StoreSDNode *N); static bool isLocalStore(const StoreSDNode *N); static bool isRegionStore(const StoreSDNode *N); - static bool isCPLoad(const LoadSDNode *N); - static bool isConstantLoad(const LoadSDNode *N, int cbID); - static bool isGlobalLoad(const LoadSDNode *N); - static bool isParamLoad(const LoadSDNode *N); - static bool isPrivateLoad(const LoadSDNode *N); - static bool isLocalLoad(const LoadSDNode *N); - static bool isRegionLoad(const LoadSDNode *N); + bool isCPLoad(const LoadSDNode *N) const; + bool isConstantLoad(const LoadSDNode *N, int cbID) const; + bool isGlobalLoad(const LoadSDNode *N) const; + bool isParamLoad(const LoadSDNode *N) const; + bool isPrivateLoad(const LoadSDNode *N) const; + bool isLocalLoad(const LoadSDNode *N) const; + bool isRegionLoad(const LoadSDNode *N) const; bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); bool SelectGlobalValueVariableOffset(SDValue Addr, @@ -332,7 +332,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size()); } } while (IsModified); - + } if (Result && Result->isMachineOpcode() && !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) @@ -542,46 +542,6 @@ bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { return dyn_cast(ptrType)->getAddressSpace() == addrspace; } -const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) { - if (!V) { - return NULL; - } - const Value *ret = NULL; - ValueMap ValueBitMap; - std::queue > ValueQueue; - ValueQueue.push(V); - while (!ValueQueue.empty()) { - V = ValueQueue.front(); - if (ValueBitMap.find(V) == ValueBitMap.end()) { - ValueBitMap[V] = true; - if (dyn_cast(V) && dyn_cast(V->getType())) { - ret = V; - break; - } else if (dyn_cast(V)) { - ret = V; - break; - } else if (dyn_cast(V)) { - const ConstantExpr *CE = dyn_cast(V); - if (CE) { - ValueQueue.push(CE->getOperand(0)); - } - } else if (const AllocaInst *AI = dyn_cast(V)) { - ret = AI; - break; - } else if (const Instruction *I = dyn_cast(V)) { - uint32_t numOps = I->getNumOperands(); - for (uint32_t x = 0; x < numOps; ++x) { - ValueQueue.push(I->getOperand(x)); - } - } else { - assert(!"Found a Value that we didn't know how to handle!"); - } - } - ValueQueue.pop(); - } - return ret; -} - bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); } @@ -600,41 +560,43 @@ bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); } -bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { +bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) const { if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) { return true; } + + const DataLayout *DL = TM.getDataLayout(); MachineMemOperand *MMO = N->getMemOperand(); const Value *V = MMO->getValue(); - const Value *BV = getBasePointerValue(V); + const Value *BV = GetUnderlyingObject(V, DL, 0); if (MMO && MMO->getValue() && ((V && dyn_cast(V)) || (BV && dyn_cast( - getBasePointerValue(MMO->getValue()))))) { + GetUnderlyingObject(MMO->getValue(), DL, 0))))) { return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS); } else { return false; } } -bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) { +bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const { return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); } -bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) { +bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const { return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS); } -bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) { +bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const { return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); } -bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) { +bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const { return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); } -bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) { +bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { MachineMemOperand *MMO = N->getMemOperand(); if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { if (MMO) { @@ -648,7 +610,7 @@ bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) { return false; } -bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) { +bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const { if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { // Check to make sure we are not a constant pool load or a constant load // that is marked as a private load -- cgit v1.1 From dd5fe2ffc6f564192876065d2617ecbc18d03f23 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Wed, 19 Jun 2013 10:14:36 +0000 Subject: The RenderMethod field in RegisterOperand class sets the name of the method on the target specific operand to call to add the target specific operand to an MCInst. This patch defines RenderMethod for mips RegisterOperand classes and removes redundant code from MipsAsmParser.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184292 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 15 +-------------- lib/Target/Mips/MipsRegisterInfo.td | 14 +++++++++----- 2 files changed, 10 insertions(+), 19 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index ab23d9f..e810480 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -357,36 +357,23 @@ public: bool isCPURegsAsm() const { return Kind == k_Register && Reg.Kind == Kind_CPURegs; } - void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const { + void addRegAsmOperands(MCInst &Inst, unsigned N) const { Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); } bool isCPU64RegsAsm() const { return Kind == k_Register && Reg.Kind == Kind_CPU64Regs; } - void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); - } bool isHWRegsAsm() const { assert((Kind == k_Register) && "Invalid access!"); return Reg.Kind == Kind_HWRegs; } - void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); - } bool isHW64RegsAsm() const { assert((Kind == k_Register) && "Invalid access!"); return Reg.Kind == Kind_HW64Regs; } - void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); - } - - void addCCRAsmOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); - } bool isCCRAsm() const { assert((Kind == k_Register) && "Invalid access!"); diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index ad6912c..d98cb21 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -388,17 +388,21 @@ def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>; // Register Operands. -def CPURegsAsmOperand : AsmOperandClass { + +class MipsAsmRegOperand : AsmOperandClass { + let RenderMethod = "addRegAsmOperands"; +} +def CPURegsAsmOperand : MipsAsmRegOperand { let Name = "CPURegsAsm"; let ParserMethod = "parseCPURegs"; } -def CPU64RegsAsmOperand : AsmOperandClass { +def CPU64RegsAsmOperand : MipsAsmRegOperand { let Name = "CPU64RegsAsm"; let ParserMethod = "parseCPU64Regs"; } -def CCRAsmOperand : AsmOperandClass { +def CCRAsmOperand : MipsAsmRegOperand { let Name = "CCRAsm"; let ParserMethod = "parseCCRRegs"; } @@ -415,12 +419,12 @@ def CCROpnd : RegisterOperand { let ParserMatchClass = CCRAsmOperand; } -def HWRegsAsmOperand : AsmOperandClass { +def HWRegsAsmOperand : MipsAsmRegOperand { let Name = "HWRegsAsm"; let ParserMethod = "parseHWRegs"; } -def HW64RegsAsmOperand : AsmOperandClass { +def HW64RegsAsmOperand : MipsAsmRegOperand { let Name = "HW64RegsAsm"; let ParserMethod = "parseHW64Regs"; } -- cgit v1.1 From ad966ea7a81a538425d5319f6d8568e460639e54 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Jun 2013 20:18:24 +0000 Subject: Move StructurizeCFG out of R600 to generic Transforms. Register it with PassManager git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184343 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 896 ------------------------------- lib/Target/R600/AMDGPUTargetMachine.cpp | 2 +- lib/Target/R600/CMakeLists.txt | 1 - 3 files changed, 1 insertion(+), 898 deletions(-) delete mode 100644 lib/Target/R600/AMDGPUStructurizeCFG.cpp (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp deleted file mode 100644 index d26783d..0000000 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ /dev/null @@ -1,896 +0,0 @@ -//===-- AMDGPUStructurizeCFG.cpp - ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// The pass implemented in this file transforms the programs control flow -/// graph into a form that's suitable for code generation on hardware that -/// implements control flow by execution masking. This currently includes all -/// AMD GPUs but may as well be useful for other types of hardware. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/RegionPass.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" - -using namespace llvm; -using namespace llvm::PatternMatch; - -namespace { - -// Definition of the complex types used in this pass. - -typedef std::pair BBValuePair; - -typedef SmallVector RNVector; -typedef SmallVector BBVector; -typedef SmallVector BranchVector; -typedef SmallVector BBValueVector; - -typedef SmallPtrSet BBSet; - -typedef MapVector PhiMap; -typedef MapVector BB2BBVecMap; - -typedef DenseMap DTN2UnsignedMap; -typedef DenseMap BBPhiMap; -typedef DenseMap BBPredicates; -typedef DenseMap PredMap; -typedef DenseMap BB2BBMap; - -// The name for newly created blocks. - -static const char *FlowBlockName = "Flow"; - -/// @brief Find the nearest common dominator for multiple BasicBlocks -/// -/// Helper class for AMDGPUStructurizeCFG -/// TODO: Maybe move into common code -class NearestCommonDominator { - - DominatorTree *DT; - - DTN2UnsignedMap IndexMap; - - BasicBlock *Result; - unsigned ResultIndex; - bool ExplicitMentioned; - -public: - /// \brief Start a new query - NearestCommonDominator(DominatorTree *DomTree) { - DT = DomTree; - Result = 0; - } - - /// \brief Add BB to the resulting dominator - void addBlock(BasicBlock *BB, bool Remember = true) { - - DomTreeNode *Node = DT->getNode(BB); - - if (Result == 0) { - unsigned Numbering = 0; - for (;Node;Node = Node->getIDom()) - IndexMap[Node] = ++Numbering; - Result = BB; - ResultIndex = 1; - ExplicitMentioned = Remember; - return; - } - - for (;Node;Node = Node->getIDom()) - if (IndexMap.count(Node)) - break; - else - IndexMap[Node] = 0; - - assert(Node && "Dominator tree invalid!"); - - unsigned Numbering = IndexMap[Node]; - if (Numbering > ResultIndex) { - Result = Node->getBlock(); - ResultIndex = Numbering; - ExplicitMentioned = Remember && (Result == BB); - } else if (Numbering == ResultIndex) { - ExplicitMentioned |= Remember; - } - } - - /// \brief Is "Result" one of the BBs added with "Remember" = True? - bool wasResultExplicitMentioned() { - return ExplicitMentioned; - } - - /// \brief Get the query result - BasicBlock *getResult() { - return Result; - } -}; - -/// @brief Transforms the control flow graph on one single entry/exit region -/// at a time. -/// -/// After the transform all "If"/"Then"/"Else" style control flow looks like -/// this: -/// -/// \verbatim -/// 1 -/// || -/// | | -/// 2 | -/// | / -/// |/ -/// 3 -/// || Where: -/// | | 1 = "If" block, calculates the condition -/// 4 | 2 = "Then" subregion, runs if the condition is true -/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow -/// |/ 4 = "Else" optional subregion, runs if the condition is false -/// 5 5 = "End" block, also rejoins the control flow -/// \endverbatim -/// -/// Control flow is expressed as a branch where the true exit goes into the -/// "Then"/"Else" region, while the false exit skips the region -/// The condition for the optional "Else" region is expressed as a PHI node. -/// The incomming values of the PHI node are true for the "If" edge and false -/// for the "Then" edge. -/// -/// Additionally to that even complicated loops look like this: -/// -/// \verbatim -/// 1 -/// || -/// | | -/// 2 ^ Where: -/// | / 1 = "Entry" block -/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block -/// 3 3 = "Flow" block, with back edge to entry block -/// | -/// \endverbatim -/// -/// The back edge of the "Flow" block is always on the false side of the branch -/// while the true side continues the general flow. So the loop condition -/// consist of a network of PHI nodes where the true incoming values expresses -/// breaks and the false values expresses continue states. -class AMDGPUStructurizeCFG : public RegionPass { - - static char ID; - - Type *Boolean; - ConstantInt *BoolTrue; - ConstantInt *BoolFalse; - UndefValue *BoolUndef; - - Function *Func; - Region *ParentRegion; - - DominatorTree *DT; - - RNVector Order; - BBSet Visited; - - BBPhiMap DeletedPhis; - BB2BBVecMap AddedPhis; - - PredMap Predicates; - BranchVector Conditions; - - BB2BBMap Loops; - PredMap LoopPreds; - BranchVector LoopConds; - - RegionNode *PrevNode; - - void orderNodes(); - - void analyzeLoops(RegionNode *N); - - Value *invert(Value *Condition); - - Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); - - void gatherPredicates(RegionNode *N); - - void collectInfos(); - - void insertConditions(bool Loops); - - void delPhiValues(BasicBlock *From, BasicBlock *To); - - void addPhiValues(BasicBlock *From, BasicBlock *To); - - void setPhiValues(); - - void killTerminator(BasicBlock *BB); - - void changeExit(RegionNode *Node, BasicBlock *NewExit, - bool IncludeDominator); - - BasicBlock *getNextFlow(BasicBlock *Dominator); - - BasicBlock *needPrefix(bool NeedEmpty); - - BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); - - void setPrevNode(BasicBlock *BB); - - bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); - - bool isPredictableTrue(RegionNode *Node); - - void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); - - void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); - - void createFlow(); - - void rebuildSSA(); - -public: - AMDGPUStructurizeCFG(): - RegionPass(ID) { - - initializeRegionInfoPass(*PassRegistry::getPassRegistry()); - } - - using Pass::doInitialization; - virtual bool doInitialization(Region *R, RGPassManager &RGM); - - virtual bool runOnRegion(Region *R, RGPassManager &RGM); - - virtual const char *getPassName() const { - return "AMDGPU simplify control flow"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const { - - AU.addRequired(); - AU.addPreserved(); - RegionPass::getAnalysisUsage(AU); - } - -}; - -} // end anonymous namespace - -char AMDGPUStructurizeCFG::ID = 0; - -/// \brief Initialize the types and constants used in the pass -bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { - LLVMContext &Context = R->getEntry()->getContext(); - - Boolean = Type::getInt1Ty(Context); - BoolTrue = ConstantInt::getTrue(Context); - BoolFalse = ConstantInt::getFalse(Context); - BoolUndef = UndefValue::get(Boolean); - - return false; -} - -/// \brief Build up the general order of nodes -void AMDGPUStructurizeCFG::orderNodes() { - scc_iterator I = scc_begin(ParentRegion), - E = scc_end(ParentRegion); - for (Order.clear(); I != E; ++I) { - std::vector &Nodes = *I; - Order.append(Nodes.begin(), Nodes.end()); - } -} - -/// \brief Determine the end of the loops -void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) { - - if (N->isSubRegion()) { - // Test for exit as back edge - BasicBlock *Exit = N->getNodeAs()->getExit(); - if (Visited.count(Exit)) - Loops[Exit] = N->getEntry(); - - } else { - // Test for sucessors as back edge - BasicBlock *BB = N->getNodeAs(); - BranchInst *Term = cast(BB->getTerminator()); - - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term->getSuccessor(i); - - if (Visited.count(Succ)) - Loops[Succ] = BB; - } - } -} - -/// \brief Invert the given condition -Value *AMDGPUStructurizeCFG::invert(Value *Condition) { - - // First: Check if it's a constant - if (Condition == BoolTrue) - return BoolFalse; - - if (Condition == BoolFalse) - return BoolTrue; - - if (Condition == BoolUndef) - return BoolUndef; - - // Second: If the condition is already inverted, return the original value - if (match(Condition, m_Not(m_Value(Condition)))) - return Condition; - - // Third: Check all the users for an invert - BasicBlock *Parent = cast(Condition)->getParent(); - for (Value::use_iterator I = Condition->use_begin(), - E = Condition->use_end(); I != E; ++I) { - - Instruction *User = dyn_cast(*I); - if (!User || User->getParent() != Parent) - continue; - - if (match(*I, m_Not(m_Specific(Condition)))) - return *I; - } - - // Last option: Create a new instruction - return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); -} - -/// \brief Build the condition for one edge -Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, - bool Invert) { - Value *Cond = Invert ? BoolFalse : BoolTrue; - if (Term->isConditional()) { - Cond = Term->getCondition(); - - if (Idx != (unsigned)Invert) - Cond = invert(Cond); - } - return Cond; -} - -/// \brief Analyze the predecessors of each block and build up predicates -void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) { - - RegionInfo *RI = ParentRegion->getRegionInfo(); - BasicBlock *BB = N->getEntry(); - BBPredicates &Pred = Predicates[BB]; - BBPredicates &LPred = LoopPreds[BB]; - - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) { - - // Ignore it if it's a branch from outside into our region entry - if (!ParentRegion->contains(*PI)) - continue; - - Region *R = RI->getRegionFor(*PI); - if (R == ParentRegion) { - - // It's a top level block in our region - BranchInst *Term = cast((*PI)->getTerminator()); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term->getSuccessor(i); - if (Succ != BB) - continue; - - if (Visited.count(*PI)) { - // Normal forward edge - if (Term->isConditional()) { - // Try to treat it like an ELSE block - BasicBlock *Other = Term->getSuccessor(!i); - if (Visited.count(Other) && !Loops.count(Other) && - !Pred.count(Other) && !Pred.count(*PI)) { - - Pred[Other] = BoolFalse; - Pred[*PI] = BoolTrue; - continue; - } - } - Pred[*PI] = buildCondition(Term, i, false); - - } else { - // Back edge - LPred[*PI] = buildCondition(Term, i, true); - } - } - - } else { - - // It's an exit from a sub region - while(R->getParent() != ParentRegion) - R = R->getParent(); - - // Edge from inside a subregion to its entry, ignore it - if (R == N) - continue; - - BasicBlock *Entry = R->getEntry(); - if (Visited.count(Entry)) - Pred[Entry] = BoolTrue; - else - LPred[Entry] = BoolFalse; - } - } -} - -/// \brief Collect various loop and predicate infos -void AMDGPUStructurizeCFG::collectInfos() { - - // Reset predicate - Predicates.clear(); - - // and loop infos - Loops.clear(); - LoopPreds.clear(); - - // Reset the visited nodes - Visited.clear(); - - for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); - OI != OE; ++OI) { - - // Analyze all the conditions leading to a node - gatherPredicates(*OI); - - // Remember that we've seen this node - Visited.insert((*OI)->getEntry()); - - // Find the last back edges - analyzeLoops(*OI); - } -} - -/// \brief Insert the missing branch conditions -void AMDGPUStructurizeCFG::insertConditions(bool Loops) { - BranchVector &Conds = Loops ? LoopConds : Conditions; - Value *Default = Loops ? BoolTrue : BoolFalse; - SSAUpdater PhiInserter; - - for (BranchVector::iterator I = Conds.begin(), - E = Conds.end(); I != E; ++I) { - - BranchInst *Term = *I; - assert(Term->isConditional()); - - BasicBlock *Parent = Term->getParent(); - BasicBlock *SuccTrue = Term->getSuccessor(0); - BasicBlock *SuccFalse = Term->getSuccessor(1); - - PhiInserter.Initialize(Boolean, ""); - PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); - PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); - - BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; - - NearestCommonDominator Dominator(DT); - Dominator.addBlock(Parent, false); - - Value *ParentValue = 0; - for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); - PI != PE; ++PI) { - - if (PI->first == Parent) { - ParentValue = PI->second; - break; - } - PhiInserter.AddAvailableValue(PI->first, PI->second); - Dominator.addBlock(PI->first); - } - - if (ParentValue) { - Term->setCondition(ParentValue); - } else { - if (!Dominator.wasResultExplicitMentioned()) - PhiInserter.AddAvailableValue(Dominator.getResult(), Default); - - Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); - } - } -} - -/// \brief Remove all PHI values coming from "From" into "To" and remember -/// them in DeletedPhis -void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { - PhiMap &Map = DeletedPhis[To]; - for (BasicBlock::iterator I = To->begin(), E = To->end(); - I != E && isa(*I);) { - - PHINode &Phi = cast(*I++); - while (Phi.getBasicBlockIndex(From) != -1) { - Value *Deleted = Phi.removeIncomingValue(From, false); - Map[&Phi].push_back(std::make_pair(From, Deleted)); - } - } -} - -/// \brief Add a dummy PHI value as soon as we knew the new predecessor -void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { - for (BasicBlock::iterator I = To->begin(), E = To->end(); - I != E && isa(*I);) { - - PHINode &Phi = cast(*I++); - Value *Undef = UndefValue::get(Phi.getType()); - Phi.addIncoming(Undef, From); - } - AddedPhis[To].push_back(From); -} - -/// \brief Add the real PHI value as soon as everything is set up -void AMDGPUStructurizeCFG::setPhiValues() { - - SSAUpdater Updater; - for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end(); - AI != AE; ++AI) { - - BasicBlock *To = AI->first; - BBVector &From = AI->second; - - if (!DeletedPhis.count(To)) - continue; - - PhiMap &Map = DeletedPhis[To]; - for (PhiMap::iterator PI = Map.begin(), PE = Map.end(); - PI != PE; ++PI) { - - PHINode *Phi = PI->first; - Value *Undef = UndefValue::get(Phi->getType()); - Updater.Initialize(Phi->getType(), ""); - Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); - Updater.AddAvailableValue(To, Undef); - - NearestCommonDominator Dominator(DT); - Dominator.addBlock(To, false); - for (BBValueVector::iterator VI = PI->second.begin(), - VE = PI->second.end(); VI != VE; ++VI) { - - Updater.AddAvailableValue(VI->first, VI->second); - Dominator.addBlock(VI->first); - } - - if (!Dominator.wasResultExplicitMentioned()) - Updater.AddAvailableValue(Dominator.getResult(), Undef); - - for (BBVector::iterator FI = From.begin(), FE = From.end(); - FI != FE; ++FI) { - - int Idx = Phi->getBasicBlockIndex(*FI); - assert(Idx != -1); - Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI)); - } - } - - DeletedPhis.erase(To); - } - assert(DeletedPhis.empty()); -} - -/// \brief Remove phi values from all successors and then remove the terminator. -void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { - TerminatorInst *Term = BB->getTerminator(); - if (!Term) - return; - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); - SI != SE; ++SI) { - - delPhiValues(BB, *SI); - } - - Term->eraseFromParent(); -} - -/// \brief Let node exit(s) point to NewExit -void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, - bool IncludeDominator) { - - if (Node->isSubRegion()) { - Region *SubRegion = Node->getNodeAs(); - BasicBlock *OldExit = SubRegion->getExit(); - BasicBlock *Dominator = 0; - - // Find all the edges from the sub region to the exit - for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit); - I != E;) { - - BasicBlock *BB = *I++; - if (!SubRegion->contains(BB)) - continue; - - // Modify the edges to point to the new exit - delPhiValues(BB, OldExit); - BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); - addPhiValues(BB, NewExit); - - // Find the new dominator (if requested) - if (IncludeDominator) { - if (!Dominator) - Dominator = BB; - else - Dominator = DT->findNearestCommonDominator(Dominator, BB); - } - } - - // Change the dominator (if requested) - if (Dominator) - DT->changeImmediateDominator(NewExit, Dominator); - - // Update the region info - SubRegion->replaceExit(NewExit); - - } else { - BasicBlock *BB = Node->getNodeAs(); - killTerminator(BB); - BranchInst::Create(NewExit, BB); - addPhiValues(BB, NewExit); - if (IncludeDominator) - DT->changeImmediateDominator(NewExit, BB); - } -} - -/// \brief Create a new flow node and update dominator tree and region info -BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) { - LLVMContext &Context = Func->getContext(); - BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : - Order.back()->getEntry(); - BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, - Func, Insert); - DT->addNewBlock(Flow, Dominator); - ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); - return Flow; -} - -/// \brief Create a new or reuse the previous node as flow node -BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) { - - BasicBlock *Entry = PrevNode->getEntry(); - - if (!PrevNode->isSubRegion()) { - killTerminator(Entry); - if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) - return Entry; - - } - - // create a new flow node - BasicBlock *Flow = getNextFlow(Entry); - - // and wire it up - changeExit(PrevNode, Flow, true); - PrevNode = ParentRegion->getBBNode(Flow); - return Flow; -} - -/// \brief Returns the region exit if possible, otherwise just a new flow node -BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow, - bool ExitUseAllowed) { - - if (Order.empty() && ExitUseAllowed) { - BasicBlock *Exit = ParentRegion->getExit(); - DT->changeImmediateDominator(Exit, Flow); - addPhiValues(Flow, Exit); - return Exit; - } - return getNextFlow(Flow); -} - -/// \brief Set the previous node -void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) { - PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0; -} - -/// \brief Does BB dominate all the predicates of Node ? -bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { - BBPredicates &Preds = Predicates[Node->getEntry()]; - for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); - PI != PE; ++PI) { - - if (!DT->dominates(BB, PI->first)) - return false; - } - return true; -} - -/// \brief Can we predict that this node will always be called? -bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) { - - BBPredicates &Preds = Predicates[Node->getEntry()]; - bool Dominated = false; - - // Regionentry is always true - if (PrevNode == 0) - return true; - - for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) { - - if (I->second != BoolTrue) - return false; - - if (!Dominated && DT->dominates(I->first, PrevNode->getEntry())) - Dominated = true; - } - - // TODO: The dominator check is too strict - return Dominated; -} - -/// Take one node from the order vector and wire it up -void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed, - BasicBlock *LoopEnd) { - - RegionNode *Node = Order.pop_back_val(); - Visited.insert(Node->getEntry()); - - if (isPredictableTrue(Node)) { - // Just a linear flow - if (PrevNode) { - changeExit(PrevNode, Node->getEntry(), true); - } - PrevNode = Node; - - } else { - // Insert extra prefix node (or reuse last one) - BasicBlock *Flow = needPrefix(false); - - // Insert extra postfix node (or use exit instead) - BasicBlock *Entry = Node->getEntry(); - BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); - - // let it point to entry and next block - Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); - addPhiValues(Flow, Entry); - DT->changeImmediateDominator(Entry, Flow); - - PrevNode = Node; - while (!Order.empty() && !Visited.count(LoopEnd) && - dominatesPredicates(Entry, Order.back())) { - handleLoops(false, LoopEnd); - } - - changeExit(PrevNode, Next, false); - setPrevNode(Next); - } -} - -void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed, - BasicBlock *LoopEnd) { - RegionNode *Node = Order.back(); - BasicBlock *LoopStart = Node->getEntry(); - - if (!Loops.count(LoopStart)) { - wireFlow(ExitUseAllowed, LoopEnd); - return; - } - - if (!isPredictableTrue(Node)) - LoopStart = needPrefix(true); - - LoopEnd = Loops[Node->getEntry()]; - wireFlow(false, LoopEnd); - while (!Visited.count(LoopEnd)) { - handleLoops(false, LoopEnd); - } - - // Create an extra loop end node - LoopEnd = needPrefix(false); - BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); - LoopConds.push_back(BranchInst::Create(Next, LoopStart, - BoolUndef, LoopEnd)); - addPhiValues(LoopEnd, LoopStart); - setPrevNode(Next); -} - -/// After this function control flow looks like it should be, but -/// branches and PHI nodes only have undefined conditions. -void AMDGPUStructurizeCFG::createFlow() { - - BasicBlock *Exit = ParentRegion->getExit(); - bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); - - DeletedPhis.clear(); - AddedPhis.clear(); - Conditions.clear(); - LoopConds.clear(); - - PrevNode = 0; - Visited.clear(); - - while (!Order.empty()) { - handleLoops(EntryDominatesExit, 0); - } - - if (PrevNode) - changeExit(PrevNode, Exit, EntryDominatesExit); - else - assert(EntryDominatesExit); -} - -/// Handle a rare case where the disintegrated nodes instructions -/// no longer dominate all their uses. Not sure if this is really nessasary -void AMDGPUStructurizeCFG::rebuildSSA() { - SSAUpdater Updater; - for (Region::block_iterator I = ParentRegion->block_begin(), - E = ParentRegion->block_end(); - I != E; ++I) { - - BasicBlock *BB = *I; - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); - II != IE; ++II) { - - bool Initialized = false; - for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) { - - Next = I->getNext(); - - Instruction *User = cast(I->getUser()); - if (User->getParent() == BB) { - continue; - - } else if (PHINode *UserPN = dyn_cast(User)) { - if (UserPN->getIncomingBlock(*I) == BB) - continue; - } - - if (DT->dominates(II, User)) - continue; - - if (!Initialized) { - Value *Undef = UndefValue::get(II->getType()); - Updater.Initialize(II->getType(), ""); - Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); - Updater.AddAvailableValue(BB, II); - Initialized = true; - } - Updater.RewriteUseAfterInsertions(*I); - } - } - } -} - -/// \brief Run the transformation for each region found -bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { - if (R->isTopLevelRegion()) - return false; - - Func = R->getEntry()->getParent(); - ParentRegion = R; - - DT = &getAnalysis(); - - orderNodes(); - collectInfos(); - createFlow(); - insertConditions(false); - insertConditions(true); - setPhiValues(); - rebuildSSA(); - - // Cleanup - Order.clear(); - Visited.clear(); - DeletedPhis.clear(); - AddedPhis.clear(); - Predicates.clear(); - Conditions.clear(); - Loops.clear(); - LoopPreds.clear(); - LoopConds.clear(); - - return true; -} - -/// \brief Create the pass -Pass *llvm::createAMDGPUStructurizeCFGPass() { - return new AMDGPUStructurizeCFG(); -} diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 2fba434..90f72de 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -109,7 +109,7 @@ bool AMDGPUPassConfig::addPreISel() { const AMDGPUSubtarget &ST = TM->getSubtarget(); if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - addPass(createAMDGPUStructurizeCFGPass()); + addPass(createStructurizeCFGPass()); addPass(createSIAnnotateControlFlowPass()); } else { addPass(createR600TextureIntrinsicsReplacer()); diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 1b79bf5..824475e 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_target(R600CodeGen AMDGPUMCInstLower.cpp AMDGPUMachineFunction.cpp AMDGPUSubtarget.cpp - AMDGPUStructurizeCFG.cpp AMDGPUTargetMachine.cpp AMDGPUISelLowering.cpp AMDGPUConvertToISA.cpp -- cgit v1.1 From ea44281d5da5096de50ce1cb358ff0c6f20e1a2a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 19 Jun 2013 20:51:24 +0000 Subject: Access the TargetLoweringInfo from the TargetMachine object instead of caching it. The TLI may change between functions. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184349 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetMachine.cpp | 2 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 2 +- lib/Target/X86/X86TargetMachine.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 17c52c9..d9aef78 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -60,7 +60,7 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our ARM pass. This // allows the ARM pass to delegate to the target independent layer when // appropriate. - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); PM.add(createARMTargetTransformInfoPass(this)); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index da03b4c..5ebf6ab 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -162,7 +162,7 @@ void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our PPC pass. This // allows the PPC pass to delegate to the target independent layer when // appropriate. - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); PM.add(createPPCTargetTransformInfoPass(this)); } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 0422a61..49ebd1a 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -132,7 +132,7 @@ void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our X86 pass. This // allows the X86 pass to delegate to the target independent layer when // appropriate. - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); PM.add(createX86TargetTransformInfoPass(this)); } -- cgit v1.1 From f9fd58a44bbc7d9371ce39eb20eec16b0f1f7395 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 19 Jun 2013 21:07:11 +0000 Subject: Access the TargetLoweringInfo from the TargetMachine object instead of caching it. The TLI may change between functions. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetMachine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index d9aef78..354a779 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -150,7 +150,7 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) - addPass(createGlobalMergePass(TM->getTargetLowering())); + addPass(createGlobalMergePass(TM)); return false; } -- cgit v1.1 From ba54bca472a15d0657e1b88776f7069042b60b4e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 19 Jun 2013 21:36:55 +0000 Subject: Access the TargetLoweringInfo from the TargetMachine object instead of caching it. The TLI may change between functions. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184360 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 49 +++++++++++++++++++---------- lib/Target/ARM/ARMInstrInfo.td | 4 +-- lib/Target/Hexagon/Hexagon.h | 2 +- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 20 ++++++------ lib/Target/Hexagon/HexagonTargetMachine.cpp | 2 +- lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 3 +- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 3 +- lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 8 +++-- lib/Target/Mips/MipsISelDAGToDAG.cpp | 3 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 2 +- lib/Target/R600/AMDILISelDAGToDAG.cpp | 6 ++-- lib/Target/R600/SIInstrInfo.td | 3 +- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 12 ++++--- lib/Target/X86/X86ISelDAGToDAG.cpp | 16 +++++----- lib/Target/XCore/XCoreISelDAGToDAG.cpp | 4 +-- 15 files changed, 78 insertions(+), 59 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 962368d..3e23253 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -61,7 +61,6 @@ enum AddrMode2Type { class ARMDAGToDAGISel : public SelectionDAGISel { ARMBaseTargetMachine &TM; - const ARMBaseInstrInfo *TII; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. @@ -71,7 +70,6 @@ public: explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm), - TII(static_cast(TM.getInstrInfo())), Subtarget(&TM.getSubtarget()) { } @@ -434,6 +432,9 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (Use->getOpcode() == ISD::CopyToReg) return true; if (Use->isMachineOpcode()) { + const ARMBaseInstrInfo *TII = + static_cast(TM.getInstrInfo()); + const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) return true; @@ -533,7 +534,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -557,7 +559,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -703,7 +706,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -724,7 +728,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -901,7 +906,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); @@ -915,7 +921,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -960,7 +967,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -978,7 +986,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1202,7 +1211,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1219,7 +1229,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1267,7 +1278,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1297,7 +1309,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1326,7 +1339,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -2587,7 +2601,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI->getPointerTy()); + getTargetLowering()->getPointerTy()); SDNode *ResNode; if (Subtarget->isThumb1Only()) { @@ -2617,7 +2631,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. int FI = cast(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); if (Subtarget->isThumb1Only()) { SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index da815d5..cc17b00 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -275,8 +275,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"TLI->isLittleEndian()">; -def IsBE : Predicate<"TLI->isBigEndian()">; +def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">; +def IsBE : Predicate<"getTargetLowering()->isBigEndian()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index b88637a..5467ee3 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -29,7 +29,7 @@ namespace llvm { class HexagonTargetMachine; class raw_ostream; - FunctionPass *createHexagonISelDag(const HexagonTargetMachine &TM, + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM); FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM); diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 22740b7..9e78e51 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -52,7 +52,7 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { const HexagonTargetMachine& TM; DenseMap GlobalAddressUseCountMap; public: - explicit HexagonDAGToDAGISel(const HexagonTargetMachine &targetmachine, + explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine, CodeGenOpt::Level OptLevel) : SelectionDAGISel(targetmachine, OptLevel), Subtarget(targetmachine.getSubtarget()), @@ -178,7 +178,7 @@ inline SDValue XformUToUM1Imm(unsigned Imm) { /// createHexagonISelDag - This pass converts a legalized DAG into a /// Hexagon-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createHexagonISelDag(const HexagonTargetMachine &TM, +FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new HexagonDAGToDAGISel(TM, OptLevel); } @@ -394,7 +394,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) { EVT LoadedVT = LD->getMemoryVT(); int64_t Offset = cast(Base)->getOffset(); if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) { - MVT PointerTy = TLI->getPointerTy(); + MVT PointerTy = getTargetLowering()->getPointerTy(); const GlobalValue* GV = cast(Base)->getGlobal(); SDValue TargAddr = @@ -443,10 +443,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, SDValue CPTmpN1_0; SDValue CPTmpN1_1; - const HexagonInstrInfo *TII = - static_cast(TM.getInstrInfo()); if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && N1.getNode()->getValueType(0) == MVT::i32) { + const HexagonInstrInfo *TII = + static_cast(TM.getInstrInfo()); if (TII->isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, @@ -510,10 +510,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, SDValue CPTmpN1_0; SDValue CPTmpN1_1; - const HexagonInstrInfo *TII = - static_cast(TM.getInstrInfo()); if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && N1.getNode()->getValueType(0) == MVT::i32) { + const HexagonInstrInfo *TII = + static_cast(TM.getInstrInfo()); if (TII->isValidAutoIncImm(LoadedVT, Val)) { SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); @@ -777,7 +777,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, EVT StoredVT = ST->getMemoryVT(); int64_t Offset = cast(Base)->getOffset(); if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) { - MVT PointerTy = TLI->getPointerTy(); + MVT PointerTy = getTargetLowering()->getPointerTy(); const GlobalValue* GV = cast(Base)->getGlobal(); SDValue TargAddr = @@ -1215,10 +1215,10 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { // We are concerned with only those intrinsics that have predicate registers // as at least one of the operands. - const HexagonInstrInfo *TII = - static_cast(TM.getInstrInfo()); if (IntrinsicWithPred) { SmallVector Ops; + const HexagonInstrInfo *TII = + static_cast(TM.getInstrInfo()); const MCInstrDesc &MCID = TII->get(IntrinsicWithPred); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index b113b35..cd96b58 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -126,7 +126,7 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } bool HexagonPassConfig::addInstSelector() { - const HexagonTargetMachine &TM = getHexagonTargetMachine(); + HexagonTargetMachine &TM = getHexagonTargetMachine(); bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index 9d6dfe6..626eeb5 100644 --- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -181,7 +181,8 @@ SelectAddrRegImm(SDValue N, SDValue &Base, SDValue &Disp) { /// GOT address into a register. SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, + getTargetLowering()->getPointerTy()).getNode(); } /// Select instructions not customized! Used for diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 76bc1e7..543f54c 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -259,7 +259,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N, } Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI->getPointerTy()) : + CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, + getTargetLowering()->getPointerTy()) : AM.Base.Reg; if (AM.GV) diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index f70abda..0caa277 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -118,11 +118,13 @@ void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() { unsigned Mips16SPAliasReg = MF->getInfo()->getMips16SPAliasReg(); - return CurDAG->getRegister(Mips16SPAliasReg, TLI->getPointerTy()); + return CurDAG->getRegister(Mips16SPAliasReg, + getTargetLowering()->getPointerTy()); } void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { - SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI->getPointerTy()); + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, + getTargetLowering()->getPointerTy()); if (Parent) { switch (Parent->getOpcode()) { case ISD::LOAD: { @@ -149,7 +151,7 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { } } } - AliasReg = CurDAG->getRegister(Mips::SP, TLI->getPointerTy()); + AliasReg = CurDAG->getRegister(Mips::SP, getTargetLowering()->getPointerTy()); return; } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index a1de174..0002a5f 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -57,7 +57,8 @@ bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { /// GOT address into a register. SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = MF->getInfo()->getGlobalBaseReg(); - return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, + getTargetLowering()->getPointerTy()).getNode(); } /// ComplexPattern used on MipsInstrInfo diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 7684bec..2080275 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -402,7 +402,7 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } case MipsISD::ThreadPointer: { - EVT PtrVT = TLI->getPointerTy(); + EVT PtrVT = getTargetLowering()->getPointerTy(); unsigned RdhwrOpc, SrcReg, DestReg; if (PtrVT == MVT::i32) { diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 0f64496..c03ced3 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -92,8 +92,7 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM return new AMDGPUDAGToDAGISel(TM); } -AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM - ) +AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) : SelectionDAGISel(TM), Subtarget(TM.getSubtarget()) { } @@ -712,7 +711,8 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() { } // Go over all selected nodes and try to fold them a bit more - const AMDGPUTargetLowering& Lowering = (*(const AMDGPUTargetLowering*)TLI); + const AMDGPUTargetLowering& Lowering = + (*(const AMDGPUTargetLowering*)getTargetLowering()); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ++I) { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 42fa95f..36812ca 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -44,7 +44,8 @@ def IMM12bit : PatLeaf <(imm), >; class InlineImm : PatLeaf <(vt imm), [{ - return (*(const SITargetLowering *)TLI).analyzeImmediate(N) == 0; + return + (*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0; }]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index e85cf74..db62151 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -33,7 +33,7 @@ class SparcDAGToDAGISel : public SelectionDAGISel { /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can /// make the right decision when generating code for different targets. const SparcSubtarget &Subtarget; - SparcTargetMachine& TM; + SparcTargetMachine &TM; public: explicit SparcDAGToDAGISel(SparcTargetMachine &tm) : SelectionDAGISel(tm), @@ -67,13 +67,15 @@ private: SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, + getTargetLowering()->getPointerTy()).getNode(); } bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), + getTargetLowering()->getPointerTy()); Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -88,7 +90,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, dyn_cast(Addr.getOperand(0))) { // Constant offset from frame ref. Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), - TLI->getPointerTy()); + getTargetLowering()->getPointerTy()); } else { Base = Addr.getOperand(0); } @@ -131,7 +133,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { } R1 = Addr; - R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy()); + R2 = CurDAG->getRegister(SP::G0, getTargetLowering()->getPointerTy()); return true; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 4ffffa1..39b205e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -141,10 +141,6 @@ namespace { /// SelectionDAG operations. /// class X86DAGToDAGISel : public SelectionDAGISel { - /// X86Lowering - This object fully describes how to lower LLVM code to an - /// X86-specific SelectionDAG. - const X86TargetLowering &X86Lowering; - /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; @@ -156,7 +152,6 @@ namespace { public: explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), - X86Lowering(*tm.getTargetLowering()), Subtarget(&tm.getSubtarget()), OptForSize(false) {} @@ -233,7 +228,8 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI->getPointerTy()) : + CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, + getTargetLowering()->getPointerTy()) : AM.Base_Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; @@ -504,8 +500,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // If the source and destination are SSE registers, then this is a legal // conversion that should not be lowered. - bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); - bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); + X86TargetLowering *X86Lowering = (X86TargetLowering*)getTargetLowering(); + bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); + bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); if (SrcIsSSE && DstIsSSE) continue; @@ -1556,7 +1553,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, /// SDNode *X86DAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, + getTargetLowering()->getPointerTy()).getNode(); } SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index ee183aa..768cba6 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -37,13 +37,11 @@ using namespace llvm; /// namespace { class XCoreDAGToDAGISel : public SelectionDAGISel { - const XCoreTargetLowering &Lowering; const XCoreSubtarget &Subtarget; public: XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel), - Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } SDNode *Select(SDNode *N); @@ -125,7 +123,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI->getPointerTy()); + getTargetLowering()->getPointerTy()); SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, MVT::Other, CPIdx, CurDAG->getEntryNode()); -- cgit v1.1 From 0159ae4295720c5ce8fc770ddb5fed67e90b8d3a Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 19 Jun 2013 21:55:13 +0000 Subject: DebugInfo: PR14763/r183329 correct the location of indirect parameters We had been papering over a problem with location info for non-trivial types passed by value by emitting their type as references (this caused the debugger to interpret the location information correctly, but broke the type of the function). r183329 corrected the type information but lead to the debugger interpreting the pointer parameter as the value - the debug info describing the location needed an extra dereference. Use a new flag in DIVariable to add the extra indirection (either by promoting an existing DW_OP_reg (parameter passed in a register) to DW_OP_breg + 0 or by adding DW_OP_deref to an existing DW_OP_breg + n (parameter passed on the stack). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184368 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 7 ++++--- lib/Target/ARM/ARMAsmPrinter.h | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index d917009..dd7e20f 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -214,13 +214,14 @@ namespace { } // end of anonymous namespace /// EmitDwarfRegOp - Emit dwarf register operation. -void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { +void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, + bool Indirect) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) { - AsmPrinter::EmitDwarfRegOp(MLoc); + AsmPrinter::EmitDwarfRegOp(MLoc, Indirect); return; } - assert(MLoc.isReg() && + assert(MLoc.isReg() && !Indirect && "This doesn't support offset/indirection - implement it if needed"); unsigned Reg = MLoc.getReg(); if (Reg >= ARM::S0 && Reg <= ARM::S31) { diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 7ce2b83..de72e06 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -98,7 +98,8 @@ private: public: /// EmitDwarfRegOp - Emit dwarf register operation. - virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE; + virtual void EmitDwarfRegOp(const MachineLocation &MLoc, bool Indirect) const + LLVM_OVERRIDE; virtual unsigned getISAEncoding() LLVM_OVERRIDE { // ARM/Darwin adds ISA to the DWARF info for each function. -- cgit v1.1 From 0c92f2a1c73aeccc9e0f4deadcad8dc563dec225 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 19 Jun 2013 21:59:00 +0000 Subject: Don't pass in the TargetInstrInfo into the register info object. It doesn't use it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184369 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXInstrInfo.cpp | 2 +- lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 3 +-- lib/Target/NVPTX/NVPTXRegisterInfo.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 52be287..80af163 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; // FIXME: Add the subtarget support on this constructor. NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm) - : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {} + : NVPTXGenInstrInfo(), TM(tm), RegInfo(*TM.getSubtargetImpl()) {} void NVPTXInstrInfo::copyPhysReg( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index bb039f8..b749b05 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -77,8 +77,7 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { } } -NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, - const NVPTXSubtarget &st) +NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st) : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {} #define GET_REGINFO_TARGET_DESC diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index d406820..0a20f29 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -35,7 +35,7 @@ private: ManagedStringPool ManagedStrPool; public: - NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st); + NVPTXRegisterInfo(const NVPTXSubtarget &st); //------------------------------------------------------ // Pure virtual functions from TargetRegisterInfo -- cgit v1.1 From 7231625f75b4da1c87deb833cd9cad6c5ee95d95 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Thu, 20 Jun 2013 11:21:49 +0000 Subject: Optimize register parsing for MipsAsmParser. Allow symbolic aliases for FPU registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184411 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 106 +++++++++++++++------------- 1 file changed, 58 insertions(+), 48 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index e810480..30149d3 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -84,6 +84,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseDirective(AsmToken DirectiveID); MipsAsmParser::OperandMatchResultTy + parseRegs(SmallVectorImpl &Operands, + int RegKind); + MipsAsmParser::OperandMatchResultTy parseMemOperand(SmallVectorImpl &Operands); MipsAsmParser::OperandMatchResultTy @@ -102,7 +105,7 @@ class MipsAsmParser : public MCTargetAsmParser { parseCCRRegs(SmallVectorImpl &Operands); bool searchSymbolAlias(SmallVectorImpl &Operands, - unsigned RegisterClass); + unsigned RegKind); bool ParseOperand(SmallVectorImpl &, StringRef Mnemonic); @@ -162,6 +165,8 @@ class MipsAsmParser : public MCTargetAsmParser { int matchRegisterByNumber(unsigned RegNum, unsigned RegClass); + int matchFPURegisterName(StringRef Name, FpFormatTy Format); + void setFpFormat(FpFormatTy Format) { FpFormat = Format; } @@ -787,16 +792,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { return CC; } -int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { - - if (Name.equals("fcc0")) - return Mips::FCC0; - - int CC; - CC = matchCPURegisterName(Name); - if (CC != -1) - return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID - : Mips::CPURegsRegClassID); +int MipsAsmParser::matchFPURegisterName(StringRef Name, FpFormatTy Format) { if (Name[0] == 'f') { StringRef NumString = Name.substr(1); @@ -806,8 +802,6 @@ int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { if (IntVal > 31) return -1; - FpFormatTy Format = getFpFormat(); - if (Format == FP_FORMAT_S || Format == FP_FORMAT_W) return getReg(Mips::FGR32RegClassID, IntVal); if (Format == FP_FORMAT_D) { @@ -820,10 +814,22 @@ int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { return getReg(Mips::AFGR64RegClassID, IntVal / 2); } } - return -1; } +int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { + + if (Name.equals("fcc0")) + return Mips::FCC0; + + int CC; + CC = matchCPURegisterName(Name); + if (CC != -1) + return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID); + return matchFPURegisterName(Name, getFpFormat()); +} + void MipsAsmParser::setDefaultFpFormat() { if (isMips64() || isFP64()) @@ -1240,12 +1246,11 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::parseCPU64Regs(SmallVectorImpl &Operands) { - - if (!isMips64()) - return MatchOperand_NoMatch; +MipsAsmParser::parseRegs(SmallVectorImpl &Operands, + int RegKind) { + MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind; if (getLexer().getKind() == AsmToken::Identifier) { - if (searchSymbolAlias(Operands, MipsOperand::Kind_CPU64Regs)) + if (searchSymbolAlias(Operands, Kind)) return MatchOperand_Success; return MatchOperand_NoMatch; } @@ -1254,17 +1259,29 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl &Operands) { return MatchOperand_NoMatch; Parser.Lex(); // Eat $ - if (!tryParseRegisterOperand(Operands, true)) { + if (!tryParseRegisterOperand(Operands, isMips64())) { // Set the proper register kind. MipsOperand* op = static_cast(Operands.back()); - op->setRegKind(MipsOperand::Kind_CPU64Regs); + op->setRegKind(Kind); return MatchOperand_Success; } return MatchOperand_NoMatch; } +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCPU64Regs(SmallVectorImpl &Operands) { + + if (!isMips64()) + return MatchOperand_NoMatch; + return parseRegs(Operands, (int) MipsOperand::Kind_CPU64Regs); +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCPURegs(SmallVectorImpl &Operands) { + return parseRegs(Operands, (int) MipsOperand::Kind_CPURegs); +} bool MipsAsmParser::searchSymbolAlias( - SmallVectorImpl &Operands, unsigned RegisterKind) { + SmallVectorImpl &Operands, unsigned RegKind) { MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier()); if (Sym) { @@ -1275,6 +1292,7 @@ bool MipsAsmParser::searchSymbolAlias( else return false; if (Expr->getKind() == MCExpr::SymbolRef) { + MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind) RegKind; const MCSymbolRefExpr *Ref = static_cast(Expr); const StringRef DefSymbol = Ref->getSymbol().getName(); if (DefSymbol.startswith("$")) { @@ -1285,14 +1303,28 @@ bool MipsAsmParser::searchSymbolAlias( isMips64() ? Mips::CPU64RegsRegClassID : Mips::CPURegsRegClassID); - else - // Lookup for the register with corresponding name - RegNum = matchRegisterName(DefSymbol.substr(1), isMips64()); + else { + // Lookup for the register with the corresponding name. + switch (Kind) { + case MipsOperand::Kind_AFGR64Regs: + case MipsOperand::Kind_FGR64Regs: + RegNum = matchFPURegisterName(DefSymbol.substr(1), FP_FORMAT_D); + break; + case MipsOperand::Kind_FGR32Regs: + RegNum = matchFPURegisterName(DefSymbol.substr(1), FP_FORMAT_S); + break; + case MipsOperand::Kind_CPU64Regs: + case MipsOperand::Kind_CPURegs: + default: + RegNum = matchRegisterName(DefSymbol.substr(1), isMips64()); + break; + } + } if (RegNum > -1) { Parser.Lex(); MipsOperand *op = MipsOperand::CreateReg(RegNum, S, Parser.getTok().getLoc()); - op->setRegKind((MipsOperand::RegisterKind) RegisterKind); + op->setRegKind(Kind); Operands.push_back(op); return true; } @@ -1310,28 +1342,6 @@ bool MipsAsmParser::searchSymbolAlias( } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::parseCPURegs(SmallVectorImpl &Operands) { - - if (getLexer().getKind() == AsmToken::Identifier) { - if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs)) - return MatchOperand_Success; - return MatchOperand_NoMatch; - } - // If the first token is not '$' we have an error. - if (Parser.getTok().isNot(AsmToken::Dollar)) - return MatchOperand_NoMatch; - - Parser.Lex(); // Eat $ - if (!tryParseRegisterOperand(Operands, false)) { - // Set the proper register kind. - MipsOperand* op = static_cast(Operands.back()); - op->setRegKind(MipsOperand::Kind_CPURegs); - return MatchOperand_Success; - } - return MatchOperand_NoMatch; -} - -MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseHWRegs(SmallVectorImpl &Operands) { if (isMips64()) -- cgit v1.1 From 0db5379fe643cbe738b4831e337251819cc5dc5d Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 20 Jun 2013 16:15:12 +0000 Subject: [PowerPC] Support compare mnemonics with implied CR0 Just like for branch mnemonics (where support was recently added), the assembler is supposed to support extended mnemonics for the compare instructions where no condition register is specified explicitly (and CR0 is assumed implicitly). This patch adds support for those extended compare mnemonics. Index: llvm-head/test/MC/PowerPC/ppc64-encoding-ext.s =================================================================== --- llvm-head.orig/test/MC/PowerPC/ppc64-encoding-ext.s +++ llvm-head/test/MC/PowerPC/ppc64-encoding-ext.s @@ -449,21 +449,37 @@ # CHECK: cmpdi 2, 3, 128 # encoding: [0x2d,0x23,0x00,0x80] cmpdi 2, 3, 128 +# CHECK: cmpdi 0, 3, 128 # encoding: [0x2c,0x23,0x00,0x80] + cmpdi 3, 128 # CHECK: cmpd 2, 3, 4 # encoding: [0x7d,0x23,0x20,0x00] cmpd 2, 3, 4 +# CHECK: cmpd 0, 3, 4 # encoding: [0x7c,0x23,0x20,0x00] + cmpd 3, 4 # CHECK: cmpldi 2, 3, 128 # encoding: [0x29,0x23,0x00,0x80] cmpldi 2, 3, 128 +# CHECK: cmpldi 0, 3, 128 # encoding: [0x28,0x23,0x00,0x80] + cmpldi 3, 128 # CHECK: cmpld 2, 3, 4 # encoding: [0x7d,0x23,0x20,0x40] cmpld 2, 3, 4 +# CHECK: cmpld 0, 3, 4 # encoding: [0x7c,0x23,0x20,0x40] + cmpld 3, 4 # CHECK: cmpwi 2, 3, 128 # encoding: [0x2d,0x03,0x00,0x80] cmpwi 2, 3, 128 +# CHECK: cmpwi 0, 3, 128 # encoding: [0x2c,0x03,0x00,0x80] + cmpwi 3, 128 # CHECK: cmpw 2, 3, 4 # encoding: [0x7d,0x03,0x20,0x00] cmpw 2, 3, 4 +# CHECK: cmpw 0, 3, 4 # encoding: [0x7c,0x03,0x20,0x00] + cmpw 3, 4 # CHECK: cmplwi 2, 3, 128 # encoding: [0x29,0x03,0x00,0x80] cmplwi 2, 3, 128 +# CHECK: cmplwi 0, 3, 128 # encoding: [0x28,0x03,0x00,0x80] + cmplwi 3, 128 # CHECK: cmplw 2, 3, 4 # encoding: [0x7d,0x03,0x20,0x40] cmplw 2, 3, 4 +# CHECK: cmplw 0, 3, 4 # encoding: [0x7c,0x03,0x20,0x40] + cmplw 3, 4 # FIXME: Trap mnemonics Index: llvm-head/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm-head.orig/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm-head/lib/Target/PowerPC/PPCInstrInfo.td @@ -2201,3 +2201,12 @@ defm : BranchExtendedMnemonic<"ne", 68>; defm : BranchExtendedMnemonic<"nu", 100>; defm : BranchExtendedMnemonic<"ns", 100>; +def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>; +def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>; +def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>; + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184435 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 1b7ea93..4f00602 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2201,3 +2201,12 @@ defm : BranchExtendedMnemonic<"ne", 68>; defm : BranchExtendedMnemonic<"nu", 100>; defm : BranchExtendedMnemonic<"ns", 100>; +def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>; +def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>; +def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>; + -- cgit v1.1 From 027e94479c9e69eb3c3c5536fa9990d0b96e9510 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 20 Jun 2013 16:23:52 +0000 Subject: [PowerPC] Optimize @ha/@l constructs This patch adds support for having the assembler optimize fixups to constructs like "symbol@ha" or "symbol@l" if "symbol" can be resolved at assembler time. This optimization is already present in the PPCMCExpr.cpp code for handling PPC_HA16/PPC_LO16 target expressions. However, those target expression were used only on Darwin targets. This patch changes target expression code so that they are usable also with the GNU assembler (using the @ha / @l syntax instead of the ha16() / lo16() syntax), and changes the MCInst lowering code to generate those target expressions where appropriate. It also changes the asm parser to generate HA16/LO16 target expressions when parsing assembler source that uses the @ha / @l modifiers. The effect is that now the above- mentioned optimization automatically becomes available for those situations too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184436 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 92 ++++++++++++++++++++++++++- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 30 ++++++--- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 10 ++- lib/Target/PowerPC/PPCMCInstLower.cpp | 58 +++++++---------- 4 files changed, 144 insertions(+), 46 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 9cf16f0..59a5ef9 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCMCExpr.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" @@ -126,6 +127,10 @@ class PPCAsmParser : public MCTargetAsmParser { virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + const MCExpr *ExtractModifierFromExpr(const MCExpr *E, + PPCMCExpr::VariantKind &Variant); + bool ParseExpression(const MCExpr *&EVal); + bool ParseOperand(SmallVectorImpl &Operands); bool ParseDirectiveWord(unsigned Size, SMLoc L); @@ -540,6 +545,91 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { return Error(StartLoc, "invalid register name"); } +/// Extract @l/@ha modifier from expression. Recursively scan +/// the expression and check for VK_PPC_ADDR16_HA/VK_PPC_ADDR16_LO +/// symbol variants. If all symbols with modifier use the same +/// variant, return the corresponding PPCMCExpr::VariantKind, +/// and a modified expression using the default symbol variant. +/// Otherwise, return NULL. +const MCExpr *PPCAsmParser:: +ExtractModifierFromExpr(const MCExpr *E, + PPCMCExpr::VariantKind &Variant) { + MCContext &Context = getParser().getContext(); + Variant = PPCMCExpr::VK_PPC_None; + + switch (E->getKind()) { + case MCExpr::Target: + case MCExpr::Constant: + return 0; + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr *SRE = cast(E); + + switch (SRE->getKind()) { + case MCSymbolRefExpr::VK_PPC_ADDR16_HA: + Variant = PPCMCExpr::VK_PPC_HA16; + break; + case MCSymbolRefExpr::VK_PPC_ADDR16_LO: + Variant = PPCMCExpr::VK_PPC_LO16; + break; + default: + return 0; + } + + return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context); + } + + case MCExpr::Unary: { + const MCUnaryExpr *UE = cast(E); + const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant); + if (!Sub) + return 0; + return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context); + } + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(E); + PPCMCExpr::VariantKind LHSVariant, RHSVariant; + const MCExpr *LHS = ExtractModifierFromExpr(BE->getLHS(), LHSVariant); + const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant); + + if (!LHS && !RHS) + return 0; + + if (!LHS) LHS = BE->getLHS(); + if (!RHS) RHS = BE->getRHS(); + + if (LHSVariant == PPCMCExpr::VK_PPC_None) + Variant = RHSVariant; + else if (RHSVariant == PPCMCExpr::VK_PPC_None) + Variant = LHSVariant; + else if (LHSVariant == RHSVariant) + Variant = LHSVariant; + else + return 0; + + return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context); + } + } + + llvm_unreachable("Invalid expression kind!"); +} + +/// Parse an expression. This differs from the default "parseExpression" +/// in that it handles complex @l/@ha modifiers. +bool PPCAsmParser:: +ParseExpression(const MCExpr *&EVal) { + if (getParser().parseExpression(EVal)) + return true; + + PPCMCExpr::VariantKind Variant; + const MCExpr *E = ExtractModifierFromExpr(EVal, Variant); + if (E) + EVal = PPCMCExpr::Create(Variant, E, getParser().getContext()); + + return false; +} + bool PPCAsmParser:: ParseOperand(SmallVectorImpl &Operands) { SMLoc S = Parser.getTok().getLoc(); @@ -571,7 +661,7 @@ ParseOperand(SmallVectorImpl &Operands) { case AsmToken::Identifier: case AsmToken::Dot: case AsmToken::Dollar: - if (!getParser().parseExpression(EVal)) + if (!ParseExpression(EVal)) break; /* fall through */ default: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index f0613ff..3b794fe 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -11,25 +11,37 @@ #include "PPCMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" using namespace llvm; const PPCMCExpr* PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { - return new (Ctx) PPCMCExpr(Kind, Expr); + int AssemblerDialect = Ctx.getAsmInfo()->getAssemblerDialect(); + return new (Ctx) PPCMCExpr(Kind, Expr, AssemblerDialect); } void PPCMCExpr::PrintImpl(raw_ostream &OS) const { - switch (Kind) { - default: llvm_unreachable("Invalid kind!"); - case VK_PPC_HA16: OS << "ha16"; break; - case VK_PPC_LO16: OS << "lo16"; break; - } + if (isDarwinSyntax()) { + switch (Kind) { + default: llvm_unreachable("Invalid kind!"); + case VK_PPC_HA16: OS << "ha16"; break; + case VK_PPC_LO16: OS << "lo16"; break; + } + + OS << '('; + getSubExpr()->print(OS); + OS << ')'; + } else { + getSubExpr()->print(OS); - OS << '('; - getSubExpr()->print(OS); - OS << ')'; + switch (Kind) { + default: llvm_unreachable("Invalid kind!"); + case VK_PPC_HA16: OS << "@ha"; break; + case VK_PPC_LO16: OS << "@l"; break; + } + } } bool diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index a080537..1b57687 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -27,9 +27,11 @@ public: private: const VariantKind Kind; const MCExpr *Expr; + const int AssemblerDialect; - explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr) - : Kind(_Kind), Expr(_Expr) {} + explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr, + int _AssemblerDialect) + : Kind(_Kind), Expr(_Expr), AssemblerDialect(_AssemblerDialect) {} public: /// @name Construction @@ -56,6 +58,10 @@ public: /// getSubExpr - Get the child of this expression. const MCExpr *getSubExpr() const { return Expr; } + /// isDarwinSyntax - True if expression is to be printed using Darwin syntax. + bool isDarwinSyntax() const { return AssemblerDialect == 1; } + + /// @} void PrintImpl(raw_ostream &OS) const; diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index ba7efc1..7cecf25 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -111,30 +111,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK; - if (!isDarwin) { - switch (access) { - case PPCII::MO_HA16: - RefKind = MCSymbolRefExpr::VK_PPC_ADDR16_HA; - break; - case PPCII::MO_LO16: - RefKind = MCSymbolRefExpr::VK_PPC_ADDR16_LO; - break; - case PPCII::MO_TPREL16_HA: - RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA; - break; - case PPCII::MO_TPREL16_LO: - RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; - break; - case PPCII::MO_DTPREL16_LO: - RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; - break; - case PPCII::MO_TLSLD16_LO: - RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; - break; - case PPCII::MO_TOC16_LO: - RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; - break; - } + switch (access) { + case PPCII::MO_TPREL16_HA: + RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA; + break; + case PPCII::MO_TPREL16_LO: + RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; + break; + case PPCII::MO_DTPREL16_LO: + RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; + break; + case PPCII::MO_TLSLD16_LO: + RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; + break; + case PPCII::MO_TOC16_LO: + RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; + break; } const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); @@ -152,16 +144,14 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx); } - // Add Darwin ha16() / lo16() markers if required. - if (isDarwin) { - switch (access) { - case PPCII::MO_HA16: - Expr = PPCMCExpr::CreateHa16(Expr, Ctx); - break; - case PPCII::MO_LO16: - Expr = PPCMCExpr::CreateLo16(Expr, Ctx); - break; - } + // Add ha16() / lo16() markers if required. + switch (access) { + case PPCII::MO_HA16: + Expr = PPCMCExpr::CreateHa16(Expr, Ctx); + break; + case PPCII::MO_LO16: + Expr = PPCMCExpr::CreateLo16(Expr, Ctx); + break; } return MCOperand::CreateExpr(Expr); -- cgit v1.1 From 06eb45c358c0872c8f9a82f601d89d0a7329d38d Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 20 Jun 2013 16:38:00 +0000 Subject: [PowerPC] Add missing build dependency This (hopefully) fixes build failures resulting from r184436; the PowerPC asm parser now depends on PowerPC target expresssions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184439 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/LLVMBuild.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt index bd08c13..02ebf1d 100644 --- a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt +++ b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = PowerPCAsmParser parent = PowerPC -required_libraries = PowerPCInfo MC MCParser Support +required_libraries = PowerPCDesc PowerPCInfo MC MCParser Support add_to_library_groups = PowerPC -- cgit v1.1 From ea18f0cc4d3595ed55b53faf08ead1fc3d5abfa3 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 20 Jun 2013 16:58:14 +0000 Subject: [PowerPC] Remove unused parameter The isDarwin parameter to the llvm::LowerPPCMachineInstrToMCInst routine is now no longer needed; remove it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184441 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPC.h | 2 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 12 ++++++------ lib/Target/PowerPC/PPCMCInstLower.cpp | 14 +++++++------- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 2e79610..bcad94f 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -40,7 +40,7 @@ namespace llvm { FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin); + AsmPrinter &AP); /// \brief Creates an PPC-specific Target Transformation Info pass. ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 6e6d653..8fe53c4 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -352,7 +352,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDtocCPT: case PPC::LDtoc: { // Transform %X3 = LDtoc , %X2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. @@ -381,7 +381,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStocHA: { // Transform %Xd = ADDIStocHA %X2, - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); // Change the opcode to ADDIS8. If the global address is external, // has common linkage, is a function address, or is a jump table @@ -425,7 +425,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::LDtocL: { // Transform %Xd = LDtocL , %Xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); // Change the opcode to LD. If the global address is external, has // common linkage, or is a jump table address, then reference the @@ -462,7 +462,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDItocL: { // Transform %Xd = ADDItocL %Xs, - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); // Change the opcode to ADDI8. If the global address is external, then // generate a TOC entry and reference that. Otherwise reference the @@ -514,7 +514,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::LDgotTprelL: { // Transform %Xd = LDgotTprelL , %Xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); // Change the opcode to LD. TmpInst.setOpcode(PPC::LD); @@ -681,7 +681,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); OutStreamer.EmitInstruction(TmpInst); } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 7cecf25..284ab14 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -105,7 +105,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, - AsmPrinter &Printer, bool isDarwin) { + AsmPrinter &Printer) { MCContext &Ctx = Printer.OutContext; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; @@ -158,7 +158,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, } void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin) { + AsmPrinter &AP) { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -182,17 +182,17 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, break; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin); + MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP); break; case MachineOperand::MO_JumpTableIndex: - MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); + MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); break; case MachineOperand::MO_ConstantPoolIndex: - MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); + MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); break; case MachineOperand::MO_BlockAddress: - MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP, - isDarwin); + MCOp = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), + AP); break; case MachineOperand::MO_RegisterMask: continue; -- cgit v1.1 From 4cbbbf49b69646ff990203ef3feae6a2726b8753 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 20 Jun 2013 17:42:36 +0000 Subject: This reverts r155000. The cdp2 instruction should have the same restrictions as cdp on the co-processor registers. VFP instructions on v8/AArch32 share the same encoding space as cdp2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184445 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cc17b00..8003e51 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1007,11 +1007,6 @@ def p_imm : Operand { let DecoderMethod = "DecodeCoprocessor"; } -def pf_imm : Operand { - let PrintMethod = "printPImmediate"; - let ParserMatchClass = CoprocNumAsmOperand; -} - def CoprocRegAsmOperand : AsmOperandClass { let Name = "CoprocReg"; let ParserMethod = "parseCoprocRegOperand"; @@ -4447,7 +4442,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{23-20} = opc1; } -def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1, +def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, -- cgit v1.1 From eb3aa070c9b3984c375ef65ef6e5f113efd7e968 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 20 Jun 2013 21:55:23 +0000 Subject: R600: Expand v2i32 load/store instead of custom lowering The custom lowering causes llc to crash with a segfault. Ideally, the custom lowering can be fixed, but this allows programs which load/store v2i32 to work without crashing. Patch by: Aaron Watry Reviewed-by: Tom Stellard Signed-off-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184480 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 9cedadb..812df83 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -86,7 +86,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Expand); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); @@ -94,7 +94,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom); setOperationAction(ISD::STORE, MVT::i8, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Expand); setOperationAction(ISD::STORE, MVT::v4i32, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); -- cgit v1.1 From fe91c515d788c4e07413704bcdaaa7de84e77be8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 20 Jun 2013 21:55:30 +0000 Subject: R600/SI: Expand add for v2i32 and v4i32 Also add SI tests to existing file and a v2i32 test for both R600 and SI. Patch by: Aaron Watry Reviewed-by: Tom Stellard Signed-off-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184481 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index d74f401..bf4918a 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -65,6 +65,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); + setOperationAction(ISD::ADD, MVT::v4i32, Expand); + setOperationAction(ISD::ADD, MVT::v2i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -- cgit v1.1 From 4010e438100fedeacd36ecd2385adabc02b6f236 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 20 Jun 2013 21:55:37 +0000 Subject: R600/SI: Expand sub for v2i32 and v4i32 for SI Also add a v2i32 test to the existing v4i32 test. Patch by: Aaron Watry Reviewed-by: Tom Stellard Signed-off-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184482 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index bf4918a..ea2b123 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -68,6 +68,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADD, MVT::v4i32, Expand); setOperationAction(ISD::ADD, MVT::v2i32, Expand); + setOperationAction(ISD::SUB, MVT::v2i32, Expand); + setOperationAction(ISD::SUB, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -- cgit v1.1 From 769accfb4d71caff9152309eaa5e704e065b5846 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 20 Jun 2013 22:04:40 +0000 Subject: [PowerPC] Minor cleanup in PPCELFObjectWriter::getRelocTypeInner This just re-sorts the big switch statement in PPCELFObjectWriter::getRelocTypeInner to follow the (numerical) order of the reloc types, and fixes a couple of whitespace issues. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184485 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 48 +++++++++++----------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 7188f93..e6a2bc4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -84,51 +84,51 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_half16: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_PPC_TPREL16_HA: - Type = ELF::R_PPC_TPREL16_HA; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16; break; - case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: - Type = ELF::R_PPC64_DTPREL16_HA; + case MCSymbolRefExpr::VK_PPC_ADDR16_LO: + Type = ELF::R_PPC_ADDR16_LO; break; case MCSymbolRefExpr::VK_PPC_ADDR16_HA: Type = ELF::R_PPC_ADDR16_HA; - break; - case MCSymbolRefExpr::VK_PPC_TOC16_HA: - Type = ELF::R_PPC64_TOC16_HA; break; - case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: - Type = ELF::R_PPC64_GOT_TPREL16_HA; + case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: + Type = ELF::R_PPC64_TOC16; break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: - Type = ELF::R_PPC64_GOT_TLSGD16_HA; + case MCSymbolRefExpr::VK_PPC_TOC16_LO: + Type = ELF::R_PPC64_TOC16_LO; break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: - Type = ELF::R_PPC64_GOT_TLSLD16_HA; + case MCSymbolRefExpr::VK_PPC_TOC16_HA: + Type = ELF::R_PPC64_TOC16_HA; break; case MCSymbolRefExpr::VK_PPC_TPREL16_LO: Type = ELF::R_PPC_TPREL16_LO; break; + case MCSymbolRefExpr::VK_PPC_TPREL16_HA: + Type = ELF::R_PPC_TPREL16_HA; + break; case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: Type = ELF::R_PPC64_DTPREL16_LO; break; - case MCSymbolRefExpr::VK_None: - Type = ELF::R_PPC_ADDR16; - break; - case MCSymbolRefExpr::VK_PPC_ADDR16_LO: - Type = ELF::R_PPC_ADDR16_LO; - break; - case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: - Type = ELF::R_PPC64_TOC16; - break; - case MCSymbolRefExpr::VK_PPC_TOC16_LO: - Type = ELF::R_PPC64_TOC16_LO; + case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: + Type = ELF::R_PPC64_DTPREL16_HA; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO: Type = ELF::R_PPC64_GOT_TLSGD16_LO; break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: + Type = ELF::R_PPC64_GOT_TLSGD16_HA; + break; case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: Type = ELF::R_PPC64_GOT_TLSLD16_LO; break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: + Type = ELF::R_PPC64_GOT_TLSLD16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: + Type = ELF::R_PPC64_GOT_TPREL16_HA; + break; } break; case PPC::fixup_ppc_half16ds: -- cgit v1.1 From 46d7de7a192f43eb568c26c88e2dc2b804c09614 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 20 Jun 2013 22:32:18 +0000 Subject: Update the X86 disassembler to use xacquire and xrelease when appropriate. This is a bit tricky as the xacquire and xrelease hints use the same bytes, 0xf2 and 0xf3, as the repne and rep prefixes. Fortunately llvm has different llvm MCInst Opcode enums for rep/xrelease and repne/xacquire. So to make this work a boolean was added the InternalInstruction struct as part of the Prefix state which is set with the added logic in readPrefixes() when decoding an instruction to determine if these prefix bytes are to be disassembled as xacquire or xrelease. Then we let the matcher pick the normal prefix instructionID and we change the Opcode after that when it is set into the MCInst being created. rdar://11019859 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184490 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/Disassembler/X86Disassembler.cpp | 9 +++++++++ .../X86/Disassembler/X86DisassemblerDecoder.c | 21 +++++++++++++++++++++ .../X86/Disassembler/X86DisassemblerDecoder.h | 2 ++ 3 files changed, 32 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index ca71c4f..f03068e 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -683,6 +683,15 @@ static bool translateInstruction(MCInst &mcInst, } mcInst.setOpcode(insn.instructionID); + // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 + // prefix bytes should be disassembled as xrelease and xacquire then set the + // opcode to those instead of the rep and repne opcodes. + if (insn.xAcquireRelease) { + if(mcInst.getOpcode() == X86::REP_PREFIX) + mcInst.setOpcode(X86::XRELEASE_PREFIX); + else if(mcInst.getOpcode() == X86::REPNE_PREFIX) + mcInst.setOpcode(X86::XACQUIRE_PREFIX); + } int index; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index e40edba..55ab8eb 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -328,6 +328,27 @@ static int readPrefixes(struct InternalInstruction* insn) { break; if (lookAtByte(insn, &nextByte)) return -1; + /* + * If the byte is 0xf2 or 0xf3, and any of the following conditions are + * met: + * - it is followed by a LOCK (0xf0) prefix + * - it is followed by an xchg instruction + * then it should be disassembled as a xacquire/xrelease not repne/rep. + */ + if ((byte == 0xf2 || byte == 0xf3) && + ((nextByte == 0xf0) | + ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) + insn->xAcquireRelease = TRUE; + /* + * Also if the byte is 0xf3, and the following condition is met: + * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or + * "mov mem, imm" (opcode 0xc6/0xc7) instructions. + * then it should be disassembled as an xrelease not rep. + */ + if (byte == 0xf3 && + (nextByte == 0x88 || nextByte == 0x89 || + nextByte == 0xc6 || nextByte == 0xc7)) + insn->xAcquireRelease = TRUE; if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { if (consumeByte(insn, &nextByte)) return -1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 407ead3..04a0dc0 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -457,6 +457,8 @@ struct InternalInstruction { uint64_t necessaryPrefixLocation; /* The segment override type */ SegmentOverride segmentOverride; + /* 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease */ + BOOL xAcquireRelease; /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; -- cgit v1.1 From 846565924a6f2932efc75c249b29c3619e587bbb Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 20 Jun 2013 22:39:42 +0000 Subject: [PowerPC] Clean up VK_PPC_TOC... names This is another minor cleanup; to bring enum names in line with the corresponding @modifier names, this renames: VK_PPC_TOC -> VK_PPC_TOCBASE VK_PPC_TOC_ENTRY -> VK_PPC_TOC16 No code change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184491 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 6 +++--- lib/Target/PowerPC/PPCAsmPrinter.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index e6a2bc4..1881645 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -93,7 +93,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_ADDR16_HA: Type = ELF::R_PPC_ADDR16_HA; break; - case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: + case MCSymbolRefExpr::VK_PPC_TOC16: Type = ELF::R_PPC64_TOC16; break; case MCSymbolRefExpr::VK_PPC_TOC16_LO: @@ -140,7 +140,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_ADDR16_LO: Type = ELF::R_PPC64_ADDR16_LO_DS; break; - case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: + case MCSymbolRefExpr::VK_PPC_TOC16: Type = ELF::R_PPC64_TOC16_DS; break; case MCSymbolRefExpr::VK_PPC_TOC16_LO: @@ -168,7 +168,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case FK_Data_8: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_PPC_TOC: + case MCSymbolRefExpr::VK_PPC_TOCBASE: Type = ELF::R_PPC64_TOC; break; case MCSymbolRefExpr::VK_None: diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8fe53c4..6121304 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -372,7 +372,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC16, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -706,7 +706,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); // Generates a R_PPC64_TOC relocation for TOC base insertion. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, - MCSymbolRefExpr::VK_PPC_TOC, OutContext), + MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext), 8/*size*/); // Emit a null environment pointer. OutStreamer.EmitIntValue(0, 8 /* size */); -- cgit v1.1 From 2b7cdf09a142b7f3e9a0ec8c7044eaf89bc59caa Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Thu, 20 Jun 2013 22:51:44 +0000 Subject: ARM: Remove a (false) dependency on the memoryoperand's value as we do not use it at the moment. This allows to form more paired loads even when stack coloring pass destroys the memoryoperand's value. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184492 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c8ed576..4e97dda 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1602,8 +1602,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; // Make sure the base address satisfies i64 ld / st alignment requirement. + // At the moment, we ignore the memoryoperand's value. + // If we want to use AliasAnalysis, we should check it accordingly. if (!Op0->hasOneMemOperand() || - !(*Op0->memoperands_begin())->getValue() || (*Op0->memoperands_begin())->isVolatile()) return false; -- cgit v1.1 From 92cfa61c50d01307d658753f8d47f4e8555a6fa9 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 21 Jun 2013 14:42:20 +0000 Subject: [PowerPC] Rename some more VK_PPC_ enums This renames more VK_PPC_ enums, to make them more closely reflect the @modifier string they represent. This also prepares for adding a bunch of new VK_PPC_ enums in upcoming patches. For consistency, some MO_ flags related to VK_PPC_ enums are likewise renamed. No change in behaviour. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184547 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 10 +++--- .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 36 +++++++++++----------- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 24 +++++++-------- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 12 ++++---- lib/Target/PowerPC/PPC.h | 16 +++++----- lib/Target/PowerPC/PPCAsmPrinter.cpp | 32 +++++++++---------- lib/Target/PowerPC/PPCCodeEmitter.cpp | 4 +-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 6 ++-- lib/Target/PowerPC/PPCISelLowering.cpp | 8 ++--- lib/Target/PowerPC/PPCMCInstLower.cpp | 28 ++++++++--------- 10 files changed, 88 insertions(+), 88 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 59a5ef9..752b6f7 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -546,7 +546,7 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { } /// Extract @l/@ha modifier from expression. Recursively scan -/// the expression and check for VK_PPC_ADDR16_HA/VK_PPC_ADDR16_LO +/// the expression and check for VK_PPC_LO / VK_PPC_HA /// symbol variants. If all symbols with modifier use the same /// variant, return the corresponding PPCMCExpr::VariantKind, /// and a modified expression using the default symbol variant. @@ -566,11 +566,11 @@ ExtractModifierFromExpr(const MCExpr *E, const MCSymbolRefExpr *SRE = cast(E); switch (SRE->getKind()) { - case MCSymbolRefExpr::VK_PPC_ADDR16_HA: - Variant = PPCMCExpr::VK_PPC_HA16; + case MCSymbolRefExpr::VK_PPC_LO: + Variant = PPCMCExpr::VK_PPC_LO; break; - case MCSymbolRefExpr::VK_PPC_ADDR16_LO: - Variant = PPCMCExpr::VK_PPC_LO16; + case MCSymbolRefExpr::VK_PPC_HA: + Variant = PPCMCExpr::VK_PPC_HA; break; default: return 0; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 1881645..0eb13b4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -87,46 +87,46 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16; break; - case MCSymbolRefExpr::VK_PPC_ADDR16_LO: + case MCSymbolRefExpr::VK_PPC_LO: Type = ELF::R_PPC_ADDR16_LO; break; - case MCSymbolRefExpr::VK_PPC_ADDR16_HA: + case MCSymbolRefExpr::VK_PPC_HA: Type = ELF::R_PPC_ADDR16_HA; break; - case MCSymbolRefExpr::VK_PPC_TOC16: + case MCSymbolRefExpr::VK_PPC_TOC: Type = ELF::R_PPC64_TOC16; break; - case MCSymbolRefExpr::VK_PPC_TOC16_LO: + case MCSymbolRefExpr::VK_PPC_TOC_LO: Type = ELF::R_PPC64_TOC16_LO; break; - case MCSymbolRefExpr::VK_PPC_TOC16_HA: + case MCSymbolRefExpr::VK_PPC_TOC_HA: Type = ELF::R_PPC64_TOC16_HA; break; - case MCSymbolRefExpr::VK_PPC_TPREL16_LO: + case MCSymbolRefExpr::VK_PPC_TPREL_LO: Type = ELF::R_PPC_TPREL16_LO; break; - case MCSymbolRefExpr::VK_PPC_TPREL16_HA: + case MCSymbolRefExpr::VK_PPC_TPREL_HA: Type = ELF::R_PPC_TPREL16_HA; break; - case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + case MCSymbolRefExpr::VK_PPC_DTPREL_LO: Type = ELF::R_PPC64_DTPREL16_LO; break; - case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: + case MCSymbolRefExpr::VK_PPC_DTPREL_HA: Type = ELF::R_PPC64_DTPREL16_HA; break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO: Type = ELF::R_PPC64_GOT_TLSGD16_LO; break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA: Type = ELF::R_PPC64_GOT_TLSGD16_HA; break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO: Type = ELF::R_PPC64_GOT_TLSLD16_LO; break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA: Type = ELF::R_PPC64_GOT_TLSLD16_HA; break; - case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA: Type = ELF::R_PPC64_GOT_TPREL16_HA; break; } @@ -137,16 +137,16 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC64_ADDR16_DS; break; - case MCSymbolRefExpr::VK_PPC_ADDR16_LO: + case MCSymbolRefExpr::VK_PPC_LO: Type = ELF::R_PPC64_ADDR16_LO_DS; break; - case MCSymbolRefExpr::VK_PPC_TOC16: + case MCSymbolRefExpr::VK_PPC_TOC: Type = ELF::R_PPC64_TOC16_DS; break; - case MCSymbolRefExpr::VK_PPC_TOC16_LO: + case MCSymbolRefExpr::VK_PPC_TOC_LO: Type = ELF::R_PPC64_TOC16_LO_DS; break; - case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO: Type = ELF::R_PPC64_GOT_TPREL16_LO_DS; break; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 3b794fe..b29d6ff 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -26,8 +26,8 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { if (isDarwinSyntax()) { switch (Kind) { default: llvm_unreachable("Invalid kind!"); - case VK_PPC_HA16: OS << "ha16"; break; - case VK_PPC_LO16: OS << "lo16"; break; + case VK_PPC_LO: OS << "lo16"; break; + case VK_PPC_HA: OS << "ha16"; break; } OS << '('; @@ -38,8 +38,8 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { switch (Kind) { default: llvm_unreachable("Invalid kind!"); - case VK_PPC_HA16: OS << "@ha"; break; - case VK_PPC_LO16: OS << "@l"; break; + case VK_PPC_LO: OS << "@l"; break; + case VK_PPC_HA: OS << "@ha"; break; } } } @@ -57,12 +57,12 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, switch (Kind) { default: llvm_unreachable("Invalid kind!"); - case VK_PPC_HA16: - Result = ((Result >> 16) + ((Result & 0x8000) ? 1 : 0)) & 0xffff; - break; - case VK_PPC_LO16: + case VK_PPC_LO: Result = Result & 0xffff; break; + case VK_PPC_HA: + Result = ((Result >> 16) + ((Result & 0x8000) ? 1 : 0)) & 0xffff; + break; } Res = MCValue::get(Result); } else { @@ -74,11 +74,11 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, switch (Kind) { default: llvm_unreachable("Invalid kind!"); - case VK_PPC_HA16: - Modifier = MCSymbolRefExpr::VK_PPC_ADDR16_HA; + case VK_PPC_LO: + Modifier = MCSymbolRefExpr::VK_PPC_LO; break; - case VK_PPC_LO16: - Modifier = MCSymbolRefExpr::VK_PPC_ADDR16_LO; + case VK_PPC_HA: + Modifier = MCSymbolRefExpr::VK_PPC_HA; break; } Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 1b57687..8a7b78d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -20,8 +20,8 @@ class PPCMCExpr : public MCTargetExpr { public: enum VariantKind { VK_PPC_None, - VK_PPC_HA16, - VK_PPC_LO16 + VK_PPC_LO, + VK_PPC_HA }; private: @@ -40,12 +40,12 @@ public: static const PPCMCExpr *Create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx); - static const PPCMCExpr *CreateHa16(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_PPC_HA16, Expr, Ctx); + static const PPCMCExpr *CreateLo(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_PPC_LO, Expr, Ctx); } - static const PPCMCExpr *CreateLo16(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_PPC_LO16, Expr, Ctx); + static const PPCMCExpr *CreateHa(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_PPC_HA, Expr, Ctx); } /// @} diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index bcad94f..d5a08ee 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -74,18 +74,18 @@ namespace llvm { /// The next are not flags but distinct values. MO_ACCESS_MASK = 0xf0, - /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) - MO_LO16 = 1 << 4, - MO_HA16 = 2 << 4, + /// MO_LO, MO_HA - lo16(symbol) and ha16(symbol) + MO_LO = 1 << 4, + MO_HA = 2 << 4, - MO_TPREL16_HA = 3 << 4, - MO_TPREL16_LO = 4 << 4, + MO_TPREL_LO = 4 << 4, + MO_TPREL_HA = 3 << 4, /// These values identify relocations on immediates folded /// into memory operations. - MO_DTPREL16_LO = 5 << 4, - MO_TLSLD16_LO = 6 << 4, - MO_TOC16_LO = 7 << 4 + MO_DTPREL_LO = 5 << 4, + MO_TLSLD_LO = 6 << 4, + MO_TOC_LO = 7 << 4 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 6121304..6af3072 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -372,7 +372,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC16, + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -417,7 +417,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, OutContext); TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -454,7 +454,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -490,7 +490,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -504,7 +504,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymGotTprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -522,7 +522,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -536,7 +536,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymGotTlsGD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -552,7 +552,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymGotTlsGD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) @@ -588,7 +588,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymGotTlsLD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -604,7 +604,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymGotTlsLD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) @@ -640,7 +640,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymDtprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -656,7 +656,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymDtprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) @@ -891,7 +891,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // mflr r11 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) - const MCExpr *SubHa16 = PPCMCExpr::CreateHa16(Sub, OutContext); + const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) @@ -901,7 +901,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) - const MCExpr *SubLo16 = PPCMCExpr::CreateLo16(Sub, OutContext); + const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(SubLo16).addExpr(SubLo16) @@ -946,14 +946,14 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); // lis r11, ha16(LazyPtr) - const MCExpr *LazyPtrHa16 = PPCMCExpr::CreateHa16(LazyPtrExpr, OutContext); + const MCExpr *LazyPtrHa16 = PPCMCExpr::CreateHa(LazyPtrExpr, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS) .addReg(PPC::R11) .addExpr(LazyPtrHa16)); // ldu r12, lo16(LazyPtr)(r11) // lwzu r12, lo16(LazyPtr)(r11) - const MCExpr *LazyPtrLo16 = PPCMCExpr::CreateLo16(LazyPtrExpr, OutContext); + const MCExpr *LazyPtrLo16 = PPCMCExpr::CreateLo(LazyPtrExpr, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 40e4968..0ad4ea3 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -201,8 +201,8 @@ unsigned PPCCodeEmitter::getS16ImmEncoding(const MachineInstr &MI, unsigned RelocID; switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) { default: llvm_unreachable("Unsupported target operand flags!"); - case PPCII::MO_HA16: RelocID = PPC::reloc_absolute_high; break; - case PPCII::MO_LO16: RelocID = PPC::reloc_absolute_low; break; + case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break; + case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break; } MCE.addRelocation(GetRelocation(MO, RelocID)); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index e006945..35f4b7c 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1502,13 +1502,13 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { continue; break; case PPC::ADDIdtprelL: - Flags = PPCII::MO_DTPREL16_LO; + Flags = PPCII::MO_DTPREL_LO; break; case PPC::ADDItlsldL: - Flags = PPCII::MO_TLSLD16_LO; + Flags = PPCII::MO_TLSLD_LO; break; case PPC::ADDItocL: - Flags = PPCII::MO_TOC16_LO; + Flags = PPCII::MO_TOC_LO; break; } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index e2433e7..a0856ec 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1236,8 +1236,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV = 0) { - HiOpFlags = PPCII::MO_HA16; - LoOpFlags = PPCII::MO_LO16; + HiOpFlags = PPCII::MO_HA; + LoOpFlags = PPCII::MO_LO; // Don't use the pic base if not in PIC relocation model. Or if we are on a // non-darwin platform. We don't support PIC on other platforms yet. @@ -1350,9 +1350,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, if (Model == TLSModel::LocalExec) { SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TPREL16_HA); + PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TPREL16_LO); + PPCII::MO_TPREL_LO); SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, is64bit ? MVT::i64 : MVT::i32); SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 284ab14..1eefb7f 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -112,20 +112,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK; switch (access) { - case PPCII::MO_TPREL16_HA: - RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA; + case PPCII::MO_TPREL_LO: + RefKind = MCSymbolRefExpr::VK_PPC_TPREL_LO; break; - case PPCII::MO_TPREL16_LO: - RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; + case PPCII::MO_TPREL_HA: + RefKind = MCSymbolRefExpr::VK_PPC_TPREL_HA; break; - case PPCII::MO_DTPREL16_LO: - RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; + case PPCII::MO_DTPREL_LO: + RefKind = MCSymbolRefExpr::VK_PPC_DTPREL_LO; break; - case PPCII::MO_TLSLD16_LO: - RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; + case PPCII::MO_TLSLD_LO: + RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO; break; - case PPCII::MO_TOC16_LO: - RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; + case PPCII::MO_TOC_LO: + RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO; break; } @@ -146,11 +146,11 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Add ha16() / lo16() markers if required. switch (access) { - case PPCII::MO_HA16: - Expr = PPCMCExpr::CreateHa16(Expr, Ctx); + case PPCII::MO_LO: + Expr = PPCMCExpr::CreateLo(Expr, Ctx); break; - case PPCII::MO_LO16: - Expr = PPCMCExpr::CreateLo16(Expr, Ctx); + case PPCII::MO_HA: + Expr = PPCMCExpr::CreateHa(Expr, Ctx); break; } -- cgit v1.1 From d2849572463da994c685b3bd7a60d5a7566c01e3 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 21 Jun 2013 14:42:49 +0000 Subject: [PowerPC] Support @h modifier This adds necessary infrastructure to support the @h modifier. Note that all required relocation types were already present (and unused). This patch provides support for using @h in the assembler; it would also be possible to now use this feature in code generated by the compiler, but this is not done yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184548 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 5 ++++- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 3 +++ lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 8 ++++++++ lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 5 +++++ 4 files changed, 20 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 752b6f7..964f272 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -546,7 +546,7 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { } /// Extract @l/@ha modifier from expression. Recursively scan -/// the expression and check for VK_PPC_LO / VK_PPC_HA +/// the expression and check for VK_PPC_LO/HI/HA /// symbol variants. If all symbols with modifier use the same /// variant, return the corresponding PPCMCExpr::VariantKind, /// and a modified expression using the default symbol variant. @@ -569,6 +569,9 @@ ExtractModifierFromExpr(const MCExpr *E, case MCSymbolRefExpr::VK_PPC_LO: Variant = PPCMCExpr::VK_PPC_LO; break; + case MCSymbolRefExpr::VK_PPC_HI: + Variant = PPCMCExpr::VK_PPC_HI; + break; case MCSymbolRefExpr::VK_PPC_HA: Variant = PPCMCExpr::VK_PPC_HA; break; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 0eb13b4..9a52816 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -90,6 +90,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_LO: Type = ELF::R_PPC_ADDR16_LO; break; + case MCSymbolRefExpr::VK_PPC_HI: + Type = ELF::R_PPC_ADDR16_HI; + break; case MCSymbolRefExpr::VK_PPC_HA: Type = ELF::R_PPC_ADDR16_HA; break; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index b29d6ff..8a346b4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -27,6 +27,7 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { switch (Kind) { default: llvm_unreachable("Invalid kind!"); case VK_PPC_LO: OS << "lo16"; break; + case VK_PPC_HI: OS << "hi16"; break; case VK_PPC_HA: OS << "ha16"; break; } @@ -39,6 +40,7 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { switch (Kind) { default: llvm_unreachable("Invalid kind!"); case VK_PPC_LO: OS << "@l"; break; + case VK_PPC_HI: OS << "@h"; break; case VK_PPC_HA: OS << "@ha"; break; } } @@ -60,6 +62,9 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, case VK_PPC_LO: Result = Result & 0xffff; break; + case VK_PPC_HI: + Result = (Result >> 16) & 0xffff; + break; case VK_PPC_HA: Result = ((Result >> 16) + ((Result & 0x8000) ? 1 : 0)) & 0xffff; break; @@ -77,6 +82,9 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, case VK_PPC_LO: Modifier = MCSymbolRefExpr::VK_PPC_LO; break; + case VK_PPC_HI: + Modifier = MCSymbolRefExpr::VK_PPC_HI; + break; case VK_PPC_HA: Modifier = MCSymbolRefExpr::VK_PPC_HA; break; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 8a7b78d..150acf6 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -21,6 +21,7 @@ public: enum VariantKind { VK_PPC_None, VK_PPC_LO, + VK_PPC_HI, VK_PPC_HA }; @@ -44,6 +45,10 @@ public: return Create(VK_PPC_LO, Expr, Ctx); } + static const PPCMCExpr *CreateHi(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_PPC_HI, Expr, Ctx); + } + static const PPCMCExpr *CreateHa(const MCExpr *Expr, MCContext &Ctx) { return Create(VK_PPC_HA, Expr, Ctx); } -- cgit v1.1 From f8f87dcfceadd1b842d130303a7091ad7d7d67d0 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 21 Jun 2013 14:43:10 +0000 Subject: [PowerPC] Support @toc@h modifier This adds the relocation type and other necessary infrastructure to use the @toc@h modifier in the assembler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184549 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 9a52816..ed77529 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -102,6 +102,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TOC_LO: Type = ELF::R_PPC64_TOC16_LO; break; + case MCSymbolRefExpr::VK_PPC_TOC_HI: + Type = ELF::R_PPC64_TOC16_HI; + break; case MCSymbolRefExpr::VK_PPC_TOC_HA: Type = ELF::R_PPC64_TOC16_HA; break; -- cgit v1.1 From f7c1ee79fe90353fcd3f545f9d45a01a837bbf4b Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 21 Jun 2013 14:43:42 +0000 Subject: [PowerPC] Support @higher et.al. modifiers This adds support for the @higher, @highera, @highest, and @highesta modifers, including some missing relocation types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184550 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 12 +++++++++ .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 12 +++++++++ lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 30 +++++++++++++++++++++- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 6 ++++- 4 files changed, 58 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 964f272..6318d41 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -575,6 +575,18 @@ ExtractModifierFromExpr(const MCExpr *E, case MCSymbolRefExpr::VK_PPC_HA: Variant = PPCMCExpr::VK_PPC_HA; break; + case MCSymbolRefExpr::VK_PPC_HIGHER: + Variant = PPCMCExpr::VK_PPC_HIGHER; + break; + case MCSymbolRefExpr::VK_PPC_HIGHERA: + Variant = PPCMCExpr::VK_PPC_HIGHERA; + break; + case MCSymbolRefExpr::VK_PPC_HIGHEST: + Variant = PPCMCExpr::VK_PPC_HIGHEST; + break; + case MCSymbolRefExpr::VK_PPC_HIGHESTA: + Variant = PPCMCExpr::VK_PPC_HIGHESTA; + break; default: return 0; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index ed77529..7a95bcc 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -96,6 +96,18 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_HA: Type = ELF::R_PPC_ADDR16_HA; break; + case MCSymbolRefExpr::VK_PPC_HIGHER: + Type = ELF::R_PPC64_ADDR16_HIGHER; + break; + case MCSymbolRefExpr::VK_PPC_HIGHERA: + Type = ELF::R_PPC64_ADDR16_HIGHERA; + break; + case MCSymbolRefExpr::VK_PPC_HIGHEST: + Type = ELF::R_PPC64_ADDR16_HIGHEST; + break; + case MCSymbolRefExpr::VK_PPC_HIGHESTA: + Type = ELF::R_PPC64_ADDR16_HIGHESTA; + break; case MCSymbolRefExpr::VK_PPC_TOC: Type = ELF::R_PPC64_TOC16; break; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 8a346b4..db0f57d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -42,6 +42,10 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { case VK_PPC_LO: OS << "@l"; break; case VK_PPC_HI: OS << "@h"; break; case VK_PPC_HA: OS << "@ha"; break; + case VK_PPC_HIGHER: OS << "@higher"; break; + case VK_PPC_HIGHERA: OS << "@highera"; break; + case VK_PPC_HIGHEST: OS << "@highest"; break; + case VK_PPC_HIGHESTA: OS << "@highesta"; break; } } } @@ -66,7 +70,19 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, Result = (Result >> 16) & 0xffff; break; case VK_PPC_HA: - Result = ((Result >> 16) + ((Result & 0x8000) ? 1 : 0)) & 0xffff; + Result = ((Result + 0x8000) >> 16) & 0xffff; + break; + case VK_PPC_HIGHER: + Result = (Result >> 32) & 0xffff; + break; + case VK_PPC_HIGHERA: + Result = ((Result + 0x8000) >> 32) & 0xffff; + break; + case VK_PPC_HIGHEST: + Result = (Result >> 48) & 0xffff; + break; + case VK_PPC_HIGHESTA: + Result = ((Result + 0x8000) >> 48) & 0xffff; break; } Res = MCValue::get(Result); @@ -88,6 +104,18 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, case VK_PPC_HA: Modifier = MCSymbolRefExpr::VK_PPC_HA; break; + case VK_PPC_HIGHERA: + Modifier = MCSymbolRefExpr::VK_PPC_HIGHERA; + break; + case VK_PPC_HIGHER: + Modifier = MCSymbolRefExpr::VK_PPC_HIGHER; + break; + case VK_PPC_HIGHEST: + Modifier = MCSymbolRefExpr::VK_PPC_HIGHEST; + break; + case VK_PPC_HIGHESTA: + Modifier = MCSymbolRefExpr::VK_PPC_HIGHESTA; + break; } Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context); Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant()); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 150acf6..3cbb493 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -22,7 +22,11 @@ public: VK_PPC_None, VK_PPC_LO, VK_PPC_HI, - VK_PPC_HA + VK_PPC_HA, + VK_PPC_HIGHER, + VK_PPC_HIGHERA, + VK_PPC_HIGHEST, + VK_PPC_HIGHESTA }; private: -- cgit v1.1 From cab0a1933875935c717136d251e2af9749533ba8 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 21 Jun 2013 14:44:15 +0000 Subject: [PowerPC] Support various tls-related modifiers The current code base only supports the minimum set of tls-related relocations and @modifiers that are necessary to support compiler- generated code. This patch extends this to the full set defined in the ABI (and supported by the GNU assembler) for the benefit of the assembler parser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184551 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 78 ++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 7a95bcc..0155a89 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -120,33 +120,90 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TOC_HA: Type = ELF::R_PPC64_TOC16_HA; break; + case MCSymbolRefExpr::VK_PPC_TPREL: + Type = ELF::R_PPC_TPREL16; + break; case MCSymbolRefExpr::VK_PPC_TPREL_LO: Type = ELF::R_PPC_TPREL16_LO; break; + case MCSymbolRefExpr::VK_PPC_TPREL_HI: + Type = ELF::R_PPC_TPREL16_HI; + break; case MCSymbolRefExpr::VK_PPC_TPREL_HA: Type = ELF::R_PPC_TPREL16_HA; break; + case MCSymbolRefExpr::VK_PPC_TPREL_HIGHER: + Type = ELF::R_PPC64_TPREL16_HIGHER; + break; + case MCSymbolRefExpr::VK_PPC_TPREL_HIGHERA: + Type = ELF::R_PPC64_TPREL16_HIGHERA; + break; + case MCSymbolRefExpr::VK_PPC_TPREL_HIGHEST: + Type = ELF::R_PPC64_TPREL16_HIGHEST; + break; + case MCSymbolRefExpr::VK_PPC_TPREL_HIGHESTA: + Type = ELF::R_PPC64_TPREL16_HIGHESTA; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL: + Type = ELF::R_PPC64_DTPREL16; + break; case MCSymbolRefExpr::VK_PPC_DTPREL_LO: Type = ELF::R_PPC64_DTPREL16_LO; break; + case MCSymbolRefExpr::VK_PPC_DTPREL_HI: + Type = ELF::R_PPC64_DTPREL16_HI; + break; case MCSymbolRefExpr::VK_PPC_DTPREL_HA: Type = ELF::R_PPC64_DTPREL16_HA; break; + case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHER: + Type = ELF::R_PPC64_DTPREL16_HIGHER; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHERA: + Type = ELF::R_PPC64_DTPREL16_HIGHERA; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHEST: + Type = ELF::R_PPC64_DTPREL16_HIGHEST; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHESTA: + Type = ELF::R_PPC64_DTPREL16_HIGHESTA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD: + Type = ELF::R_PPC64_GOT_TLSGD16; + break; case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO: Type = ELF::R_PPC64_GOT_TLSGD16_LO; break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HI: + Type = ELF::R_PPC64_GOT_TLSGD16_HI; + break; case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA: Type = ELF::R_PPC64_GOT_TLSGD16_HA; break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD: + Type = ELF::R_PPC64_GOT_TLSLD16; + break; case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO: Type = ELF::R_PPC64_GOT_TLSLD16_LO; break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HI: + Type = ELF::R_PPC64_GOT_TLSLD16_HI; + break; case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA: Type = ELF::R_PPC64_GOT_TLSLD16_HA; break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI: + Type = ELF::R_PPC64_GOT_TPREL16_HI; + break; case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA: Type = ELF::R_PPC64_GOT_TPREL16_HA; break; + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HI: + Type = ELF::R_PPC64_GOT_DTPREL16_HI; + break; + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HA: + Type = ELF::R_PPC64_GOT_DTPREL16_HA; + break; } break; case PPC::fixup_ppc_half16ds: @@ -164,9 +221,30 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TOC_LO: Type = ELF::R_PPC64_TOC16_LO_DS; break; + case MCSymbolRefExpr::VK_PPC_TPREL: + Type = ELF::R_PPC64_TPREL16_DS; + break; + case MCSymbolRefExpr::VK_PPC_TPREL_LO: + Type = ELF::R_PPC64_TPREL16_LO_DS; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL: + Type = ELF::R_PPC64_DTPREL16_DS; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL_LO: + Type = ELF::R_PPC64_DTPREL16_LO_DS; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL: + Type = ELF::R_PPC64_GOT_TPREL16_DS; + break; case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO: Type = ELF::R_PPC64_GOT_TPREL16_LO_DS; break; + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL: + Type = ELF::R_PPC64_GOT_DTPREL16_DS; + break; + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO: + Type = ELF::R_PPC64_GOT_DTPREL16_LO_DS; + break; } break; case PPC::fixup_ppc_tlsreg: -- cgit v1.1 From 84569698f01bcb49afe5b6140bf0d61cf4f3cf5a Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 21 Jun 2013 14:44:37 +0000 Subject: [PowerPC] Support R_PPC_REL16 family of relocations The GNU assembler supports (as extension to the ABI) use of PC-relative relocations in half16 fields, which allows writing code like: li 1, base-. This patch adds support for those relocation types in the assembler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184552 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 0155a89..69e84a1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -63,6 +63,23 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_brcond14: Type = ELF::R_PPC_REL14; break; + case PPC::fixup_ppc_half16: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_REL16; + break; + case MCSymbolRefExpr::VK_PPC_LO: + Type = ELF::R_PPC_REL16_LO; + break; + case MCSymbolRefExpr::VK_PPC_HI: + Type = ELF::R_PPC_REL16_HI; + break; + case MCSymbolRefExpr::VK_PPC_HA: + Type = ELF::R_PPC_REL16_HA; + break; + } + break; case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; -- cgit v1.1 From 9b5575d55add0bb2c8769f76db250ff0f4efe8dc Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 21 Jun 2013 18:33:04 +0000 Subject: Fix IMULX machine model. Multiple def operands require multiple SchedWrites. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184566 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 8 ++++---- lib/Target/X86/X86SchedHaswell.td | 1 + lib/Target/X86/X86SchedSandyBridge.td | 1 + lib/Target/X86/X86Schedule.td | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index fa2b2d8..292c8b9 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1302,12 +1302,12 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul]>; + [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul, WriteIMulH]>; let mayLoad = 1 in def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd]>; + [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd, WriteIMulH]>; } } @@ -1336,7 +1336,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8, OpSize; - + def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "adcx{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; @@ -1361,7 +1361,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; - + def ADOX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS, REX_W, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 6770f0a..62ba2bc 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -91,6 +91,7 @@ def : WriteRes; defm : HWWriteResPair; defm : HWWriteResPair; +def : WriteRes { let Latency = 3; } defm : HWWriteResPair; defm : HWWriteResPair; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index e03de14..52ead94 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -86,6 +86,7 @@ def : WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; +def : WriteRes { let Latency = 3; } defm : SBWriteResPair; defm : SBWriteResPair; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 625a05c..ceb2e05 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -42,6 +42,7 @@ multiclass X86SchedWritePair { // Arithmetic. defm WriteALU : X86SchedWritePair; // Simple integer ALU op. defm WriteIMul : X86SchedWritePair; // Integer multiplication. +def WriteIMulH : SchedWrite; // Integer multiplication, high part. defm WriteIDiv : X86SchedWritePair; // Integer division. def WriteLEA : SchedWrite; // LEA instructions can't fold loads. -- cgit v1.1 From 0ff4287fe2245f417ed78fa535fce360b8b8df23 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 21 Jun 2013 18:51:49 +0000 Subject: [NVPTX] Add support for selecting CUDA vs OCL mode based on triple IR for CUDA should use "nvptx[64]-nvidia-cuda", and IR for NV OpenCL should use "nvptx[64]-nvidia-nvcl" git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184579 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTX.h | 3 +-- lib/Target/NVPTX/NVPTXSubtarget.cpp | 17 ++++++----------- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 179dc27..85cdb8b 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -77,8 +77,7 @@ extern Target TheNVPTXTarget64; namespace NVPTX { enum DrvInterface { NVCL, - CUDA, - TEST + CUDA }; // A field inside TSFlags needs a shift and a mask. The usage is diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 2dcd73d..c4d0d6e 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -19,23 +19,18 @@ using namespace llvm; -// Select Driver Interface -#include "llvm/Support/CommandLine.h" -namespace { -cl::opt DriverInterface( - cl::desc("Choose driver interface:"), - cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), - clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), - clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd), - cl::init(NVPTX::NVCL)); -} NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), SmVersion(20) { - drvInterface = DriverInterface; + Triple T(TT); + + if (T.getOS() == Triple::NVCL) + drvInterface = NVPTX::NVCL; + else + drvInterface = NVPTX::CUDA; // Provide the default CPU if none std::string defCPU = "sm_20"; -- cgit v1.1 From 1948910e3186d31bc0d213ecd0d7e87bb2c2760d Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 21 Jun 2013 22:56:30 +0000 Subject: DebugInfo: Don't lose unreferenced non-trivial by-value parameters A FastISel optimization was causing us to emit no information for such parameters & when they go missing we end up emitting a different function type. By avoiding that shortcut we not only get types correct (very important) but also location information (handy) - even if it's only live at the start of a function & may be clobbered later. Reviewed/discussion by Evan Cheng & Dan Gohman. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184604 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 2 -- lib/Target/X86/X86FastISel.cpp | 2 -- 2 files changed, 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 97dc63f..7f52749 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -3030,8 +3030,6 @@ bool ARMFastISel::FastLowerArguments() { Idx = 0; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++Idx) { - if (I->use_empty()) - continue; unsigned SrcReg = GPRArgRegs[Idx]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 295a577..669108f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1732,8 +1732,6 @@ bool X86FastISel::FastLowerArguments() { const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64); for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++Idx) { - if (I->use_empty()) - continue; bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32; const TargetRegisterClass *RC = is32Bit ? RC32 : RC64; unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx]; -- cgit v1.1 From 5b3fca50a08865f0db55fc92ad1c037a04e12177 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sat, 22 Jun 2013 18:37:38 +0000 Subject: The getRegForInlineAsmConstraint function should only accept MVT value types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184642 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.h | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 2 +- lib/Target/ARM/ARMISelLowering.h | 2 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 4 ++-- lib/Target/Hexagon/HexagonISelLowering.h | 2 +- lib/Target/MBlaze/MBlazeISelLowering.cpp | 2 +- lib/Target/MBlaze/MBlazeISelLowering.h | 2 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 2 +- lib/Target/MSP430/MSP430ISelLowering.h | 2 +- lib/Target/Mips/MipsISelLowering.cpp | 2 +- lib/Target/Mips/MipsISelLowering.h | 2 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 +- lib/Target/NVPTX/NVPTXISelLowering.h | 2 +- lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- lib/Target/PowerPC/PPCISelLowering.h | 2 +- lib/Target/Sparc/SparcISelLowering.cpp | 2 +- lib/Target/Sparc/SparcISelLowering.h | 2 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 2 +- lib/Target/SystemZ/SystemZISelLowering.h | 2 +- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- lib/Target/X86/X86ISelLowering.h | 2 +- lib/Target/XCore/XCoreISelLowering.cpp | 2 +- lib/Target/XCore/XCoreISelLowering.h | 2 +- 24 files changed, 26 insertions(+), 26 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 5a53339..dff01f7 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2932,7 +2932,7 @@ AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair AArch64TargetLowering::getRegForInlineAsmConstraint( const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index edef68b..901a9be 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -245,7 +245,7 @@ public: SelectionDAG &DAG) const; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; private: const InstrItineraryData *Itins; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a63cb27..6b981d5 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -10310,7 +10310,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( typedef std::pair RCPair; RCPair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 2b65019..cb5b680 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -349,7 +349,7 @@ namespace llvm { std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 2b0fa5e..ab8a767 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1590,11 +1590,11 @@ const { std::pair HexagonTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); case MVT::i32: diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 70642e6..e00f787 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -150,7 +150,7 @@ namespace llvm { std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; // Intrinsics virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index e07ceec..8046da2 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -1127,7 +1127,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight( /// to an LLVM register class, return a register of 0 and the register class /// pointer. std::pair MBlazeTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { +getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index f874113..08c29b6 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -165,7 +165,7 @@ namespace llvm { std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index a4818b2..3c19213 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -226,7 +226,7 @@ MSP430TargetLowering::getConstraintType(const std::string &Constraint) const { std::pair MSP430TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 9570ef2..878b207 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -98,7 +98,7 @@ namespace llvm { TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 6351073..a58f177 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2885,7 +2885,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight( /// to an LLVM register class, return a register of 0 and the register class /// pointer. std::pair MipsTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const +getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index fe043ae..d9b5ecd 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -435,7 +435,7 @@ namespace llvm { std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6cc850e..6b10cd4 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1421,7 +1421,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'c': diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index d3ed63a..c4119c6 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -108,7 +108,7 @@ public: ConstraintType getConstraintType(const std::string &Constraint) const; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; virtual SDValue LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index a0856ec..3378ace 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7514,7 +7514,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight( std::pair PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index e85f96c..1c0ad1b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -420,7 +420,7 @@ namespace llvm { std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 1d765f2..414087a 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1906,7 +1906,7 @@ SparcTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 7137171..261c25a 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -68,7 +68,7 @@ namespace llvm { ConstraintType getConstraintType(const std::string &Constraint) const; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 1dc187f..0b0dbea 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -360,7 +360,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, } std::pair SystemZTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { +getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index f17e9e4..f48cc4f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -129,7 +129,7 @@ public: virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE; virtual std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const LLVM_OVERRIDE; + MVT VT) const LLVM_OVERRIDE; virtual TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const LLVM_OVERRIDE; virtual TargetLowering::ConstraintWeight diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7e2e99c..7db1e47 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18423,7 +18423,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { // First, see if this is a constraint that directly corresponds to an LLVM // register class. if (Constraint.size() == 1) { @@ -18490,7 +18490,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed if (!Subtarget->hasSSE1()) break; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: break; // Scalar SSE types. case MVT::f32: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c0e1015..0e5e822 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -610,7 +610,7 @@ namespace llvm { /// error, this returns a register number of 0. std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 7b89b1a..5af2c9c 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1582,7 +1582,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::pair XCoreTargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default : break; diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index f765f02..7761b7c 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -158,7 +158,7 @@ namespace llvm { // Inline asm support std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; -- cgit v1.1 From 0f2eec65fb9e9e1dee3f672d38d03d047936a62a Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sun, 23 Jun 2013 09:00:28 +0000 Subject: Add MI-Sched support for x86 macro fusion. This is an awful implementation of the target hook. But we don't have abstractions yet for common machine ops, and I don't see any quick way to make it table-driven. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184664 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 161 ++++++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.h | 3 + 2 files changed, 164 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 0688c9b..0443a93 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -4647,6 +4647,167 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, return true; } +bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, + MachineInstr *Second) const { + // Check if this processor supports macro-fusion. Since this is a minor + // heuristic, we haven't specifically reserved a feature. hasAVX is a decent + // proxy for SandyBridge+. + if (!TM.getSubtarget().hasAVX()) + return false; + + enum { + FuseTest, + FuseCmp, + FuseInc + } FuseKind; + + switch(Second->getOpcode()) { + default: + return false; + case X86::JE_4: + case X86::JNE_4: + case X86::JL_4: + case X86::JLE_4: + case X86::JG_4: + case X86::JGE_4: + FuseKind = FuseInc; + break; + case X86::JB_4: + case X86::JBE_4: + case X86::JA_4: + case X86::JAE_4: + FuseKind = FuseCmp; + break; + case X86::JS_4: + case X86::JNS_4: + case X86::JP_4: + case X86::JNP_4: + case X86::JO_4: + case X86::JNO_4: + FuseKind = FuseTest; + break; + } + switch (First->getOpcode()) { + default: + return false; + case X86::TEST8rr: + case X86::TEST16rr: + case X86::TEST32rr: + case X86::TEST64rr: + case X86::TEST8ri: + case X86::TEST16ri: + case X86::TEST32ri: + case X86::TEST32i32: + case X86::TEST64i32: + case X86::TEST64ri32: + case X86::TEST8rm: + case X86::TEST16rm: + case X86::TEST32rm: + case X86::TEST64rm: + case X86::AND16i16: + case X86::AND16ri: + case X86::AND16ri8: + case X86::AND16rm: + case X86::AND16rr: + case X86::AND32i32: + case X86::AND32ri: + case X86::AND32ri8: + case X86::AND32rm: + case X86::AND32rr: + case X86::AND64i32: + case X86::AND64ri32: + case X86::AND64ri8: + case X86::AND64rm: + case X86::AND64rr: + case X86::AND8i8: + case X86::AND8ri: + case X86::AND8rm: + case X86::AND8rr: + return true; + case X86::CMP16i16: + case X86::CMP16ri: + case X86::CMP16ri8: + case X86::CMP16rm: + case X86::CMP16rr: + case X86::CMP32i32: + case X86::CMP32ri: + case X86::CMP32ri8: + case X86::CMP32rm: + case X86::CMP32rr: + case X86::CMP64i32: + case X86::CMP64ri32: + case X86::CMP64ri8: + case X86::CMP64rm: + case X86::CMP64rr: + case X86::CMP8i8: + case X86::CMP8ri: + case X86::CMP8rm: + case X86::CMP8rr: + case X86::ADD16i16: + case X86::ADD16ri: + case X86::ADD16ri8: + case X86::ADD16ri8_DB: + case X86::ADD16ri_DB: + case X86::ADD16rm: + case X86::ADD16rr: + case X86::ADD16rr_DB: + case X86::ADD32i32: + case X86::ADD32ri: + case X86::ADD32ri8: + case X86::ADD32ri8_DB: + case X86::ADD32ri_DB: + case X86::ADD32rm: + case X86::ADD32rr: + case X86::ADD32rr_DB: + case X86::ADD64i32: + case X86::ADD64ri32: + case X86::ADD64ri32_DB: + case X86::ADD64ri8: + case X86::ADD64ri8_DB: + case X86::ADD64rm: + case X86::ADD64rr: + case X86::ADD64rr_DB: + case X86::ADD8i8: + case X86::ADD8mi: + case X86::ADD8mr: + case X86::ADD8ri: + case X86::ADD8rm: + case X86::ADD8rr: + case X86::SUB16i16: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB16rm: + case X86::SUB16rr: + case X86::SUB32i32: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB32rm: + case X86::SUB32rr: + case X86::SUB64i32: + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB64rm: + case X86::SUB64rr: + case X86::SUB8i8: + case X86::SUB8ri: + case X86::SUB8rm: + case X86::SUB8rr: + return FuseKind == FuseCmp || FuseKind == FuseInc; + case X86::INC16r: + case X86::INC32r: + case X86::INC64_16r: + case X86::INC64_32r: + case X86::INC64r: + case X86::INC8r: + case X86::DEC16r: + case X86::DEC32r: + case X86::DEC64_16r: + case X86::DEC64_32r: + case X86::DEC64r: + case X86::DEC8r: + return FuseKind == FuseInc; + } +} bool X86InstrInfo:: ReverseBranchCondition(SmallVectorImpl &Cond) const { diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f2f47ef..a0d1ba7 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -339,6 +339,9 @@ public: int64_t Offset1, int64_t Offset2, unsigned NumLoads) const; + virtual bool shouldScheduleAdjacent(MachineInstr* First, + MachineInstr *Second) const LLVM_OVERRIDE; + virtual void getNoopForMachoTarget(MCInst &NopInst) const; virtual -- cgit v1.1 From 0c9f0c047dfba91bc7c0fb66f7e868e917d37c4c Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Mon, 24 Jun 2013 09:11:38 +0000 Subject: ARM: enable decoding of pc-relative PLD/PLI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184701 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 43 +++++-- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 151 ++++++++++++++++++------ 2 files changed, 148 insertions(+), 46 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 2693f32..5448ee3 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1024,17 +1024,19 @@ multiclass T2I_ld opcod, string opc, def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { - bits<4> Rt; - bits<13> addr; let isReMaterializable = 1; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; - let Inst{23} = addr{12}; // add = (U == '1') let Inst{22-21} = opcod; let Inst{20} = 1; // load let Inst{19-16} = 0b1111; // Rn + + bits<4> Rt; let Inst{15-12} = Rt{3-0}; + + bits<13> addr; + let Inst{23} = addr{12}; // add = (U == '1') let Inst{11-0} = addr{11-0}; let DecoderMethod = "DecodeT2LoadLabel"; @@ -1564,16 +1566,17 @@ multiclass T2Ipl write, bits<1> instr, string opc> { Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; + let Inst{23} = 1; let Inst{22} = 0; let Inst{21} = write; let Inst{20} = 1; let Inst{15-12} = 0b1111; bits<17> addr; - let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn - let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm12 + + let DecoderMethod = "DecodeT2LoadImm12"; } def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc, @@ -1592,6 +1595,8 @@ multiclass T2Ipl write, bits<1> instr, string opc> { bits<13> addr; let Inst{19-16} = addr{12-9}; // Rn let Inst{7-0} = addr{7-0}; // imm8 + + let DecoderMethod = "DecodeT2LoadImm8"; } def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc, @@ -1605,7 +1610,7 @@ multiclass T2Ipl write, bits<1> instr, string opc> { let Inst{21} = write; let Inst{20} = 1; let Inst{15-12} = 0b1111; - let Inst{11-6} = 0000000; + let Inst{11-6} = 0b000000; bits<10> addr; let Inst{19-16} = addr{9-6}; // Rn @@ -1614,10 +1619,28 @@ multiclass T2Ipl write, bits<1> instr, string opc> { let DecoderMethod = "DecodeT2LoadShift"; } - // FIXME: We should have a separate 'pci' variant here. As-is we represent - // it via the i12 variant, which it's related to, but that means we can - // represent negative immediates, which aren't legal for anything except - // the 'pci' case (Rn == 15). + + // pci variant is very similar to i12, but supports negative offsets + // from the PC. + def pci : T2Iso<(outs), (ins t2ldrlabel:$addr), IIC_Preload, opc, + "\t$addr", + [(ARMPreload (ARMWrapper tconstpool:$addr), + (i32 write), (i32 instr))]>, + Sched<[WritePreLd]> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{19-16} = 0b1111; + let Inst{15-12} = 0b1111; + + bits<13> addr; + let Inst{23} = addr{12}; // add = (U == '1') + let Inst{11-0} = addr{11-0}; // imm12 + + let DecoderMethod = "DecodeT2LoadLabel"; + } } defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 39a5af9..186bc9c 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -3199,38 +3199,51 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rn = fieldFromInstruction(Insn, 16, 4); - if (Rn == 0xF) { + if (Rn == 15) { switch (Inst.getOpcode()) { - case ARM::t2LDRBs: - Inst.setOpcode(ARM::t2LDRBpci); - break; - case ARM::t2LDRHs: - Inst.setOpcode(ARM::t2LDRHpci); - break; - case ARM::t2LDRSHs: - Inst.setOpcode(ARM::t2LDRSHpci); - break; - case ARM::t2LDRSBs: - Inst.setOpcode(ARM::t2LDRSBpci); - break; - case ARM::t2LDRs: - Inst.setOpcode(ARM::t2LDRpci); - break; - case ARM::t2PLDs: { - Inst.setOpcode(ARM::t2PLDi12); - Inst.addOperand(MCOperand::CreateReg(ARM::PC)); - int imm = fieldFromInstruction(Insn, 0, 12); - if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm)); - return S; - } - default: - return MCDisassembler::Fail; + case ARM::t2LDRBs: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRHs: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHs: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2LDRSBs: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRs: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2PLDs: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIs: + Inst.setOpcode(ARM::t2PLIpci); + break; + default: + return MCDisassembler::Fail; } return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); } + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHs: + return MCDisassembler::Fail; + case ARM::t2LDRHs: + // FIXME: this instruction is only available with MP extensions, + // this should be checked first but we don't have access to the + // feature bits here. + Inst.setOpcode(ARM::t2PLDWs); + break; + default: + break; + } + } + switch (Inst.getOpcode()) { case ARM::t2PLDs: case ARM::t2PLDWs: @@ -3278,14 +3291,36 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, case ARM::t2LDRSHi8: Inst.setOpcode(ARM::t2LDRSHpci); break; + case ARM::t2PLDi8: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIi8: + Inst.setOpcode(ARM::t2PLIpci); + break; default: return MCDisassembler::Fail; } return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); } - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHi8: + return MCDisassembler::Fail; + default: + break; + } + } + + switch (Inst.getOpcode()) { + case ARM::t2PLDi8: + case ARM::t2PLIi8: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) return MCDisassembler::Fail; return S; @@ -3317,14 +3352,39 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, case ARM::t2LDRSBi12: Inst.setOpcode(ARM::t2LDRSBpci); break; + case ARM::t2PLDi12: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIi12: + Inst.setOpcode(ARM::t2PLIpci); + break; default: return MCDisassembler::Fail; } return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); } - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHi12: + return MCDisassembler::Fail; + case ARM::t2LDRHi12: + Inst.setOpcode(ARM::t2PLDi12); + break; + default: + break; + } + } + + switch (Inst.getOpcode()) { + case ARM::t2PLDi12: + case ARM::t2PLIi12: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeT2AddrModeImm12(Inst, imm, Address, Decoder))) return MCDisassembler::Fail; return S; @@ -3377,11 +3437,27 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, unsigned U = fieldFromInstruction(Insn, 23, 1); int imm = fieldFromInstruction(Insn, 0, 12); - // FIXME: detect and decode PLD properly - if (Inst.getOpcode() == ARM::t2LDRBpci && Rt == 15) { - Inst.setOpcode(ARM::t2PLDi12); - Inst.addOperand(MCOperand::CreateReg(ARM::PC)); - } else { + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRBpci: + case ARM::t2LDRHpci: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2LDRSBpci: + Inst.setOpcode(ARM::t2PLIpci); + break; + case ARM::t2LDRSHpci: + return MCDisassembler::Fail; + default: + break; + } + } + + switch(Inst.getOpcode()) { + case ARM::t2PLDpci: + case ARM::t2PLIpci: + break; + default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; } @@ -3528,7 +3604,10 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, break; case ARM::t2LDRSB_PRE: case ARM::t2LDRSB_POST: - Inst.setOpcode(ARM::t2LDRSBpci); + if (Rt == 15) + Inst.setOpcode(ARM::t2PLIpci); + else + Inst.setOpcode(ARM::t2LDRSBpci); break; case ARM::t2LDRSH_PRE: case ARM::t2LDRSH_POST: -- cgit v1.1 From ff08da15cf3d0412ee9cc325fc5a720bcad178f2 Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Mon, 24 Jun 2013 09:11:45 +0000 Subject: ARM: fix IT decoding mask == 0 -> UNPRED git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184702 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 186bc9c..bc874f4 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -4667,10 +4667,8 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, S = MCDisassembler::SoftFail; } - if (mask == 0x0) { - mask |= 0x8; - S = MCDisassembler::SoftFail; - } + if (mask == 0x0) + return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(pred)); Inst.addOperand(MCOperand::CreateImm(mask)); -- cgit v1.1 From 4ee72398a15cd7b8e217bb3d34a4e9e0e72caca1 Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Mon, 24 Jun 2013 09:11:53 +0000 Subject: ARM: fix thumb1 nop decoding In thumb1, NOP is a pseudo-instruction equivalent to mov r8, r8. However the disassembler should not use this alias. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184703 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 9 --------- 1 file changed, 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 62394fa..8734e44 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -243,15 +243,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - // Thumb1 NOP - if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 && - MI->getOperand(1).getReg() == ARM::R8) { - O << "\tnop"; - printPredicateOperand(MI, 2, O); - printAnnotation(O, Annot); - return; - } - // Combine 2 GPRs from disassember into a GPRPair to match with instr def. // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, // a single GPRPair reg operand is used in the .td file to replace the two -- cgit v1.1 From 98a9b72e8c56dc13a2617de84503a3d78352789c Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Mon, 24 Jun 2013 09:13:20 +0000 Subject: Temporarily enable MI-Sched on X86. Sorry for the unit test churn. I'll try to make the change permanently next time. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184705 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 66832b9..59911ba 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -361,11 +361,14 @@ public: /// memset with zero passed as the second argument. Otherwise it /// returns null. const char *getBZeroEntry() const; - + /// This function returns true if the target has sincos() routine in its /// compiler runtime or math libraries. bool hasSinCos() const; + /// Enable the MachineScheduler pass for all X86 subtargets. + bool enableMachineScheduler() const LLVM_OVERRIDE { return true; } + /// enablePostRAScheduler - run for Atom optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, -- cgit v1.1 From 07c3e159d8fffc8b16bcd52cc395a78007c62910 Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Mon, 24 Jun 2013 09:14:54 +0000 Subject: ARM: rGPR is meant to be unpredictable, not undefined git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184706 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index bc874f4..6aaf4c0 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -916,8 +916,11 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; - return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); + DecodeStatus S = MCDisassembler::Success; + if (RegNo == 13 || RegNo == 15) + S = MCDisassembler::SoftFail; + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + return S; } static const uint16_t SPRDecoderTable[] = { -- cgit v1.1 From ebc3938ae717d7352de800344c3ad5a1bceb74e5 Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Mon, 24 Jun 2013 09:15:01 +0000 Subject: ARM: check predicate bits for thumb instructions When encoded to thumb, VFP instruction and VMOV/VDUP between scalar and core registers, must have their predicate bit to 0b1110. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184707 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 30 ++++++++++++++----------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 6aaf4c0..31941c1 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -754,21 +754,25 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return result; } - MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { - Size = 4; - UpdateThumbVFPPredicate(MI); - return result; + if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + MI.clear(); + result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + UpdateThumbVFPPredicate(MI); + return result; + } } - MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { - Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + MI.clear(); + result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } } if (fieldFromInstruction(insn32, 24, 8) == 0xF9) { -- cgit v1.1 From 90b1086b93708149ed7a3749e2eeccea264a037d Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Mon, 24 Jun 2013 10:05:34 +0000 Subject: This patch introduces RegisterOperand class into Mips FPU instruction definitions and adds dedicated parser methods to MipsAsmParser. It is the first in a series of patches that should fix the problems with parsing Mips FPU instructions and optimize the code in MipsAsmParser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184716 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 137 +++++++++++++++++----------- lib/Target/Mips/MipsInstrFPU.td | 80 +++++++++------- lib/Target/Mips/MipsRegisterInfo.td | 27 ++++++ 3 files changed, 157 insertions(+), 87 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 30149d3..4d805a7 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -104,6 +104,15 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseCCRRegs(SmallVectorImpl &Operands); + MipsAsmParser::OperandMatchResultTy + parseAFGR64Regs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseFGR64Regs(SmallVectorImpl &Operands); + + MipsAsmParser::OperandMatchResultTy + parseFGR32Regs(SmallVectorImpl &Operands); + bool searchSymbolAlias(SmallVectorImpl &Operands, unsigned RegKind); @@ -177,8 +186,6 @@ class MipsAsmParser : public MCTargetAsmParser { FpFormatTy getFpFormat() {return FpFormat;} - bool requestsDoubleOperand(StringRef Mnemonic); - unsigned getReg(int RC, int RegNo); int getATReg(); @@ -385,6 +392,18 @@ public: return Reg.Kind == Kind_CCRRegs; } + bool isAFGR64Asm() const { + return Kind == k_Register && Reg.Kind == Kind_AFGR64Regs; + } + + bool isFGR64Asm() const { + return Kind == k_Register && Reg.Kind == Kind_FGR64Regs; + } + + bool isFGR32Asm() const { + return (Kind == k_Register) && Reg.Kind == Kind_FGR32Regs; + } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; @@ -838,18 +857,6 @@ void MipsAsmParser::setDefaultFpFormat() { FpFormat = FP_FORMAT_S; } -bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){ - - bool IsDouble = StringSwitch(Mnemonic.lower()) - .Case("ldxc1", true) - .Case("ldc1", true) - .Case("sdxc1", true) - .Case("sdc1", true) - .Default(false); - - return IsDouble; -} - void MipsAsmParser::setFpFormat(StringRef Format) { FpFormat = StringSwitch(Format.lower()) @@ -1280,6 +1287,34 @@ MipsAsmParser::parseCPURegs(SmallVectorImpl &Operands) { return parseRegs(Operands, (int) MipsOperand::Kind_CPURegs); } +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseAFGR64Regs(SmallVectorImpl &Operands) { + + if (isFP64()) + return MatchOperand_NoMatch; + // Double operand is expected, set appropriate format + setFpFormat(FP_FORMAT_D); + + return parseRegs(Operands, (int) MipsOperand::Kind_AFGR64Regs); +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseFGR64Regs(SmallVectorImpl &Operands) { + if (!isFP64()) + return MatchOperand_NoMatch; + // Double operand is expected, set appropriate format + setFpFormat(FP_FORMAT_D); + + return parseRegs(Operands, (int) MipsOperand::Kind_FGR64Regs); +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseFGR32Regs(SmallVectorImpl &Operands) { + // Single operand is expected, set appropriate format + setFpFormat(FP_FORMAT_S); + return parseRegs(Operands, (int) MipsOperand::Kind_FGR32Regs); +} + bool MipsAsmParser::searchSymbolAlias( SmallVectorImpl &Operands, unsigned RegKind) { @@ -1537,50 +1572,44 @@ bool MipsAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands) { StringRef Mnemonic; - // Floating point instructions: Should the register be treated as a double? - if (requestsDoubleOperand(Name)) { - setFpFormat(FP_FORMAT_D); - Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); - Mnemonic = Name; - } else { - setDefaultFpFormat(); - // Create the leading tokens for the mnemonic, split by '.' characters. - size_t Start = 0, Next = Name.find('.'); - Mnemonic = Name.slice(Start, Next); - - Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); - - if (Next != StringRef::npos) { - // There is a format token in mnemonic. - size_t Dot = Name.find('.', Next + 1); - StringRef Format = Name.slice(Next, Dot); - if (Dot == StringRef::npos) // Only one '.' in a string, it's a format. - Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); - else { - if (Name.startswith("c.")) { - // Floating point compare, add '.' and immediate represent for cc. - Operands.push_back(MipsOperand::CreateToken(".", NameLoc)); - int Cc = ConvertCcString(Format); - if (Cc == -1) { - return Error(NameLoc, "Invalid conditional code"); - } - SMLoc E = SMLoc::getFromPointer( - Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back( - MipsOperand::CreateImm(MCConstantExpr::Create(Cc, getContext()), - NameLoc, E)); - } else { - // trunc, ceil, floor ... - return parseMathOperation(Name, NameLoc, Operands); - } - // The rest is a format. - Format = Name.slice(Dot, StringRef::npos); - Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); + setDefaultFpFormat(); + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + Mnemonic = Name.slice(Start, Next); + + Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); + + if (Next != StringRef::npos) { + // There is a format token in mnemonic. + size_t Dot = Name.find('.', Next + 1); + StringRef Format = Name.slice(Next, Dot); + if (Dot == StringRef::npos) // Only one '.' in a string, it's a format. + Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); + else { + if (Name.startswith("c.")) { + // Floating point compare, add '.' and immediate represent for cc. + Operands.push_back(MipsOperand::CreateToken(".", NameLoc)); + int Cc = ConvertCcString(Format); + if (Cc == -1) { + return Error(NameLoc, "Invalid conditional code"); + } + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back( + MipsOperand::CreateImm(MCConstantExpr::Create(Cc, getContext()), + NameLoc, E)); + } else { + // trunc, ceil, floor ... + return parseMathOperation(Name, NameLoc, Operands); } - setFpFormat(Format); + // The rest is a format. + Format = Name.slice(Dot, StringRef::npos); + Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); } + + setFpFormat(Format); } // Read the remaining operands. diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index e2acf28..6b2b859 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -151,7 +151,7 @@ class MTC1_FT_CCR; -class LW_FT : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> { @@ -159,7 +159,7 @@ class LW_FT : InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> { @@ -180,7 +180,7 @@ class NMADDS_FT; -class LWXC1_FT : InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index), !strconcat(opstr, "\t$fd, ${index}(${base})"), @@ -188,7 +188,7 @@ class LWXC1_FT : InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index), !strconcat(opstr, "\t$fs, ${index}(${base})"), @@ -304,73 +304,87 @@ def FMOV_D64 : ABSS_FT<"mov.d", FGR64, FGR64, IIFmove>, ABSS_FM<0x6, 17>, /// Floating Point Memory Instructions let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LWC1_P8 : LW_FT<"lwc1", FGR32, IILoad, mem64, load>, LW_FM<0x31>; - def SWC1_P8 : SW_FT<"swc1", FGR32, IIStore, mem64, store>, LW_FM<0x39>; - def LDC164_P8 : LW_FT<"ldc1", FGR64, IILoad, mem64, load>, LW_FM<0x35> { + def LWC1_P8 : LW_FT<"lwc1", FGR32RegsOpnd, IILoad, mem64, load>, LW_FM<0x31>; + def SWC1_P8 : SW_FT<"swc1", FGR32RegsOpnd, IIStore, mem64, store>, + LW_FM<0x39>; + def LDC164_P8 : LW_FT<"ldc1", FGR64RegsOpnd, IILoad, mem64, load>, + LW_FM<0x35> { let isCodeGenOnly =1; } - def SDC164_P8 : SW_FT<"sdc1", FGR64, IIStore, mem64, store>, LW_FM<0x3d> { + def SDC164_P8 : SW_FT<"sdc1", FGR64RegsOpnd, IIStore, mem64, store>, + LW_FM<0x3d> { let isCodeGenOnly =1; } } let Predicates = [NotN64, HasStdEnc] in { - def LWC1 : LW_FT<"lwc1", FGR32, IILoad, mem, load>, LW_FM<0x31>; - def SWC1 : SW_FT<"swc1", FGR32, IIStore, mem, store>, LW_FM<0x39>; + def LWC1 : LW_FT<"lwc1", FGR32RegsOpnd, IILoad, mem, load>, LW_FM<0x31>; + def SWC1 : SW_FT<"swc1", FGR32RegsOpnd, IIStore, mem, store>, LW_FM<0x39>; } let Predicates = [NotN64, HasMips64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LDC164 : LW_FT<"ldc1", FGR64, IILoad, mem, load>, LW_FM<0x35>; - def SDC164 : SW_FT<"sdc1", FGR64, IIStore, mem, store>, LW_FM<0x3d>; + def LDC164 : LW_FT<"ldc1", FGR64RegsOpnd, IILoad, mem, load>, LW_FM<0x35>; + def SDC164 : SW_FT<"sdc1", FGR64RegsOpnd, IIStore, mem, store>, LW_FM<0x3d>; } let Predicates = [NotN64, NotMips64, HasStdEnc] in { let isPseudo = 1, isCodeGenOnly = 1 in { - def PseudoLDC1 : LW_FT<"", AFGR64, IILoad, mem, load>; - def PseudoSDC1 : SW_FT<"", AFGR64, IIStore, mem, store>; + def PseudoLDC1 : LW_FT<"", AFGR64RegsOpnd, IILoad, mem, load>; + def PseudoSDC1 : SW_FT<"", AFGR64RegsOpnd, IIStore, mem, store>; } - def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem>, LW_FM<0x35>; - def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem>, LW_FM<0x3d>; + def LDC1 : LW_FT<"ldc1", AFGR64RegsOpnd, IILoad, mem>, LW_FM<0x35>; + def SDC1 : SW_FT<"sdc1", AFGR64RegsOpnd, IIStore, mem>, LW_FM<0x3d>; } // Indexed loads and stores. let Predicates = [HasFPIdx, HasStdEnc] in { - def LWXC1 : LWXC1_FT<"lwxc1", FGR32, CPURegs, IILoad, load>, LWXC1_FM<0>; - def SWXC1 : SWXC1_FT<"swxc1", FGR32, CPURegs, IIStore, store>, SWXC1_FM<8>; + def LWXC1 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, CPURegsOpnd, IILoad, load>, + LWXC1_FM<0>; + def SWXC1 : SWXC1_FT<"swxc1", FGR32RegsOpnd, CPURegsOpnd, IIStore, store>, + SWXC1_FM<8>; } let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in { - def LDXC1 : LWXC1_FT<"ldxc1", AFGR64, CPURegs, IILoad, load>, LWXC1_FM<1>; - def SDXC1 : SWXC1_FT<"sdxc1", AFGR64, CPURegs, IIStore, store>, SWXC1_FM<9>; + def LDXC1 : LWXC1_FT<"ldxc1", AFGR64RegsOpnd, CPURegsOpnd, IILoad, load>, + LWXC1_FM<1>; + def SDXC1 : SWXC1_FT<"sdxc1", AFGR64RegsOpnd, CPURegsOpnd, IIStore, store>, + SWXC1_FM<9>; } let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in { - def LDXC164 : LWXC1_FT<"ldxc1", FGR64, CPURegs, IILoad, load>, LWXC1_FM<1>; - def SDXC164 : SWXC1_FT<"sdxc1", FGR64, CPURegs, IIStore, store>, SWXC1_FM<9>; + def LDXC164 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, CPURegsOpnd, IILoad, load>, + LWXC1_FM<1>; + def SDXC164 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, CPURegsOpnd, IIStore, store>, + SWXC1_FM<9>; } // n64 let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in { - def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32, CPU64Regs, IILoad, load>, LWXC1_FM<0>; - def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64, CPU64Regs, IILoad, load>, - LWXC1_FM<1>; - def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32, CPU64Regs, IIStore, store>, - SWXC1_FM<8>; - def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64, CPU64Regs, IIStore, store>, - SWXC1_FM<9>; + def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, CPU64RegsOpnd, IILoad, load>, + LWXC1_FM<0>; + def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, CPU64RegsOpnd, IILoad, + load>, LWXC1_FM<1>; + def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32RegsOpnd, CPU64RegsOpnd, IIStore, + store>, SWXC1_FM<8>; + def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, CPU64RegsOpnd, IIStore, + store>, SWXC1_FM<9>; } // Load/store doubleword indexed unaligned. let Predicates = [NotMips64, HasStdEnc] in { - def LUXC1 : LWXC1_FT<"luxc1", AFGR64, CPURegs, IILoad>, LWXC1_FM<0x5>; - def SUXC1 : SWXC1_FT<"suxc1", AFGR64, CPURegs, IIStore>, SWXC1_FM<0xd>; + def LUXC1 : LWXC1_FT<"luxc1", AFGR64RegsOpnd, CPURegsOpnd, IILoad>, + LWXC1_FM<0x5>; + def SUXC1 : SWXC1_FT<"suxc1", AFGR64RegsOpnd, CPURegsOpnd, IIStore>, + SWXC1_FM<0xd>; } let Predicates = [HasMips64, HasStdEnc], DecoderNamespace="Mips64" in { - def LUXC164 : LWXC1_FT<"luxc1", FGR64, CPURegs, IILoad>, LWXC1_FM<0x5>; - def SUXC164 : SWXC1_FT<"suxc1", FGR64, CPURegs, IIStore>, SWXC1_FM<0xd>; + def LUXC164 : LWXC1_FT<"luxc1", FGR64RegsOpnd, CPURegsOpnd, IILoad>, + LWXC1_FM<0x5>; + def SUXC164 : SWXC1_FT<"suxc1", FGR64RegsOpnd, CPURegsOpnd, IIStore>, + SWXC1_FM<0xd>; } /// Floating-point Aritmetic diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index d98cb21..3687084 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -407,6 +407,21 @@ def CCRAsmOperand : MipsAsmRegOperand { let ParserMethod = "parseCCRRegs"; } +def AFGR64AsmOperand : MipsAsmRegOperand { + let Name = "AFGR64Asm"; + let ParserMethod = "parseAFGR64Regs"; +} + +def FGR64AsmOperand : MipsAsmRegOperand { + let Name = "FGR64Asm"; + let ParserMethod = "parseFGR64Regs"; +} + +def FGR32AsmOperand : MipsAsmRegOperand { + let Name = "FGR32Asm"; + let ParserMethod = "parseFGR32Regs"; +} + def CPURegsOpnd : RegisterOperand { let ParserMatchClass = CPURegsAsmOperand; } @@ -436,3 +451,15 @@ def HWRegsOpnd : RegisterOperand { def HW64RegsOpnd : RegisterOperand { let ParserMatchClass = HW64RegsAsmOperand; } + +def AFGR64RegsOpnd : RegisterOperand { + let ParserMatchClass = AFGR64AsmOperand; +} + +def FGR64RegsOpnd : RegisterOperand { + let ParserMatchClass = FGR64AsmOperand; +} + +def FGR32RegsOpnd : RegisterOperand { + let ParserMatchClass = FGR32AsmOperand; +} \ No newline at end of file -- cgit v1.1 From 7e66f5c1b4a166d823e0452d1a1bc0f822d04201 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 24 Jun 2013 11:01:55 +0000 Subject: [PowerPC] Support blrl and variants in the asm parser This patch adds support for blrl and its conditional variants. The patterns are (currently) used for the asm parser only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184718 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 4f00602..5343270 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -926,6 +926,14 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), "b${cond:cc}ctrl ${cond:reg}", BrB, []>; } + let Uses = [LR, RM] in { + def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins), + "blrl", BrB, []>; + + let isCodeGenOnly = 1 in + def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), + "b${cond:cc}lrl ${cond:reg}", BrB, []>; + } } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in @@ -2183,6 +2191,11 @@ multiclass BranchExtendedMnemonic { def : InstAlias<"b"#name#"ctr", (BCCTR bibo, CR0)>; + def : InstAlias<"b"#name#"lrl $cc", + (BCLRL bibo, crrc:$cc)>; + def : InstAlias<"b"#name#"lrl", + (BCLRL bibo, CR0)>; + def : InstAlias<"b"#name#"ctrl $cc", (BCCTRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"ctrl", -- cgit v1.1 From 813942a0cf8e0605002c5fa364372a8a61634cc4 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 24 Jun 2013 11:02:19 +0000 Subject: [PowerPC] Support b(cond)l in the asm parser This patch adds support for the conditional variants of bl. The pattern is currently used by the asm parser only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184719 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 5343270..8f896ad 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -916,6 +916,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { "bl $func", BrB, []>; // See Pat patterns below. def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func), "bla $func", BrB, [(PPCcall (i32 imm:$func))]>; + + let isCodeGenOnly = 1 in + def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), + "b${cond:cc}l ${cond:reg}, $dst">; } let Uses = [CTR, RM] in { def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), @@ -2191,6 +2195,11 @@ multiclass BranchExtendedMnemonic { def : InstAlias<"b"#name#"ctr", (BCCTR bibo, CR0)>; + def : InstAlias<"b"#name#"l $cc, $dst", + (BCCL bibo, crrc:$cc, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"l $dst", + (BCCL bibo, CR0, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"lrl $cc", (BCLRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lrl", -- cgit v1.1 From 9068d5310cfafdd201f77b0434dc7eebb7f51a45 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 24 Jun 2013 11:02:38 +0000 Subject: [PowerPC] Support bd(n)zl and bd(n)zlrl This adds support for the bd(n)zl and bd(n)zlrl instructions. The patterns are currently used for the asm parser only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184720 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 8f896ad..700875a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -938,6 +938,18 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), "b${cond:cc}lrl ${cond:reg}", BrB, []>; } + let Defs = [CTR], Uses = [CTR, RM] in { + def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst), + "bdzl $dst">; + def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst), + "bdnzl $dst">; + } + let Defs = [CTR], Uses = [CTR, LR, RM] in { + def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins), + "bdzlrl", BrB, []>; + def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins), + "bdnzlrl", BrB, []>; + } } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -- cgit v1.1 From 9679c47a07386cbf3547a0927609c7ee080b2aab Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 24 Jun 2013 11:03:33 +0000 Subject: [PowerPC] Support absolute branches There is currently only limited support for the "absolute" variants of branch instructions. This patch adds support for the absolute variants of all branches that are currently otherwise supported. This requires adding new fixup types so that the correct variant of relocation type can be selected by the object writer. While the compiler will continue to usually choose the relative branch variants, this will allow the asm parser to fully support the absolute branches, with either immediate (numerical) or symbolic target addresses. No change in code generation intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184721 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 14 +++++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 9 ++- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 2 +- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 6 ++ .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 6 +- lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 8 +++ .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 28 +++++++++ lib/Target/PowerPC/PPCCodeEmitter.cpp | 16 +++++ lib/Target/PowerPC/PPCInstr64Bit.td | 8 +-- lib/Target/PowerPC/PPCInstrInfo.td | 72 ++++++++++++++++++---- 10 files changed, 147 insertions(+), 22 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 6318d41..999c677 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -267,6 +267,12 @@ public: bool isS16ImmX4() const { return Kind == Expression || (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } + bool isDirectBr() const { return Kind == Expression || + (Kind == Immediate && isInt<26>(getImm()) && + (getImm() & 3) == 0); } + bool isCondBr() const { return Kind == Expression || + (Kind == Immediate && isInt<16>(getImm()) && + (getImm() & 3) == 0); } bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isCCRegNumber() const { return Kind == Immediate && isUInt<3>(getImm()); } @@ -351,6 +357,14 @@ public: Inst.addOperand(MCOperand::CreateExpr(getExpr())); } + void addBranchTargetOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Immediate) + Inst.addOperand(MCOperand::CreateImm(getImm() / 4)); + else + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + } + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 432167e..9af5e53 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -148,11 +148,14 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, // Branches can take an immediate operand. This is used by the branch // selection pass to print .+8, an eight byte displacement from the PC. O << ".+"; - printAbsAddrOperand(MI, OpNo, O); + printAbsBranchOperand(MI, OpNo, O); } -void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { +void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (!MI->getOperand(OpNo).isImm()) + return printOperand(MI, OpNo, O); + O << (int)MI->getOperand(OpNo).getImm()*4; } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index f64a329..da09810 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -51,7 +51,7 @@ public: void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 3fa2e09..e01f142 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -34,8 +34,10 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_nofixup: return Value; case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_brcond14abs: return Value & 0xfffc; case PPC::fixup_ppc_br24: + case PPC::fixup_ppc_br24abs: return Value & 0x3fffffc; case PPC::fixup_ppc_half16: return Value & 0xffff; @@ -56,7 +58,9 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { return 2; case FK_Data_4: case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_brcond14abs: case PPC::fixup_ppc_br24: + case PPC::fixup_ppc_br24abs: return 4; case FK_Data_8: return 8; @@ -93,6 +97,8 @@ public: // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24abs", 6, 24, 0 }, + { "fixup_ppc_brcond14abs", 16, 14, 0 }, { "fixup_ppc_half16", 0, 16, 0 }, { "fixup_ppc_half16ds", 0, 14, 0 }, { "fixup_ppc_tlsreg", 0, 0, 0 }, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 69e84a1..f48cb5e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -58,9 +58,11 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, default: llvm_unreachable("Unimplemented"); case PPC::fixup_ppc_br24: + case PPC::fixup_ppc_br24abs: Type = ELF::R_PPC_REL24; break; case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_brcond14abs: Type = ELF::R_PPC_REL14; break; case PPC::fixup_ppc_half16: @@ -92,10 +94,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, } else { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); - case PPC::fixup_ppc_br24: + case PPC::fixup_ppc_br24abs: Type = ELF::R_PPC_ADDR24; break; - case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_brcond14abs: Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_ break; case PPC::fixup_ppc_half16: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 3ea59f0..0438c0e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -25,6 +25,14 @@ enum Fixups { /// branches. fixup_ppc_brcond14, + /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches + /// like 'ba' and 'bla'. + fixup_ppc_br24abs, + + /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional + /// branches. + fixup_ppc_brcond14abs, + /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo) /// or ha16(_foo) for instrs like 'li' or 'addis'. fixup_ppc_half16, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 420c01b..1c6adac 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -48,6 +48,10 @@ public: SmallVectorImpl &Fixups) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; + unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; + unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; unsigned getS16ImmEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, @@ -134,6 +138,30 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo, return 0; } +unsigned PPCMCCodeEmitter:: +getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + + // Add a fixup for the branch target. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_br24abs)); + return 0; +} + +unsigned PPCMCCodeEmitter:: +getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + + // Add a fixup for the branch target. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_brcond14abs)); + return 0; +} + unsigned PPCMCCodeEmitter::getS16ImmEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 0ad4ea3..f006b49 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -63,6 +63,9 @@ namespace { unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getAbsDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getS16ImmEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; @@ -193,6 +196,19 @@ unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI, return 0; } +unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + llvm_unreachable("Absolute branch relocations unsupported on the old JIT."); +} + +unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Absolute branch relocations unsupported on the old JIT."); +} + unsigned PPCCodeEmitter::getS16ImmEncoding(const MachineInstr &MI, unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 0245ba7..89883e2 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -102,7 +102,7 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), "bl $func", BrB, []>; // See Pat patterns below. - def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func), + def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), "bla $func", BrB, [(PPCcall (i64 imm:$func))]>; } let Uses = [RM], isCodeGenOnly = 1 in { @@ -119,7 +119,7 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { "bl $func($sym)\n\tnop", BrB, []>; def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, - (outs), (ins aaddr:$func), + (outs), (ins abscalltarget:$func), "bla $func\n\tnop", BrB, [(PPCcall_nop (i64 imm:$func))]>; } @@ -198,7 +198,7 @@ def TCRETURNdi8 :Pseudo< (outs), []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset), +def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset), "#TC_RETURNa8 $func $offset", [(PPCtc_return (i64 imm:$func), imm:$offset)]>; @@ -224,7 +224,7 @@ def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst), let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in -def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), +def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), "ba $dst", BrB, []>; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 700875a..dcea65c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -445,19 +445,43 @@ def u16imm : Operand { let PrintMethod = "printU16ImmOperand"; let ParserMatchClass = PPCU16ImmAsmOperand; } +def PPCDirectBrAsmOperand : AsmOperandClass { + let Name = "DirectBr"; let PredicateMethod = "isDirectBr"; + let RenderMethod = "addBranchTargetOperands"; +} def directbrtarget : Operand { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; + let ParserMatchClass = PPCDirectBrAsmOperand; +} +def absdirectbrtarget : Operand { + let PrintMethod = "printAbsBranchOperand"; + let EncoderMethod = "getAbsDirectBrEncoding"; + let ParserMatchClass = PPCDirectBrAsmOperand; +} +def PPCCondBrAsmOperand : AsmOperandClass { + let Name = "CondBr"; let PredicateMethod = "isCondBr"; + let RenderMethod = "addBranchTargetOperands"; } def condbrtarget : Operand { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getCondBrEncoding"; + let ParserMatchClass = PPCCondBrAsmOperand; +} +def abscondbrtarget : Operand { + let PrintMethod = "printAbsBranchOperand"; + let EncoderMethod = "getAbsCondBrEncoding"; + let ParserMatchClass = PPCCondBrAsmOperand; } def calltarget : Operand { + let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; + let ParserMatchClass = PPCDirectBrAsmOperand; } -def aaddr : Operand { - let PrintMethod = "printAbsAddrOperand"; +def abscalltarget : Operand { + let PrintMethod = "printAbsBranchOperand"; + let EncoderMethod = "getAbsDirectBrEncoding"; + let ParserMatchClass = PPCDirectBrAsmOperand; } def PPCCRBitMaskOperand : AsmOperandClass { let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask"; @@ -872,6 +896,8 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), "b $dst", BrB, [(br bb:$dst)]>; + def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst), + "ba $dst", BrB, []>; } // BCC represents an arbitrary conditional branch on a predicate. @@ -881,6 +907,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc} ${cond:reg}, $dst" /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; + def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst), + "b${cond:cc}a ${cond:reg}, $dst">; + let isReturn = 1, Uses = [LR, RM] in def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), "b${cond:cc}lr ${cond:reg}", BrB, []>; @@ -898,6 +927,10 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { "bdz $dst">; def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), "bdnz $dst">; + def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdza $dst">; + def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdnza $dst">; } } @@ -914,12 +947,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { let Uses = [RM] in { def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), "bl $func", BrB, []>; // See Pat patterns below. - def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func), + def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), "bla $func", BrB, [(PPCcall (i32 imm:$func))]>; - let isCodeGenOnly = 1 in - def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), - "b${cond:cc}l ${cond:reg}, $dst">; + let isCodeGenOnly = 1 in { + def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), + "b${cond:cc}l ${cond:reg}, $dst">; + def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), + "b${cond:cc}la ${cond:reg}, $dst">; + } } let Uses = [CTR, RM] in { def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), @@ -943,6 +979,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { "bdzl $dst">; def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst), "bdnzl $dst">; + def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdzla $dst">; + def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdnzla $dst">; } let Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins), @@ -960,7 +1000,7 @@ def TCRETURNdi :Pseudo< (outs), let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset), +def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset), "#TC_RETURNa $func $offset", [(PPCtc_return (i32 imm:$func), imm:$offset)]>; @@ -977,22 +1017,20 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, Requires<[In32BitMode]>; - - let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), "b $dst", BrB, []>; -} - let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in -def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), +def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), "ba $dst", BrB, []>; +} + let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP32", @@ -2197,6 +2235,11 @@ multiclass BranchExtendedMnemonic { def : InstAlias<"b"#name#" $dst", (BCC bibo, CR0, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"a $cc, $dst", + (BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"a $dst", + (BCCA bibo, CR0, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"lr $cc", (BCLR bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lr", @@ -2212,6 +2255,11 @@ multiclass BranchExtendedMnemonic { def : InstAlias<"b"#name#"l $dst", (BCCL bibo, CR0, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"la $cc, $dst", + (BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"la $dst", + (BCCLA bibo, CR0, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"lrl $cc", (BCLRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lrl", -- cgit v1.1 From e5a30f0ca22cc1ba97478e9fadcdef02d341004e Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 24 Jun 2013 11:55:21 +0000 Subject: [PowerPC] Support generic conditional branches in asm parser This adds instruction patterns to cover the generic forms of the conditional branch instructions. This allows the assembler to support the generic mnemonics. The compiler will still generate the various specific forms of the instruction that were already supported. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184722 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrFormats.td | 14 ++++++++++++ lib/Target/PowerPC/PPCInstrInfo.td | 41 +++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index a244058..b316fa6 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -145,6 +145,20 @@ class BForm_2 opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, let Inst{31} = lk; } +class BForm_3 opcode, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I { + bits<5> BO; + bits<5> BI; + bits<14> BD; + + let Inst{6-10} = BO; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.3 SC-Form class SCForm opcode, bits<1> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index dcea65c..7934da9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2229,6 +2229,47 @@ def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n", def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +// These generic branch instruction forms are used for the assembler parser only. +// Defs and Uses are conservative, since we don't know the BO value. +let PPC970_Unit = 7 in { + let Defs = [CTR], Uses = [CTR, RM] in { + def gBC : BForm_3<16, 0, 0, (outs), + (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst), + "bc $bo, $bi, $dst">; + def gBCA : BForm_3<16, 1, 0, (outs), + (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst), + "bca $bo, $bi, $dst">; + } + let Defs = [LR, CTR], Uses = [CTR, RM] in { + def gBCL : BForm_3<16, 0, 1, (outs), + (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst), + "bcl $bo, $bi, $dst">; + def gBCLA : BForm_3<16, 1, 1, (outs), + (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst), + "bcla $bo, $bi, $dst">; + } + let Defs = [CTR], Uses = [CTR, LR, RM] in + def gBCLR : XLForm_2<19, 16, 0, (outs), + (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), + "bclr $bo, $bi, $bh", BrB, []>; + let Defs = [LR, CTR], Uses = [CTR, LR, RM] in + def gBCLRL : XLForm_2<19, 16, 1, (outs), + (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), + "bclrl $bo, $bi, $bh", BrB, []>; + let Defs = [CTR], Uses = [CTR, LR, RM] in + def gBCCTR : XLForm_2<19, 528, 0, (outs), + (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), + "bcctr $bo, $bi, $bh", BrB, []>; + let Defs = [LR, CTR], Uses = [CTR, LR, RM] in + def gBCCTRL : XLForm_2<19, 528, 1, (outs), + (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), + "bcctrl $bo, $bi, $bh", BrB, []>; +} +def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>; +def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>; +def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>; +def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>; + multiclass BranchExtendedMnemonic { def : InstAlias<"b"#name#" $cc, $dst", (BCC bibo, crrc:$cc, condbrtarget:$dst)>; -- cgit v1.1 From 48473a8de50d6047432a3619e4781788ba004c93 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 24 Jun 2013 12:49:20 +0000 Subject: [PowerPC] Add t/f branch mnemonics to asm parser This adds the bt/bf/bd(n)zt/bd(n)zf mnemonics as aliases for the asm parser, resolving to the generic conditional patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184725 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 7934da9..df01baa 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2270,6 +2270,26 @@ def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>; +multiclass BranchSimpleMnemonic1 { + def : InstAlias<"b"#name#" $bi, $dst", (gBC bo, crbitrc:$bi, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"a $bi, $dst", (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"lr $bi", (gBCLR bo, crbitrc:$bi, 0)>; + def : InstAlias<"b"#name#"l $bi, $dst", (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"la $bi, $dst", (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"lrl $bi", (gBCLRL bo, crbitrc:$bi, 0)>; +} +multiclass BranchSimpleMnemonic2 + : BranchSimpleMnemonic1 { + def : InstAlias<"b"#name#"ctr $bi", (gBCCTR bo, crbitrc:$bi, 0)>; + def : InstAlias<"b"#name#"ctrl $bi", (gBCCTRL bo, crbitrc:$bi, 0)>; +} +defm : BranchSimpleMnemonic2<"t", 12>; +defm : BranchSimpleMnemonic2<"f", 4>; +defm : BranchSimpleMnemonic1<"dnzt", 8>; +defm : BranchSimpleMnemonic1<"dnzf", 0>; +defm : BranchSimpleMnemonic1<"dzt", 10>; +defm : BranchSimpleMnemonic1<"dzf", 2>; + multiclass BranchExtendedMnemonic { def : InstAlias<"b"#name#" $cc, $dst", (BCC bibo, crrc:$cc, condbrtarget:$dst)>; -- cgit v1.1 From f1505ff35bf76cb044c261b4cc97773d47658805 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 24 Jun 2013 13:19:41 +0000 Subject: NVPTXTargetObjectFile.h: Initialize some pointers as NULL in the constructor of NVPTXTargetObjectFile. ~NVPTXTargetObjectFile() tries to delete them. It caused crash on some hosts since r184595. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184728 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXTargetObjectFile.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 6ab0e08..bfd6ab1 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -21,7 +21,29 @@ class Module; class NVPTXTargetObjectFile : public TargetLoweringObjectFile { public: - NVPTXTargetObjectFile() {} + NVPTXTargetObjectFile() { + TextSection = 0; + DataSection = 0; + BSSSection = 0; + ReadOnlySection = 0; + + StaticCtorSection = 0; + StaticDtorSection = 0; + LSDASection = 0; + EHFrameSection = 0; + DwarfAbbrevSection = 0; + DwarfInfoSection = 0; + DwarfLineSection = 0; + DwarfFrameSection = 0; + DwarfPubTypesSection = 0; + DwarfDebugInlineSection = 0; + DwarfStrSection = 0; + DwarfLocSection = 0; + DwarfARangesSection = 0; + DwarfRangesSection = 0; + DwarfMacroInfoSection = 0; + } + ~NVPTXTargetObjectFile() { delete TextSection; delete DataSection; -- cgit v1.1 From 2e8bd8950345b0857130dd0f4068222a79c103f2 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 24 Jun 2013 16:52:04 +0000 Subject: [PowerPC] Add predicted forms of branches This adds support for the predicted forms of branches (+/-). There are three cases to consider: - Branches using a PPC::Predicate code For these, I've added new PPC::Predicate codes corresponding to the BO values for predicted branch forms, and updated insn printing to print them correctly. I've also added new aliases for the asm parser matching the new forms. - bt/bf I've added new aliases matching to gBC etc. - bd(n)z variants I've added new instruction patterns for the predicted forms. In all cases, the new patterns are used for the asm parser only. (The new infrastructure ought to be sufficient to allow use by the compiler too at some point.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184754 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 16 +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 88 ++++++++++-- lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 32 +++++ lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 32 +++-- lib/Target/PowerPC/PPCInstr64Bit.td | 4 +- lib/Target/PowerPC/PPCInstrInfo.td | 155 +++++++++++++++------- 6 files changed, 259 insertions(+), 68 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 999c677..6803d66 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -747,6 +747,22 @@ bool PPCAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands) { // The first operand is the token for the instruction name. + // If the next character is a '+' or '-', we need to add it to the + // instruction name, to match what TableGen is doing. + if (getLexer().is(AsmToken::Plus)) { + getLexer().Lex(); + char *NewOpcode = new char[Name.size() + 1]; + memcpy(NewOpcode, Name.data(), Name.size()); + NewOpcode[Name.size()] = '+'; + Name = StringRef(NewOpcode, Name.size() + 1); + } + if (getLexer().is(AsmToken::Minus)) { + getLexer().Lex(); + char *NewOpcode = new char[Name.size() + 1]; + memcpy(NewOpcode, Name.data(), Name.size()); + NewOpcode[Name.size()] = '-'; + Name = StringRef(NewOpcode, Name.size() + 1); + } // If the instruction ends in a '.', we need to create a separate // token for it, to match what TableGen is doing. size_t Dot = Name.find('.'); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 9af5e53..920cda9 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -90,19 +90,89 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { - case PPC::PRED_LT: O << "lt"; return; - case PPC::PRED_LE: O << "le"; return; - case PPC::PRED_EQ: O << "eq"; return; - case PPC::PRED_GE: O << "ge"; return; - case PPC::PRED_GT: O << "gt"; return; - case PPC::PRED_NE: O << "ne"; return; - case PPC::PRED_UN: O << "un"; return; - case PPC::PRED_NU: O << "nu"; return; + case PPC::PRED_LT_MINUS: + case PPC::PRED_LT_PLUS: + case PPC::PRED_LT: + O << "lt"; + return; + case PPC::PRED_LE_MINUS: + case PPC::PRED_LE_PLUS: + case PPC::PRED_LE: + O << "le"; + return; + case PPC::PRED_EQ_MINUS: + case PPC::PRED_EQ_PLUS: + case PPC::PRED_EQ: + O << "eq"; + return; + case PPC::PRED_GE_MINUS: + case PPC::PRED_GE_PLUS: + case PPC::PRED_GE: + O << "ge"; + return; + case PPC::PRED_GT_MINUS: + case PPC::PRED_GT_PLUS: + case PPC::PRED_GT: + O << "gt"; + return; + case PPC::PRED_NE_MINUS: + case PPC::PRED_NE_PLUS: + case PPC::PRED_NE: + O << "ne"; + return; + case PPC::PRED_UN_MINUS: + case PPC::PRED_UN_PLUS: + case PPC::PRED_UN: + O << "un"; + return; + case PPC::PRED_NU_MINUS: + case PPC::PRED_NU_PLUS: + case PPC::PRED_NU: + O << "nu"; + return; + default: + llvm_unreachable("Invalid predicate code"); + } + } + + if (StringRef(Modifier) == "pm") { + switch ((PPC::Predicate)Code) { + case PPC::PRED_LT: + case PPC::PRED_LE: + case PPC::PRED_EQ: + case PPC::PRED_GE: + case PPC::PRED_GT: + case PPC::PRED_NE: + case PPC::PRED_UN: + case PPC::PRED_NU: + return; + case PPC::PRED_LT_MINUS: + case PPC::PRED_LE_MINUS: + case PPC::PRED_EQ_MINUS: + case PPC::PRED_GE_MINUS: + case PPC::PRED_GT_MINUS: + case PPC::PRED_NE_MINUS: + case PPC::PRED_UN_MINUS: + case PPC::PRED_NU_MINUS: + O << "-"; + return; + case PPC::PRED_LT_PLUS: + case PPC::PRED_LE_PLUS: + case PPC::PRED_EQ_PLUS: + case PPC::PRED_GE_PLUS: + case PPC::PRED_GT_PLUS: + case PPC::PRED_NE_PLUS: + case PPC::PRED_UN_PLUS: + case PPC::PRED_NU_PLUS: + O << "+"; + return; + default: + llvm_unreachable("Invalid predicate code"); } } assert(StringRef(Modifier) == "reg" && - "Need to specify 'cc' or 'reg' as predicate op modifier!"); + "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!"); printOperand(MI, OpNo+1, O); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 853e505..63facc5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -26,6 +26,22 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_LE: return PPC::PRED_GT; case PPC::PRED_NU: return PPC::PRED_UN; case PPC::PRED_UN: return PPC::PRED_NU; + case PPC::PRED_EQ_MINUS: return PPC::PRED_NE_PLUS; + case PPC::PRED_NE_MINUS: return PPC::PRED_EQ_PLUS; + case PPC::PRED_LT_MINUS: return PPC::PRED_GE_PLUS; + case PPC::PRED_GE_MINUS: return PPC::PRED_LT_PLUS; + case PPC::PRED_GT_MINUS: return PPC::PRED_LE_PLUS; + case PPC::PRED_LE_MINUS: return PPC::PRED_GT_PLUS; + case PPC::PRED_NU_MINUS: return PPC::PRED_UN_PLUS; + case PPC::PRED_UN_MINUS: return PPC::PRED_NU_PLUS; + case PPC::PRED_EQ_PLUS: return PPC::PRED_NE_MINUS; + case PPC::PRED_NE_PLUS: return PPC::PRED_EQ_MINUS; + case PPC::PRED_LT_PLUS: return PPC::PRED_GE_MINUS; + case PPC::PRED_GE_PLUS: return PPC::PRED_LT_MINUS; + case PPC::PRED_GT_PLUS: return PPC::PRED_LE_MINUS; + case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS; + case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS; + case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS; } llvm_unreachable("Unknown PPC branch opcode!"); } @@ -40,6 +56,22 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { case PPC::PRED_LE: return PPC::PRED_GE; case PPC::PRED_NU: return PPC::PRED_NU; case PPC::PRED_UN: return PPC::PRED_UN; + case PPC::PRED_EQ_MINUS: return PPC::PRED_EQ_MINUS; + case PPC::PRED_NE_MINUS: return PPC::PRED_NE_MINUS; + case PPC::PRED_LT_MINUS: return PPC::PRED_GT_MINUS; + case PPC::PRED_GE_MINUS: return PPC::PRED_LE_MINUS; + case PPC::PRED_GT_MINUS: return PPC::PRED_LT_MINUS; + case PPC::PRED_LE_MINUS: return PPC::PRED_GE_MINUS; + case PPC::PRED_NU_MINUS: return PPC::PRED_NU_MINUS; + case PPC::PRED_UN_MINUS: return PPC::PRED_UN_MINUS; + case PPC::PRED_EQ_PLUS: return PPC::PRED_EQ_PLUS; + case PPC::PRED_NE_PLUS: return PPC::PRED_NE_PLUS; + case PPC::PRED_LT_PLUS: return PPC::PRED_GT_PLUS; + case PPC::PRED_GE_PLUS: return PPC::PRED_LE_PLUS; + case PPC::PRED_GT_PLUS: return PPC::PRED_LT_PLUS; + case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS; + case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS; + case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS; } llvm_unreachable("Unknown PPC branch opcode!"); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 444758c..d498c2f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -25,14 +25,30 @@ namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. enum Predicate { - PRED_LT = (0 << 5) | 12, - PRED_LE = (1 << 5) | 4, - PRED_EQ = (2 << 5) | 12, - PRED_GE = (0 << 5) | 4, - PRED_GT = (1 << 5) | 12, - PRED_NE = (2 << 5) | 4, - PRED_UN = (3 << 5) | 12, - PRED_NU = (3 << 5) | 4 + PRED_LT = (0 << 5) | 12, + PRED_LE = (1 << 5) | 4, + PRED_EQ = (2 << 5) | 12, + PRED_GE = (0 << 5) | 4, + PRED_GT = (1 << 5) | 12, + PRED_NE = (2 << 5) | 4, + PRED_UN = (3 << 5) | 12, + PRED_NU = (3 << 5) | 4, + PRED_LT_MINUS = (0 << 5) | 14, + PRED_LE_MINUS = (1 << 5) | 6, + PRED_EQ_MINUS = (2 << 5) | 14, + PRED_GE_MINUS = (0 << 5) | 6, + PRED_GT_MINUS = (1 << 5) | 14, + PRED_NE_MINUS = (2 << 5) | 6, + PRED_UN_MINUS = (3 << 5) | 14, + PRED_NU_MINUS = (3 << 5) | 6, + PRED_LT_PLUS = (0 << 5) | 15, + PRED_LE_PLUS = (1 << 5) | 7, + PRED_EQ_PLUS = (2 << 5) | 15, + PRED_GE_PLUS = (0 << 5) | 7, + PRED_GT_PLUS = (1 << 5) | 15, + PRED_NE_PLUS = (2 << 5) | 7, + PRED_UN_PLUS = (3 << 5) | 15, + PRED_NU_PLUS = (3 << 5) | 7 }; /// Invert the specified predicate. != -> ==, < -> >=. diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 89883e2..cab1a20 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -69,7 +69,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isCodeGenOnly = 1 in def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr ${cond:reg}", BrB, []>, + "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>, Requires<[In64BitMode]>; } } @@ -130,7 +130,7 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { let isCodeGenOnly = 1 in def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl ${cond:reg}", BrB, []>, + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>, Requires<[In64BitMode]>; } } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index df01baa..3433696 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -883,7 +883,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isCodeGenOnly = 1 in def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr ${cond:reg}", BrB, []>; + "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>; } } @@ -905,21 +905,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { // a two-value operand where a dag node expects two operands. :( let isCodeGenOnly = 1 in { def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), - "b${cond:cc} ${cond:reg}, $dst" + "b${cond:cc}${cond:pm} ${cond:reg}, $dst" /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst), - "b${cond:cc}a ${cond:reg}, $dst">; + "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">; let isReturn = 1, Uses = [LR, RM] in def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), - "b${cond:cc}lr ${cond:reg}", BrB, []>; + "b${cond:cc}lr${cond:pm} ${cond:reg}", BrB, []>; + } - let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { - def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { + def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), "bdzlr", BrB, []>; - def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), "bdnzlr", BrB, []>; - } + def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins), + "bdzlr+", BrB, []>; + def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins), + "bdnzlr+", BrB, []>; + def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins), + "bdzlr-", BrB, []>; + def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins), + "bdnzlr-", BrB, []>; } let Defs = [CTR], Uses = [CTR] in { @@ -931,6 +939,22 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { "bdza $dst">; def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst), "bdnza $dst">; + def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz+ $dst">; + def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz+ $dst">; + def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdza+ $dst">; + def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdnza+ $dst">; + def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz- $dst">; + def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz- $dst">; + def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdza- $dst">; + def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdnza- $dst">; } } @@ -952,9 +976,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { let isCodeGenOnly = 1 in { def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), - "b${cond:cc}l ${cond:reg}, $dst">; + "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), - "b${cond:cc}la ${cond:reg}, $dst">; + "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">; } } let Uses = [CTR, RM] in { @@ -964,7 +988,7 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { let isCodeGenOnly = 1 in def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl ${cond:reg}", BrB, []>; + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>; } let Uses = [LR, RM] in { def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins), @@ -972,7 +996,7 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { let isCodeGenOnly = 1 in def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), - "b${cond:cc}lrl ${cond:reg}", BrB, []>; + "b${cond:cc}lrl${cond:pm} ${cond:reg}", BrB, []>; } let Defs = [CTR], Uses = [CTR, RM] in { def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst), @@ -983,12 +1007,36 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { "bdzla $dst">; def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst), "bdnzla $dst">; + def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$dst), + "bdzl+ $dst">; + def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$dst), + "bdnzl+ $dst">; + def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdzla+ $dst">; + def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdnzla+ $dst">; + def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$dst), + "bdzl- $dst">; + def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$dst), + "bdnzl- $dst">; + def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdzla- $dst">; + def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdnzla- $dst">; } let Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins), "bdzlrl", BrB, []>; def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins), "bdnzlrl", BrB, []>; + def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins), + "bdzlrl+", BrB, []>; + def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins), + "bdnzlrl+", BrB, []>; + def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins), + "bdzlrl-", BrB, []>; + def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins), + "bdnzlrl-", BrB, []>; } } @@ -2270,67 +2318,76 @@ def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>; -multiclass BranchSimpleMnemonic1 { - def : InstAlias<"b"#name#" $bi, $dst", (gBC bo, crbitrc:$bi, condbrtarget:$dst)>; - def : InstAlias<"b"#name#"a $bi, $dst", (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>; - def : InstAlias<"b"#name#"lr $bi", (gBCLR bo, crbitrc:$bi, 0)>; - def : InstAlias<"b"#name#"l $bi, $dst", (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>; - def : InstAlias<"b"#name#"la $bi, $dst", (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>; - def : InstAlias<"b"#name#"lrl $bi", (gBCLRL bo, crbitrc:$bi, 0)>; -} -multiclass BranchSimpleMnemonic2 - : BranchSimpleMnemonic1 { - def : InstAlias<"b"#name#"ctr $bi", (gBCCTR bo, crbitrc:$bi, 0)>; - def : InstAlias<"b"#name#"ctrl $bi", (gBCCTRL bo, crbitrc:$bi, 0)>; -} -defm : BranchSimpleMnemonic2<"t", 12>; -defm : BranchSimpleMnemonic2<"f", 4>; -defm : BranchSimpleMnemonic1<"dnzt", 8>; -defm : BranchSimpleMnemonic1<"dnzf", 0>; -defm : BranchSimpleMnemonic1<"dzt", 10>; -defm : BranchSimpleMnemonic1<"dzf", 2>; - -multiclass BranchExtendedMnemonic { - def : InstAlias<"b"#name#" $cc, $dst", +multiclass BranchSimpleMnemonic1 { + def : InstAlias<"b"#name#pm#" $bi, $dst", (gBC bo, crbitrc:$bi, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"a"#pm#" $bi, $dst", (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"lr"#pm#" $bi", (gBCLR bo, crbitrc:$bi, 0)>; + def : InstAlias<"b"#name#"l"#pm#" $bi, $dst", (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"la"#pm#" $bi, $dst", (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"lrl"#pm#" $bi", (gBCLRL bo, crbitrc:$bi, 0)>; +} +multiclass BranchSimpleMnemonic2 + : BranchSimpleMnemonic1 { + def : InstAlias<"b"#name#"ctr"#pm#" $bi", (gBCCTR bo, crbitrc:$bi, 0)>; + def : InstAlias<"b"#name#"ctrl"#pm#" $bi", (gBCCTRL bo, crbitrc:$bi, 0)>; +} +defm : BranchSimpleMnemonic2<"t", "", 12>; +defm : BranchSimpleMnemonic2<"f", "", 4>; +defm : BranchSimpleMnemonic2<"t", "-", 14>; +defm : BranchSimpleMnemonic2<"f", "-", 6>; +defm : BranchSimpleMnemonic2<"t", "+", 15>; +defm : BranchSimpleMnemonic2<"f", "+", 7>; +defm : BranchSimpleMnemonic1<"dnzt", "", 8>; +defm : BranchSimpleMnemonic1<"dnzf", "", 0>; +defm : BranchSimpleMnemonic1<"dzt", "", 10>; +defm : BranchSimpleMnemonic1<"dzf", "", 2>; + +multiclass BranchExtendedMnemonicPM { + def : InstAlias<"b"#name#pm#" $cc, $dst", (BCC bibo, crrc:$cc, condbrtarget:$dst)>; - def : InstAlias<"b"#name#" $dst", + def : InstAlias<"b"#name#pm#" $dst", (BCC bibo, CR0, condbrtarget:$dst)>; - def : InstAlias<"b"#name#"a $cc, $dst", + def : InstAlias<"b"#name#"a"#pm#" $cc, $dst", (BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>; - def : InstAlias<"b"#name#"a $dst", + def : InstAlias<"b"#name#"a"#pm#" $dst", (BCCA bibo, CR0, abscondbrtarget:$dst)>; - def : InstAlias<"b"#name#"lr $cc", + def : InstAlias<"b"#name#"lr"#pm#" $cc", (BCLR bibo, crrc:$cc)>; - def : InstAlias<"b"#name#"lr", + def : InstAlias<"b"#name#"lr"#pm, (BCLR bibo, CR0)>; - def : InstAlias<"b"#name#"ctr $cc", + def : InstAlias<"b"#name#"ctr"#pm#" $cc", (BCCTR bibo, crrc:$cc)>; - def : InstAlias<"b"#name#"ctr", + def : InstAlias<"b"#name#"ctr"#pm, (BCCTR bibo, CR0)>; - def : InstAlias<"b"#name#"l $cc, $dst", + def : InstAlias<"b"#name#"l"#pm#" $cc, $dst", (BCCL bibo, crrc:$cc, condbrtarget:$dst)>; - def : InstAlias<"b"#name#"l $dst", + def : InstAlias<"b"#name#"l"#pm#" $dst", (BCCL bibo, CR0, condbrtarget:$dst)>; - def : InstAlias<"b"#name#"la $cc, $dst", + def : InstAlias<"b"#name#"la"#pm#" $cc, $dst", (BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>; - def : InstAlias<"b"#name#"la $dst", + def : InstAlias<"b"#name#"la"#pm#" $dst", (BCCLA bibo, CR0, abscondbrtarget:$dst)>; - def : InstAlias<"b"#name#"lrl $cc", + def : InstAlias<"b"#name#"lrl"#pm#" $cc", (BCLRL bibo, crrc:$cc)>; - def : InstAlias<"b"#name#"lrl", + def : InstAlias<"b"#name#"lrl"#pm, (BCLRL bibo, CR0)>; - def : InstAlias<"b"#name#"ctrl $cc", + def : InstAlias<"b"#name#"ctrl"#pm#" $cc", (BCCTRL bibo, crrc:$cc)>; - def : InstAlias<"b"#name#"ctrl", + def : InstAlias<"b"#name#"ctrl"#pm, (BCCTRL bibo, CR0)>; } +multiclass BranchExtendedMnemonic { + defm : BranchExtendedMnemonicPM; + defm : BranchExtendedMnemonicPM; + defm : BranchExtendedMnemonicPM; +} defm : BranchExtendedMnemonic<"lt", 12>; defm : BranchExtendedMnemonic<"gt", 44>; defm : BranchExtendedMnemonic<"eq", 76>; -- cgit v1.1 From 6c4b8fe9dd221a219bc3a92ca3019de7d6e4454b Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Mon, 24 Jun 2013 16:57:57 +0000 Subject: R600: Fix spelling error in comment our -> or git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184756 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 83d735f..a468ab7 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -126,7 +126,7 @@ class R600_1OP_Helper inst, string opName, SDPatternOperator node, [(set R600_Reg32:$dst, (node R600_Reg32:$src0))] >; -// If you add our change the operands for R600_2OP instructions, you must +// If you add or change the operands for R600_2OP instructions, you must // also update the R600Op2OperandIndex::ROI enum in R600Defines.h, // R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx(). class R600_2OP inst, string opName, list pattern, -- cgit v1.1 From cf1a3b16c00cef30207e6b83d046ad38752dfefb Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 24 Jun 2013 17:03:25 +0000 Subject: PPC: Remove default case from fully covered switch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184758 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 920cda9..eee1f45 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -130,9 +130,8 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, case PPC::PRED_NU: O << "nu"; return; - default: - llvm_unreachable("Invalid predicate code"); } + llvm_unreachable("Invalid predicate code"); } if (StringRef(Modifier) == "pm") { @@ -166,9 +165,8 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, case PPC::PRED_NU_PLUS: O << "+"; return; - default: - llvm_unreachable("Invalid predicate code"); } + llvm_unreachable("Invalid predicate code"); } assert(StringRef(Modifier) == "reg" && -- cgit v1.1 From 96fb3a25cb0007f06d22d28c0b9c3503798324f6 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 24 Jun 2013 18:08:03 +0000 Subject: [PowerPC] Support some miscellaneous mnemonics in the asm parser This adds support for the following extended mnemonics: xnop mr. not not. la git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184767 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 9 +++++++++ lib/Target/PowerPC/PPCInstrInfo.td | 8 ++++++++ 2 files changed, 17 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 6803d66..6e075f2 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -423,6 +423,15 @@ void PPCAsmParser:: ProcessInstruction(MCInst &Inst, const SmallVectorImpl &Operands) { switch (Inst.getOpcode()) { + case PPC::LAx: { + MCInst TmpInst; + TmpInst.setOpcode(PPC::LA); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(1)); + Inst = TmpInst; + break; + } case PPC::SLWI: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 3433696..ee992c0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2266,7 +2266,15 @@ class PPCAsmPseudo def : InstAlias<"sc", (SC 0)>; +def : InstAlias<"xnop", (XORI R0, R0, 0)>; + def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; +def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; + +def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; +def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; + +def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>; def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; -- cgit v1.1 From 746f7cafb2d1362de62024a6e62664c3eb3999d2 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 25 Jun 2013 01:14:20 +0000 Subject: PPCAsmParser.cpp: Quote "@l/@ha" in comments. [-Wdocumentation] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184809 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 6e075f2..2aed324 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -568,7 +568,7 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { return Error(StartLoc, "invalid register name"); } -/// Extract @l/@ha modifier from expression. Recursively scan +/// Extract \code @l/@ha \endcode modifier from expression. Recursively scan /// the expression and check for VK_PPC_LO/HI/HA /// symbol variants. If all symbols with modifier use the same /// variant, return the corresponding PPCMCExpr::VariantKind, @@ -654,7 +654,7 @@ ExtractModifierFromExpr(const MCExpr *E, } /// Parse an expression. This differs from the default "parseExpression" -/// in that it handles complex @l/@ha modifiers. +/// in that it handles complex \code @l/@ha \endcode modifiers. bool PPCAsmParser:: ParseExpression(const MCExpr *&EVal) { if (getParser().parseExpression(EVal)) -- cgit v1.1 From 1cb1107c660bdade8b033bae10bf223d977691e5 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 25 Jun 2013 02:39:20 +0000 Subject: R600: Fix typo in R600Schedule.td This should only make a difference in programs that use a lot of the vector ALU instructions like BFI_INT and BIT_ALIGN. There is a slight improvement in the phatk bitcoin mining kernel with this patch on Evergreen (vector size == 1): Before: 1173 Instruction Groups / 9520 dwords After: 1167 Instruction Groups / 9510 dwords Reviewed-by: Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184819 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Schedule.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600Schedule.td b/lib/Target/R600/R600Schedule.td index 78a460a..207233d 100644 --- a/lib/Target/R600/R600Schedule.td +++ b/lib/Target/R600/R600Schedule.td @@ -29,7 +29,7 @@ def R600_VLIW5_Itin : ProcessorItineraries < [], [ InstrItinData]>, - InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]> ] @@ -40,7 +40,7 @@ def R600_VLIW4_Itin : ProcessorItineraries < [], [ InstrItinData]>, - InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]> ] -- cgit v1.1 From fa57da12f7ce559d502a4319d39a1f0582f4d2b3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 25 Jun 2013 02:39:25 +0000 Subject: R600/SI: Add support for v4i32 and v4f32 kernel args Tested-By: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184820 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUCallingConv.td | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 84e4f3a..826932b 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -38,10 +38,11 @@ def CC_SI : CallingConv<[ // Calling convention for SI compute kernels def CC_SI_Kernel : CallingConv<[ - CCIfType<[i64], CCAssignToStack <8, 4>>, - CCIfType<[i32, f32], CCAssignToStack <4, 4>>, - CCIfType<[i16], CCAssignToStack <2, 4>>, - CCIfType<[i8], CCAssignToStack <1, 4>> + CCIfType<[v4i32, v4f32], CCAssignToStack <16, 4>>, + CCIfType<[i64], CCAssignToStack < 8, 4>>, + CCIfType<[i32, f32], CCAssignToStack < 4, 4>>, + CCIfType<[i16], CCAssignToStack < 2, 4>>, + CCIfType<[i8], CCAssignToStack < 1, 4>> ]>; def CC_AMDGPU : CallingConv<[ -- cgit v1.1 From d67d029b6d2d9cc4274ffea5cc5c4bd120fe3425 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 25 Jun 2013 02:39:30 +0000 Subject: R600: Add support for i32 loads from the constant address space on Cayman Tested-By: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184821 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index a468ab7..3c83905 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1755,6 +1755,15 @@ def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1, [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] >; +//===----------------------------------------------------------------------===// +// Constant Loads +// XXX: We are currently storing all constants in the global address space. +//===----------------------------------------------------------------------===// + +def CONSTANT_LOAD_cm : VTX_READ_32_cm <1, + [(set i32:$dst_gpr, (constant_load ADDRVTX_READ:$src_gpr))] +>; + } // End isCayman //===----------------------------------------------------------------------===// -- cgit v1.1 From 73e44d8ae4c227af92b8f96f447e4a7ed38f6de5 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 25 Jun 2013 02:39:35 +0000 Subject: R600/SI: Report unaligned memory accesses as legal for > 32-bit types In reality, some unaligned memory accesses are legal for 32-bit types and smaller too, but it all depends on the address space. Allowing unaligned loads/stores for > 32-bit types is mainly to prevent the legalizer from splitting one load into multiple loads of smaller types. https://bugs.freedesktop.org/show_bug.cgi?id=65873 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184822 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 12 ++++++++++++ lib/Target/R600/SIISelLowering.h | 1 + 2 files changed, 13 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index ea2b123..776eb86 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -87,6 +87,18 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setSchedulingPreference(Sched::RegPressure); } +//===----------------------------------------------------------------------===// +// TargetLowering queries +//===----------------------------------------------------------------------===// + +bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *IsFast) const { + // XXX: This depends on the address space and also we may want to revist + // the alignment values we specify in the DataLayout. + return VT.bitsGT(MVT::i32); +} + + SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL, SDValue Chain, unsigned Offset) const { diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 78ae6a1..0a856d5 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -40,6 +40,7 @@ class SITargetLowering : public AMDGPUTargetLowering { public: SITargetLowering(TargetMachine &tm); + bool allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, -- cgit v1.1 From b2b5dc642cbbe781f73b9da83874d4005c50bd8e Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 25 Jun 2013 02:48:58 +0000 Subject: Revert "Temporarily enable MI-Sched on X86." This reverts commit 98a9b72e8c56dc13a2617de84503a3d78352789c. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184823 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 59911ba..66832b9 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -361,14 +361,11 @@ public: /// memset with zero passed as the second argument. Otherwise it /// returns null. const char *getBZeroEntry() const; - + /// This function returns true if the target has sincos() routine in its /// compiler runtime or math libraries. bool hasSinCos() const; - /// Enable the MachineScheduler pass for all X86 subtargets. - bool enableMachineScheduler() const LLVM_OVERRIDE { return true; } - /// enablePostRAScheduler - run for Atom optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, -- cgit v1.1 From 5df8c1bbb66a6b7386c9d7866592ecbb646da66c Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Tue, 25 Jun 2013 12:22:21 +0000 Subject: [NVPTX] Default pointer type doesn't make sense for getParamSymbol() git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184831 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 +- lib/Target/NVPTX/NVPTXISelLowering.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6b10cd4..be8e130 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1135,7 +1135,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // A plain scalar. if (isABI || isKernel) { // If ABI, load from the param symbol - SDValue Arg = getParamSymbol(DAG, idx); + SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); // Conjure up a value that we can get the address space from. // FIXME: Using a constant here is a hack. Value *srcValue = Constant::getNullValue( diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index c4119c6..2ec9436 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -144,7 +144,7 @@ private: SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT = MVT::i32) const; - SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const; + SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; -- cgit v1.1 From 9c52f81e1787dc9666e510f5b7a0ea75b697cd0b Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 25 Jun 2013 13:16:48 +0000 Subject: [PowerPC] Add extended subtract mnemonics This adds support for the extended subtract mnemonics to the asm parser: subi subis subic subic. sub sub. subc subc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184832 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 40 +++++++++++++++++++++++++++ lib/Target/PowerPC/PPCInstrInfo.td | 14 ++++++++++ 2 files changed, 54 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 2aed324..d426ba0 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -432,6 +432,46 @@ ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } + case PPC::SUBI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::ADDI); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(-N)); + Inst = TmpInst; + break; + } + case PPC::SUBIS: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::ADDIS); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(-N)); + Inst = TmpInst; + break; + } + case PPC::SUBIC: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::ADDIC); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(-N)); + Inst = TmpInst; + break; + } + case PPC::SUBICo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::ADDICo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(-N)); + Inst = TmpInst; + break; + } case PPC::SLWI: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index ee992c0..5b99a66 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2276,6 +2276,20 @@ def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>; +def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm", + (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; +def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm", + (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; +def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm", + (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; +def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm", + (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; + +def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; + def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n", -- cgit v1.1 From 816c06f7fa73e8150e260a11d897be2f52d4f2b8 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 25 Jun 2013 13:17:10 +0000 Subject: [PowerPC] Add rldcr/rldic instructions This adds pattern for the rldcr and rldic instructions (the last instruction from the rotate/shift family that were missing). They are currently used only by the asm parser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184833 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstr64Bit.td | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index cab1a20..d612fd9 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -538,6 +538,10 @@ defm RLDCL : MDSForm_1r<30, 8, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD, []>, isPPC64; +defm RLDCR : MDSForm_1r<30, 9, + (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), + "rldcr", "$rA, $rS, $rB, $MBE", IntRotateD, + []>, isPPC64; defm RLDICL : MDForm_1r<30, 0, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI, @@ -546,6 +550,10 @@ defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; +defm RLDIC : MDForm_1r<30, 2, + (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldic", "$rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64; let Interpretation64Bit = 1 in { defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA), -- cgit v1.1 From 1bc147c0910bb02398730c79e0d0310ffbbd2868 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 25 Jun 2013 13:17:41 +0000 Subject: [PowerPC] Add extended rotate/shift mnemonics This adds all missing extended rotate/shift mnemonics to the asm parser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184834 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 198 ++++++++++++++++++++++++-- lib/Target/PowerPC/PPCInstrInfo.td | 75 ++++++++++ 2 files changed, 264 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index d426ba0..2310bb3 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -422,7 +422,8 @@ void PPCOperand::print(raw_ostream &OS) const { void PPCAsmParser:: ProcessInstruction(MCInst &Inst, const SmallVectorImpl &Operands) { - switch (Inst.getOpcode()) { + int Opcode = Inst.getOpcode(); + switch (Opcode) { case PPC::LAx: { MCInst TmpInst; TmpInst.setOpcode(PPC::LA); @@ -472,10 +473,82 @@ ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } - case PPC::SLWI: { + case PPC::EXTLWI: + case PPC::EXTLWIo: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(PPC::RLWINM); + int64_t B = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(B)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(MCOperand::CreateImm(N - 1)); + Inst = TmpInst; + break; + } + case PPC::EXTRWI: + case PPC::EXTRWIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + int64_t B = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(B + N)); + TmpInst.addOperand(MCOperand::CreateImm(32 - N)); + TmpInst.addOperand(MCOperand::CreateImm(31)); + Inst = TmpInst; + break; + } + case PPC::INSLWI: + case PPC::INSLWIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + int64_t B = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::INSLWI? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(32 - B)); + TmpInst.addOperand(MCOperand::CreateImm(B)); + TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1)); + Inst = TmpInst; + break; + } + case PPC::INSRWI: + case PPC::INSRWIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + int64_t B = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::INSRWI? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(32 - (B + N))); + TmpInst.addOperand(MCOperand::CreateImm(B)); + TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1)); + Inst = TmpInst; + break; + } + case PPC::ROTRWI: + case PPC::ROTRWIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(32 - N)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(MCOperand::CreateImm(31)); + Inst = TmpInst; + break; + } + case PPC::SLWI: + case PPC::SLWIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::CreateImm(N)); @@ -484,10 +557,11 @@ ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } - case PPC::SRWI: { + case PPC::SRWI: + case PPC::SRWIo: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(PPC::RLWINM); + TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::CreateImm(32 - N)); @@ -496,10 +570,90 @@ ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } - case PPC::SLDI: { + case PPC::CLRRWI: + case PPC::CLRRWIo: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(PPC::RLDICR); + TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(MCOperand::CreateImm(31 - N)); + Inst = TmpInst; + break; + } + case PPC::CLRLSLWI: + case PPC::CLRLSLWIo: { + MCInst TmpInst; + int64_t B = Inst.getOperand(2).getImm(); + int64_t N = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(B - N)); + TmpInst.addOperand(MCOperand::CreateImm(31 - N)); + Inst = TmpInst; + break; + } + case PPC::EXTLDI: + case PPC::EXTLDIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + int64_t B = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(B)); + TmpInst.addOperand(MCOperand::CreateImm(N - 1)); + Inst = TmpInst; + break; + } + case PPC::EXTRDI: + case PPC::EXTRDIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + int64_t B = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(B + N)); + TmpInst.addOperand(MCOperand::CreateImm(64 - N)); + Inst = TmpInst; + break; + } + case PPC::INSRDI: + case PPC::INSRDIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + int64_t B = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::INSRDI? PPC::RLDIMI : PPC::RLDIMIo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(64 - (B + N))); + TmpInst.addOperand(MCOperand::CreateImm(B)); + Inst = TmpInst; + break; + } + case PPC::ROTRDI: + case PPC::ROTRDIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(64 - N)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + Inst = TmpInst; + break; + } + case PPC::SLDI: + case PPC::SLDIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::CreateImm(N)); @@ -507,10 +661,11 @@ ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } - case PPC::SRDI: { + case PPC::SRDI: + case PPC::SRDIo: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(PPC::RLDICL); + TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::CreateImm(64 - N)); @@ -518,6 +673,31 @@ ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } + case PPC::CLRRDI: + case PPC::CLRRDIo: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(MCOperand::CreateImm(63 - N)); + Inst = TmpInst; + break; + } + case PPC::CLRLSLDI: + case PPC::CLRLSLDIo: { + MCInst TmpInst; + int64_t B = Inst.getOperand(2).getImm(); + int64_t N = Inst.getOperand(3).getImm(); + TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(B - N)); + Inst = TmpInst; + break; + } } } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 5b99a66..a9cfd5e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2290,14 +2290,89 @@ def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>; +def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>; + +def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; +def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; +def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; +def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; +def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; +def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; + +def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>; +def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>; + +def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; +def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; +def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; +def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; +def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; // These generic branch instruction forms are used for the assembler parser only. // Defs and Uses are conservative, since we don't know the BO value. -- cgit v1.1 From e3599ca1915f56ce13139fed58f6daac1cc7ca70 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:23 +0000 Subject: R600/SI: Expand and of v2i32/v4i32 for SI Also add lit test for both cases on SI, and v2i32 for evergreen. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184837 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 776eb86..bf2e7d3 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -68,6 +68,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADD, MVT::v4i32, Expand); setOperationAction(ISD::ADD, MVT::v2i32, Expand); + setOperationAction(ISD::AND, MVT::v2i32, Expand); + setOperationAction(ISD::AND, MVT::v4i32, Expand); + setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v4i32, Expand); -- cgit v1.1 From 02ed261f7f7601f8ed5685345f3542b5120bcbf0 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:26 +0000 Subject: R600/SI: Expand mul of v2i32/v4i32 for SI Also add lit test for both cases on SI, and v2i32 for evergreen. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184838 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index bf2e7d3..cb80e5e 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -71,6 +71,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::AND, MVT::v2i32, Expand); setOperationAction(ISD::AND, MVT::v4i32, Expand); + setOperationAction(ISD::MUL, MVT::v2i32, Expand); + setOperationAction(ISD::MUL, MVT::v4i32, Expand); + setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v4i32, Expand); -- cgit v1.1 From b8ce77752b9654fa81f80d63a50a8d5c5b390c9a Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:29 +0000 Subject: R600/SI: Expand or of v2i32/v4i32 for SI Also add lit test for both cases on SI, and v2i32 for evergreen. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184839 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index cb80e5e..30a7de5 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -74,6 +74,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::MUL, MVT::v2i32, Expand); setOperationAction(ISD::MUL, MVT::v4i32, Expand); + setOperationAction(ISD::OR, MVT::v2i32, Expand); + setOperationAction(ISD::OR, MVT::v4i32, Expand); + setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v4i32, Expand); -- cgit v1.1 From 60e6dacd1c21eba75599a294fe37a6072c37604f Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:32 +0000 Subject: R600/SI: Expand shl of v2i32/v4i32 for SI Also add lit test for both cases on SI, and v2i32 for evergreen. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184840 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 30a7de5..515c7a4 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -77,6 +77,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::OR, MVT::v2i32, Expand); setOperationAction(ISD::OR, MVT::v4i32, Expand); + setOperationAction(ISD::SHL, MVT::v2i32, Expand); + setOperationAction(ISD::SHL, MVT::v4i32, Expand); + setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v4i32, Expand); -- cgit v1.1 From 61de9f83cb67a9738fa146e2b4542332aabee019 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:37 +0000 Subject: R600/SI: Expand srl of v2i32/v4i32 for SI Also add lit test for both cases on SI, and v2i32 for evergreen. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184841 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 515c7a4..4219825 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -79,6 +79,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SHL, MVT::v2i32, Expand); setOperationAction(ISD::SHL, MVT::v4i32, Expand); + setOperationAction(ISD::SRL, MVT::v4i32, Expand); + setOperationAction(ISD::SRL, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v4i32, Expand); -- cgit v1.1 From c1fcf01d7fa41171fc715618d0a9359df7865aa6 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:40 +0000 Subject: R600/SI: Expand ashr of v2i32/v4i32 for SI Also add lit test for both cases on SI, and v2i32 for evergreen. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184842 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 4219825..5f44d3a 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -81,6 +81,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SHL, MVT::v4i32, Expand); setOperationAction(ISD::SRL, MVT::v4i32, Expand); setOperationAction(ISD::SRL, MVT::v2i32, Expand); + setOperationAction(ISD::SRA, MVT::v4i32, Expand); + setOperationAction(ISD::SRA, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v4i32, Expand); -- cgit v1.1 From e80978f9dd6bd8951d968fd2e85ec4c0996b62f5 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:43 +0000 Subject: R600/SI: Expand udiv v[24]i32 for SI and v2i32 for EG Also add lit test for both cases on SI, and v2i32 for evergreen. Note: I followed the guidance of the v4i32 EG check... UDIV produces really complex code, so let's just check that the instruction was lowered successfully. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184843 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 1 + lib/Target/R600/SIISelLowering.cpp | 3 +++ 2 files changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 812df83..cf349a8 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -56,6 +56,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SUB, MVT::v4i32, Expand); setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::UDIV, MVT::v2i32, Expand); setOperationAction(ISD::UDIV, MVT::v4i32, Expand); setOperationAction(ISD::UREM, MVT::v4i32, Expand); setOperationAction(ISD::SETCC, MVT::v4i32, Expand); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 5f44d3a..1fb28fa 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -87,6 +87,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::SUB, MVT::v4i32, Expand); + setOperationAction(ISD::UDIV, MVT::v2i32, Expand); + setOperationAction(ISD::UDIV, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -- cgit v1.1 From 41f3db569c893eb39a6413718a5152f6de3c5099 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:46 +0000 Subject: R600/SI: Expand urem of v2i32/v4i32 for SI Also add lit test for both cases on SI, and v2i32 for evergreen. Note: I followed the guidance of the v4i32 EG check... UREM produces really complex code, so let's just check that the instruction was lowered successfully. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184844 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 1fb28fa..a784667 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -90,6 +90,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIV, MVT::v2i32, Expand); setOperationAction(ISD::UDIV, MVT::v4i32, Expand); + setOperationAction(ISD::UREM, MVT::v2i32, Expand); + setOperationAction(ISD::UREM, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -- cgit v1.1 From 1842ec4d9fc9eeceb2a77527026dfd84ee24cff1 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:52 +0000 Subject: R600/SI: Expand xor v2i32/v4i32 Add test cases for both vector sizes on SI and also add v2i32 test for EG. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184846 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index a784667..e70c7de 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -93,6 +93,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::UREM, MVT::v2i32, Expand); setOperationAction(ISD::UREM, MVT::v4i32, Expand); + setOperationAction(ISD::XOR, MVT::v2i32, Expand); + setOperationAction(ISD::XOR, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -- cgit v1.1 From f97c7fef52098bbd6a7ccc69657d112a36d77660 Mon Sep 17 00:00:00 2001 From: Aaron Watry Date: Tue, 25 Jun 2013 13:55:57 +0000 Subject: R600: Consolidate expansion of v2i32/v4i32 ops for EG/SI By default, we expand these operations for both EG and SI. Move the duplicated code into a common space for now. If the targets ever actually implement these operations as instructions, we can override that in the relevant target. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184848 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 22 ++++++++++++++++++++++ lib/Target/R600/R600ISelLowering.cpp | 19 ------------------- lib/Target/R600/SIISelLowering.cpp | 30 ------------------------------ 3 files changed, 22 insertions(+), 49 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 02d6fab..6d73590 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -70,6 +70,28 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); + + int types[] = { + (int)MVT::v2i32, + (int)MVT::v4i32 + }; + size_t NumTypes = sizeof(types) / sizeof(*types); + + for (unsigned int x = 0; x < NumTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; + //Expand the following operations for the current type by default + setOperationAction(ISD::ADD, VT, Expand); + setOperationAction(ISD::AND, VT, Expand); + setOperationAction(ISD::MUL, VT, Expand); + setOperationAction(ISD::OR, VT, Expand); + setOperationAction(ISD::SHL, VT, Expand); + setOperationAction(ISD::SRL, VT, Expand); + setOperationAction(ISD::SRA, VT, Expand); + setOperationAction(ISD::SUB, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::XOR, VT, Expand); + } } //===---------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index cf349a8..18e83e8 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -38,30 +38,11 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::FDIV, MVT::v4f32, Expand); setOperationAction(ISD::FSUB, MVT::v4f32, Expand); - setOperationAction(ISD::ADD, MVT::v4i32, Expand); - setOperationAction(ISD::AND, MVT::v4i32, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); - setOperationAction(ISD::MUL, MVT::v2i32, Expand); - setOperationAction(ISD::MUL, MVT::v4i32, Expand); - setOperationAction(ISD::OR, MVT::v4i32, Expand); - setOperationAction(ISD::OR, MVT::v2i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); - setOperationAction(ISD::SHL, MVT::v4i32, Expand); - setOperationAction(ISD::SHL, MVT::v2i32, Expand); - setOperationAction(ISD::SRL, MVT::v4i32, Expand); - setOperationAction(ISD::SRL, MVT::v2i32, Expand); - setOperationAction(ISD::SRA, MVT::v4i32, Expand); - setOperationAction(ISD::SRA, MVT::v2i32, Expand); - setOperationAction(ISD::SUB, MVT::v4i32, Expand); - setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); - setOperationAction(ISD::UDIV, MVT::v2i32, Expand); - setOperationAction(ISD::UDIV, MVT::v4i32, Expand); - setOperationAction(ISD::UREM, MVT::v4i32, Expand); setOperationAction(ISD::SETCC, MVT::v4i32, Expand); - setOperationAction(ISD::XOR, MVT::v4i32, Expand); - setOperationAction(ISD::XOR, MVT::v2i32, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index e70c7de..9d4cfef 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -65,36 +65,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); - setOperationAction(ISD::ADD, MVT::v4i32, Expand); - setOperationAction(ISD::ADD, MVT::v2i32, Expand); - - setOperationAction(ISD::AND, MVT::v2i32, Expand); - setOperationAction(ISD::AND, MVT::v4i32, Expand); - - setOperationAction(ISD::MUL, MVT::v2i32, Expand); - setOperationAction(ISD::MUL, MVT::v4i32, Expand); - - setOperationAction(ISD::OR, MVT::v2i32, Expand); - setOperationAction(ISD::OR, MVT::v4i32, Expand); - - setOperationAction(ISD::SHL, MVT::v2i32, Expand); - setOperationAction(ISD::SHL, MVT::v4i32, Expand); - setOperationAction(ISD::SRL, MVT::v4i32, Expand); - setOperationAction(ISD::SRL, MVT::v2i32, Expand); - setOperationAction(ISD::SRA, MVT::v4i32, Expand); - setOperationAction(ISD::SRA, MVT::v2i32, Expand); - - setOperationAction(ISD::SUB, MVT::v2i32, Expand); - setOperationAction(ISD::SUB, MVT::v4i32, Expand); - - setOperationAction(ISD::UDIV, MVT::v2i32, Expand); - setOperationAction(ISD::UDIV, MVT::v4i32, Expand); - - setOperationAction(ISD::UREM, MVT::v2i32, Expand); - setOperationAction(ISD::UREM, MVT::v4i32, Expand); - - setOperationAction(ISD::XOR, MVT::v2i32, Expand); - setOperationAction(ISD::XOR, MVT::v4i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -- cgit v1.1 From 5de735a962a255676cf3a9bc255579d465670633 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 25 Jun 2013 16:49:50 +0000 Subject: [PowerPC] Support @got modifier Add VK_... values and relocation types necessary to support the @got family of modifiers. Used by the asm parser only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184860 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index f48cb5e..c2bf251 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -127,6 +127,18 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_HIGHESTA: Type = ELF::R_PPC64_ADDR16_HIGHESTA; break; + case MCSymbolRefExpr::VK_GOT: + Type = ELF::R_PPC_GOT16; + break; + case MCSymbolRefExpr::VK_PPC_GOT_LO: + Type = ELF::R_PPC_GOT16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_HI: + Type = ELF::R_PPC_GOT16_HI; + break; + case MCSymbolRefExpr::VK_PPC_GOT_HA: + Type = ELF::R_PPC_GOT16_HA; + break; case MCSymbolRefExpr::VK_PPC_TOC: Type = ELF::R_PPC64_TOC16; break; @@ -234,6 +246,12 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_LO: Type = ELF::R_PPC64_ADDR16_LO_DS; break; + case MCSymbolRefExpr::VK_GOT: + Type = ELF::R_PPC64_GOT16_DS; + break; + case MCSymbolRefExpr::VK_PPC_GOT_LO: + Type = ELF::R_PPC64_GOT16_LO_DS; + break; case MCSymbolRefExpr::VK_PPC_TOC: Type = ELF::R_PPC64_TOC16_DS; break; -- cgit v1.1 From 34eb2406b41854fc8df688fca7c0129f77d768f7 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 25 Jun 2013 19:14:09 +0000 Subject: X86 cost model: Vectorizing integer division is a bad idea radar://14057959 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184872 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index df6f37b..3bcdfc1 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -196,6 +196,16 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized. { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized. { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized. + + // Vectorizing division is a bad idea. See the SSE2 table for more comments. + { ISD::SDIV, MVT::v32i8, 32*20 }, + { ISD::SDIV, MVT::v16i16, 16*20 }, + { ISD::SDIV, MVT::v8i32, 8*20 }, + { ISD::SDIV, MVT::v4i64, 4*20 }, + { ISD::UDIV, MVT::v32i8, 32*20 }, + { ISD::UDIV, MVT::v16i16, 16*20 }, + { ISD::UDIV, MVT::v8i32, 8*20 }, + { ISD::UDIV, MVT::v4i64, 4*20 }, }; // Look for AVX2 lowering tricks. @@ -258,6 +268,21 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized. { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized. { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized. + + // It is not a good idea to vectorize division. We have to scalarize it and + // in the process we will often end up having to spilling regular + // registers. The overhead of division is going to dominate most kernels + // anyways so try hard to prevent vectorization of division - it is + // generally a bad idea. Assume somewhat arbitrarily that we have to be able + // to hide "20 cycles" for each lane. + { ISD::SDIV, MVT::v16i8, 16*20 }, + { ISD::SDIV, MVT::v8i16, 8*20 }, + { ISD::SDIV, MVT::v4i32, 4*20 }, + { ISD::SDIV, MVT::v2i64, 2*20 }, + { ISD::UDIV, MVT::v16i8, 16*20 }, + { ISD::UDIV, MVT::v8i16, 8*20 }, + { ISD::UDIV, MVT::v4i32, 4*20 }, + { ISD::UDIV, MVT::v2i64, 2*20 }, }; if (ST->hasSSE2()) { -- cgit v1.1 From 5e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 25 Jun 2013 21:22:18 +0000 Subject: R600: Use new getNamedOperandIdx function generated by TableGen git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184880 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstrInfo.cpp | 1 + lib/Target/R600/AMDGPUInstrInfo.h | 5 + lib/Target/R600/AMDILISelDAGToDAG.cpp | 91 ++++++------- lib/Target/R600/R600Defines.h | 41 +----- lib/Target/R600/R600ExpandSpecialInstrs.cpp | 16 +-- lib/Target/R600/R600ISelLowering.cpp | 2 +- lib/Target/R600/R600InstrInfo.cpp | 190 +++++++++++----------------- lib/Target/R600/R600InstrInfo.h | 8 +- lib/Target/R600/R600Instructions.td | 16 ++- lib/Target/R600/R600Packetizer.cpp | 18 +-- 10 files changed, 160 insertions(+), 228 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index 31b3002..61437e9 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #define GET_INSTRINFO_CTOR +#define GET_INSTRINFO_NAMED_OPS #define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index 3909e4e..306f467 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -23,6 +23,7 @@ #define GET_INSTRINFO_HEADER #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_OPERAND_ENUM #include "AMDGPUGenInstrInfo.inc" #define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT @@ -198,6 +199,10 @@ public: }; +namespace AMDGPU { + int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); +} // End namespace AMDGPU + } // End llvm namespace #define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63) diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index c03ced3..9f077b9 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -280,7 +280,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { continue; } - int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM); + int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), + AMDGPU::OpName::literal); assert(ImmIdx != -1); // subtract one from ImmIdx, because the DST operand is usually index @@ -357,7 +358,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (PotentialClamp->isMachineOpcode() && PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) { unsigned ClampIdx = - TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP); + TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp); std::vector Ops; unsigned NumOp = Result->getNumOperands(); for (unsigned i = 0; i < NumOp; ++i) { @@ -415,23 +416,23 @@ bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, const R600InstrInfo *TII, std::vector &Ops) { int OperandIdx[] = { - TII->getOperandIdx(Opcode, R600Operands::SRC0), - TII->getOperandIdx(Opcode, R600Operands::SRC1), - TII->getOperandIdx(Opcode, R600Operands::SRC2) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2) }; int SelIdx[] = { - TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL), - TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL), - TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel) }; int NegIdx[] = { - TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG), - TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG), - TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg) }; int AbsIdx[] = { - TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS), - TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs), -1 }; @@ -466,44 +467,44 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode, const R600InstrInfo *TII, std::vector &Ops) { int OperandIdx[] = { - TII->getOperandIdx(Opcode, R600Operands::SRC0_X), - TII->getOperandIdx(Opcode, R600Operands::SRC0_Y), - TII->getOperandIdx(Opcode, R600Operands::SRC0_Z), - TII->getOperandIdx(Opcode, R600Operands::SRC0_W), - TII->getOperandIdx(Opcode, R600Operands::SRC1_X), - TII->getOperandIdx(Opcode, R600Operands::SRC1_Y), - TII->getOperandIdx(Opcode, R600Operands::SRC1_Z), - TII->getOperandIdx(Opcode, R600Operands::SRC1_W) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) }; int SelIdx[] = { - TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_X), - TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Y), - TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Z), - TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_W), - TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_X), - TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Y), - TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Z), - TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_W) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W) }; int NegIdx[] = { - TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_X), - TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Y), - TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Z), - TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_W), - TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_X), - TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Y), - TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Z), - TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_W) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W) }; int AbsIdx[] = { - TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_X), - TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Y), - TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Z), - TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_W), - TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_X), - TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Y), - TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Z), - TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_W) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z), + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W) }; // Gather constants values diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index aebe581..e30ea27 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -57,46 +57,7 @@ namespace R600_InstFlag { #define IS_VTX(desc) ((desc).TSFlags & R600_InstFlag::VTX_INST) #define IS_TEX(desc) ((desc).TSFlags & R600_InstFlag::TEX_INST) -namespace R600Operands { - enum Ops { - DST, - UPDATE_EXEC_MASK, - UPDATE_PREDICATE, - WRITE, - OMOD, - DST_REL, - CLAMP, - SRC0, - SRC0_NEG, - SRC0_REL, - SRC0_ABS, - SRC0_SEL, - SRC1, - SRC1_NEG, - SRC1_REL, - SRC1_ABS, - SRC1_SEL, - SRC2, - SRC2_NEG, - SRC2_REL, - SRC2_SEL, - LAST, - PRED_SEL, - IMM, - BANK_SWIZZLE, - COUNT - }; - - const static int ALUOpTable[3][R600Operands::COUNT] = { -// W C S S S S S S S S S S S -// R O D L S R R R R S R R R R S R R R L P -// D U I M R A R C C C C R C C C C R C C C A R I -// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M B -// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M S - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12,13}, - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19,20}, - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17,18} - }; +namespace OpName { enum VecOps { UPDATE_EXEC_MASK_X, diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp index 40c058f..efc9523 100644 --- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp @@ -82,9 +82,9 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { AMDGPU::ZERO); // src1 TII->addFlag(PredSet, 0, MO_FLAG_MASK); if (Flags & MO_FLAG_PUSH) { - TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); + TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1); } else { - TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1); + TII->setImmOperand(PredSet, AMDGPU::OpName::update_pred, 1); } MI.eraseFromParent(); continue; @@ -96,7 +96,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { AMDGPU::ZERO, AMDGPU::ZERO); TII->addFlag(PredSet, 0, MO_FLAG_MASK); - TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); + TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1); BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::PREDICATED_BREAK)) @@ -208,10 +208,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { // While not strictly necessary from hw point of view, we force // all src operands of a dot4 inst to belong to the same slot. unsigned Src0 = BMI->getOperand( - TII->getOperandIdx(Opcode, R600Operands::SRC0)) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0)) .getReg(); unsigned Src1 = BMI->getOperand( - TII->getOperandIdx(Opcode, R600Operands::SRC1)) + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1)) .getReg(); (void) Src0; (void) Src1; @@ -258,14 +258,14 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { // T0_W = CUBE T1_Y, T1_Z for (unsigned Chan = 0; Chan < 4; Chan++) { unsigned DstReg = MI.getOperand( - TII->getOperandIdx(MI, R600Operands::DST)).getReg(); + TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg(); unsigned Src0 = MI.getOperand( - TII->getOperandIdx(MI, R600Operands::SRC0)).getReg(); + TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg(); unsigned Src1 = 0; // Determine the correct source registers if (!IsCube) { - int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1); + int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1); if (Src1Idx != -1) { Src1 = MI.getOperand(Src1Idx).getReg(); } diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 18e83e8..b898af1 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -150,7 +150,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::CONST_COPY: { MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV, MI->getOperand(0).getReg(), AMDGPU::ALU_CONST); - TII->setImmOperand(NewMI, R600Operands::SRC0_SEL, + TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel, MI->getOperand(1).getImm()); break; } diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 018583d..d17425f 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -69,7 +69,7 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, DestReg, SrcReg); - NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0)) + NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) .setIsKill(KillSrc); } } @@ -170,22 +170,24 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { SmallVector, 3> Result; if (MI->getOpcode() == AMDGPU::DOT_4) { - static const R600Operands::VecOps OpTable[8][2] = { - {R600Operands::SRC0_X, R600Operands::SRC0_SEL_X}, - {R600Operands::SRC0_Y, R600Operands::SRC0_SEL_Y}, - {R600Operands::SRC0_Z, R600Operands::SRC0_SEL_Z}, - {R600Operands::SRC0_W, R600Operands::SRC0_SEL_W}, - {R600Operands::SRC1_X, R600Operands::SRC1_SEL_X}, - {R600Operands::SRC1_Y, R600Operands::SRC1_SEL_Y}, - {R600Operands::SRC1_Z, R600Operands::SRC1_SEL_Z}, - {R600Operands::SRC1_W, R600Operands::SRC1_SEL_W}, + static const unsigned OpTable[8][2] = { + {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, + {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, + {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, + {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, + {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, + {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, + {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, + {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, }; for (unsigned j = 0; j < 8; j++) { - MachineOperand &MO = MI->getOperand(OpTable[j][0] + 1); + MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), + OpTable[j][0])); unsigned Reg = MO.getReg(); if (Reg == AMDGPU::ALU_CONST) { - unsigned Sel = MI->getOperand(OpTable[j][1] + 1).getImm(); + unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(), + OpTable[j][1])).getImm(); Result.push_back(std::pair(&MO, Sel)); continue; } @@ -194,10 +196,10 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { return Result; } - static const R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, + static const unsigned OpTable[3][2] = { + {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, + {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, + {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, }; for (unsigned j = 0; j < 3; j++) { @@ -214,7 +216,7 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { } if (Reg == AMDGPU::ALU_LITERAL_X) { unsigned Imm = MI->getOperand( - getOperandIdx(MI->getOpcode(), R600Operands::IMM)).getImm(); + getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm(); Result.push_back(std::pair(&MO, Imm)); continue; } @@ -329,7 +331,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector &IG, for (unsigned i = 0, e = IG.size(); i < e; ++i) { IGSrcs.push_back(ExtractSrcs(IG[i], PV)); unsigned Op = getOperandIdx(IG[i]->getOpcode(), - R600Operands::BANK_SWIZZLE); + AMDGPU::OpName::bank_swizzle); ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) IG[i]->getOperand(Op).getImm()); } @@ -812,13 +814,13 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); - setImmOperand(MOVA, R600Operands::WRITE, 0); + setImmOperand(MOVA, AMDGPU::OpName::write, 0); MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, AddrReg, ValueReg) .addReg(AMDGPU::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(Mov, R600Operands::DST_REL, 1); + setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1); return Mov; } @@ -830,13 +832,13 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); - setImmOperand(MOVA, R600Operands::WRITE, 0); + setImmOperand(MOVA, AMDGPU::OpName::write, 0); MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, ValueReg, AddrReg) .addReg(AMDGPU::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(Mov, R600Operands::SRC0_REL, 1); + setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1); return Mov; } @@ -892,7 +894,7 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB #define OPERAND_CASE(Label) \ case Label: { \ - static const R600Operands::VecOps Ops[] = \ + static const unsigned Ops[] = \ { \ Label##_X, \ Label##_Y, \ @@ -902,26 +904,25 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB return Ops[Slot]; \ } -static R600Operands::VecOps -getSlotedOps(R600Operands::Ops Op, unsigned Slot) { +static unsigned getSlotedOps(unsigned Op, unsigned Slot) { switch (Op) { - OPERAND_CASE(R600Operands::UPDATE_EXEC_MASK) - OPERAND_CASE(R600Operands::UPDATE_PREDICATE) - OPERAND_CASE(R600Operands::WRITE) - OPERAND_CASE(R600Operands::OMOD) - OPERAND_CASE(R600Operands::DST_REL) - OPERAND_CASE(R600Operands::CLAMP) - OPERAND_CASE(R600Operands::SRC0) - OPERAND_CASE(R600Operands::SRC0_NEG) - OPERAND_CASE(R600Operands::SRC0_REL) - OPERAND_CASE(R600Operands::SRC0_ABS) - OPERAND_CASE(R600Operands::SRC0_SEL) - OPERAND_CASE(R600Operands::SRC1) - OPERAND_CASE(R600Operands::SRC1_NEG) - OPERAND_CASE(R600Operands::SRC1_REL) - OPERAND_CASE(R600Operands::SRC1_ABS) - OPERAND_CASE(R600Operands::SRC1_SEL) - OPERAND_CASE(R600Operands::PRED_SEL) + OPERAND_CASE(AMDGPU::OpName::update_exec_mask) + OPERAND_CASE(AMDGPU::OpName::update_pred) + OPERAND_CASE(AMDGPU::OpName::write) + OPERAND_CASE(AMDGPU::OpName::omod) + OPERAND_CASE(AMDGPU::OpName::dst_rel) + OPERAND_CASE(AMDGPU::OpName::clamp) + OPERAND_CASE(AMDGPU::OpName::src0) + OPERAND_CASE(AMDGPU::OpName::src0_neg) + OPERAND_CASE(AMDGPU::OpName::src0_rel) + OPERAND_CASE(AMDGPU::OpName::src0_abs) + OPERAND_CASE(AMDGPU::OpName::src0_sel) + OPERAND_CASE(AMDGPU::OpName::src1) + OPERAND_CASE(AMDGPU::OpName::src1_neg) + OPERAND_CASE(AMDGPU::OpName::src1_rel) + OPERAND_CASE(AMDGPU::OpName::src1_abs) + OPERAND_CASE(AMDGPU::OpName::src1_sel) + OPERAND_CASE(AMDGPU::OpName::pred_sel) default: llvm_unreachable("Wrong Operand"); } @@ -929,12 +930,6 @@ getSlotedOps(R600Operands::Ops Op, unsigned Slot) { #undef OPERAND_CASE -static int -getVecOperandIdx(R600Operands::VecOps Op) { - return 1 + Op; -} - - MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg) const { @@ -947,31 +942,31 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( Opcode = AMDGPU::DOT4_eg; MachineBasicBlock::iterator I = MI; MachineOperand &Src0 = MI->getOperand( - getVecOperandIdx(getSlotedOps(R600Operands::SRC0, Slot))); + getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot))); MachineOperand &Src1 = MI->getOperand( - getVecOperandIdx(getSlotedOps(R600Operands::SRC1, Slot))); + getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot))); MachineInstr *MIB = buildDefaultInstruction( MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg()); - static const R600Operands::Ops Operands[14] = { - R600Operands::UPDATE_EXEC_MASK, - R600Operands::UPDATE_PREDICATE, - R600Operands::WRITE, - R600Operands::OMOD, - R600Operands::DST_REL, - R600Operands::CLAMP, - R600Operands::SRC0_NEG, - R600Operands::SRC0_REL, - R600Operands::SRC0_ABS, - R600Operands::SRC0_SEL, - R600Operands::SRC1_NEG, - R600Operands::SRC1_REL, - R600Operands::SRC1_ABS, - R600Operands::SRC1_SEL, + static const unsigned Operands[14] = { + AMDGPU::OpName::update_exec_mask, + AMDGPU::OpName::update_pred, + AMDGPU::OpName::write, + AMDGPU::OpName::omod, + AMDGPU::OpName::dst_rel, + AMDGPU::OpName::clamp, + AMDGPU::OpName::src0_neg, + AMDGPU::OpName::src0_rel, + AMDGPU::OpName::src0_abs, + AMDGPU::OpName::src0_sel, + AMDGPU::OpName::src1_neg, + AMDGPU::OpName::src1_rel, + AMDGPU::OpName::src1_abs, + AMDGPU::OpName::src1_sel, }; for (unsigned i = 0; i < 14; i++) { MachineOperand &MO = MI->getOperand( - getVecOperandIdx(getSlotedOps(Operands[i], Slot))); + getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot))); assert (MO.isImm()); setImmOperand(MIB, Operands[i], MO.getImm()); } @@ -985,56 +980,19 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, uint64_t Imm) const { MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, AMDGPU::ALU_LITERAL_X); - setImmOperand(MovImm, R600Operands::IMM, Imm); + setImmOperand(MovImm, AMDGPU::OpName::literal, Imm); return MovImm; } -int R600InstrInfo::getOperandIdx(const MachineInstr &MI, - R600Operands::Ops Op) const { - return getOperandIdx(MI.getOpcode(), Op); -} - -int R600InstrInfo::getOperandIdx(const MachineInstr &MI, - R600Operands::VecOps Op) const { +int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const { return getOperandIdx(MI.getOpcode(), Op); } -int R600InstrInfo::getOperandIdx(unsigned Opcode, - R600Operands::Ops Op) const { - unsigned TargetFlags = get(Opcode).TSFlags; - unsigned OpTableIdx; - - if (!HAS_NATIVE_OPERANDS(TargetFlags)) { - switch (Op) { - case R600Operands::DST: return 0; - case R600Operands::SRC0: return 1; - case R600Operands::SRC1: return 2; - case R600Operands::SRC2: return 3; - default: - assert(!"Unknown operand type for instruction"); - return -1; - } - } - - if (TargetFlags & R600_InstFlag::OP1) { - OpTableIdx = 0; - } else if (TargetFlags & R600_InstFlag::OP2) { - OpTableIdx = 1; - } else { - assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined " - "for this instruction"); - OpTableIdx = 2; - } - - return R600Operands::ALUOpTable[OpTableIdx][Op]; -} - -int R600InstrInfo::getOperandIdx(unsigned Opcode, - R600Operands::VecOps Op) const { - return Op + 1; +int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { + return AMDGPU::getNamedOperandIdx(Opcode, Op); } -void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op, +void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op, int64_t Imm) const { int Idx = getOperandIdx(*MI, Op); assert(Idx != -1 && "Operand not supported for this instruction."); @@ -1062,20 +1020,20 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; switch (Flag) { case MO_FLAG_CLAMP: - FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP); + FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp); break; case MO_FLAG_MASK: - FlagIndex = getOperandIdx(*MI, R600Operands::WRITE); + FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write); break; case MO_FLAG_NOT_LAST: case MO_FLAG_LAST: - FlagIndex = getOperandIdx(*MI, R600Operands::LAST); + FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last); break; case MO_FLAG_NEG: switch (SrcIdx) { - case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break; - case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break; - case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break; + case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break; + case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break; + case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break; } break; @@ -1084,8 +1042,8 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, "instructions."); (void)IsOP3; switch (SrcIdx) { - case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break; - case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break; + case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break; + case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break; } break; diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 6a11c63..f06abf6 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -210,17 +210,15 @@ namespace llvm { /// \brief Get the index of Op in the MachineInstr. /// /// \returns -1 if the Instruction does not contain the specified \p Op. - int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const; - int getOperandIdx(const MachineInstr &MI, R600Operands::VecOps Op) const; + int getOperandIdx(const MachineInstr &MI, unsigned Op) const; /// \brief Get the index of \p Op for the given Opcode. /// /// \returns -1 if the Instruction does not contain the specified \p Op. - int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const; - int getOperandIdx(unsigned Opcode, R600Operands::VecOps Op) const; + int getOperandIdx(unsigned Opcode, unsigned Op) const; /// \brief Helper function for setting instruction flag values. - void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const; + void setImmOperand(MachineInstr *MI, unsigned Op, int64_t Imm) const; /// \returns true if this instruction has an operand for storing target flags. bool hasFlagOperand(const MachineInstr &MI) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 3c83905..d819d44 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -115,6 +115,7 @@ class R600_1OP inst, string opName, list pattern, let HasNativeOperands = 1; let Op1 = 1; let DisableEncoding = "$literal"; + let UseNamedOperandTable = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -151,6 +152,7 @@ class R600_2OP inst, string opName, list pattern, let HasNativeOperands = 1; let Op2 = 1; let DisableEncoding = "$literal"; + let UseNamedOperandTable = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -190,6 +192,7 @@ class R600_3OP inst, string opName, list pattern, let HasNativeOperands = 1; let DisableEncoding = "$literal"; let Op3 = 1; + let UseNamedOperandTable = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -931,7 +934,11 @@ class R600_VEC2OP pattern> : InstR600 <(outs R600_Reg32:$dst), (ins LITERAL:$literal0, LITERAL:$literal1), "", pattern, - AnyALU> {} + AnyALU> { + + let UseNamedOperandTable = 1; + +} } def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4 @@ -949,12 +956,13 @@ multiclass CUBE_Common inst> { def _pseudo : InstR600 < (outs R600_Reg128:$dst), - (ins R600_Reg128:$src), - "CUBE $dst $src", - [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))], + (ins R600_Reg128:$src0), + "CUBE $dst $src0", + [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))], VecALU > { let isPseudo = 1; + let UseNamedOperandTable = 1; } def _real : R600_2OP ; diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index da614c7..6024fd5 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -79,7 +79,7 @@ private: continue; if (TII->isTransOnly(BI)) continue; - int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE); + int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) continue; unsigned Dst = BI->getOperand(0).getReg(); @@ -112,10 +112,10 @@ private: void substitutePV(MachineInstr *MI, const DenseMap &PVs) const { - R600Operands::Ops Ops[] = { - R600Operands::SRC0, - R600Operands::SRC1, - R600Operands::SRC2 + unsigned Ops[] = { + AMDGPU::OpName::src0, + AMDGPU::OpName::src1, + AMDGPU::OpName::src2 }; for (unsigned i = 0; i < 3; i++) { int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); @@ -164,8 +164,8 @@ public: if (getSlot(MII) <= getSlot(MIJ)) return false; // Does MII and MIJ share the same pred_sel ? - int OpI = TII->getOperandIdx(MII->getOpcode(), R600Operands::PRED_SEL), - OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600Operands::PRED_SEL); + int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), + OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0, PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0; if (PredI != PredJ) @@ -191,7 +191,7 @@ public: bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;} void setIsLastBit(MachineInstr *MI, unsigned Bit) const { - unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600Operands::LAST); + unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last); MI->getOperand(LastOp).setImm(Bit); } @@ -230,7 +230,7 @@ public: for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { MachineInstr *MI = CurrentPacketMIs[i]; unsigned Op = TII->getOperandIdx(MI->getOpcode(), - R600Operands::BANK_SWIZZLE); + AMDGPU::OpName::bank_swizzle); MI->getOperand(Op).setImm(BS[i]); } } -- cgit v1.1 From 87070fe1073b1e95748d987af0810d02aac43603 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 26 Jun 2013 10:55:03 +0000 Subject: Optimized integer vector multiplication operation by replacing it with shift/xor/sub when it is possible. Fixed a bug in SDIV, where the const operand is not a splat constant vector. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184931 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7db1e47..954790b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11560,9 +11560,11 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { return SDValue(); APInt SplatValue, SplatUndef; - unsigned MinSplatBits; + unsigned SplatBitSize; bool HasAnyUndefs; - if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs)) + if (!C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) || + EltTy.getSizeInBits() < SplatBitSize) return SDValue(); if ((SplatValue != 0) && -- cgit v1.1 From eb4a55c949cc8ee32fdff3bc460f0c2788e247fd Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Wed, 26 Jun 2013 12:40:16 +0000 Subject: [Sparc]: Add memory operands for the frame references in the storeRegToStackSlot and loadRegFromStackSlot. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184935 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/SparcInstrInfo.cpp | 38 +++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 08a13b8..6c14bc9 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -17,7 +17,9 @@ #include "SparcSubtarget.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -310,19 +312,27 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = *MF->getFrameInfo(); + MachineMemOperand *MMO = + MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOStore, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + // On the order of operands here: think "[FrameIdx + 0] = SrcReg". if (RC == &SP::I64RegsRegClass) BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::DFPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else llvm_unreachable("Can't store this register to stack slot"); } @@ -335,14 +345,26 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = *MF->getFrameInfo(); + MachineMemOperand *MMO = + MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + if (RC == &SP::I64RegsRegClass) - BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::IntRegsRegClass) - BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) - BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::DFPRegsRegClass) - BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else llvm_unreachable("Can't load this register from stack slot"); } -- cgit v1.1 From 6e0857e0b6b241e8b698417659a5821f15290a63 Mon Sep 17 00:00:00 2001 From: Amaury de la Vieuville Date: Wed, 26 Jun 2013 13:39:07 +0000 Subject: ARM: operands should be explicit when disassembled git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184943 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 8734e44..1797c6c 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1176,19 +1176,14 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); - // Don't print +0. - if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << " 0) + else O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + O << markup(">"); } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, -- cgit v1.1 From 0b8594268feb1c804370541c7853e658caee0ae5 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 26 Jun 2013 13:49:15 +0000 Subject: [PowerPC] Support symbolic u16imm operands Currently, all instructions taking s16imm operands support symbolic operands. However, for u16imm operands, we only support actual immediate integers. This causes the assembler to reject code like ori %r5, %r5, symbol@l This patch changes the u16imm operand definition to likewise accept symbolic operands. In fact, s16imm and u16imm can share the same encoding routine, now renamed to getImm16Encoding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184944 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 5 ++++- lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 6 +++--- lib/Target/PowerPC/PPCCodeEmitter.cpp | 6 +++--- lib/Target/PowerPC/PPCInstr64Bit.td | 3 ++- lib/Target/PowerPC/PPCInstrInfo.td | 3 ++- 5 files changed, 14 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index eee1f45..a676302 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -205,7 +205,10 @@ void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << (unsigned short)MI->getOperand(OpNo).getImm(); + if (MI->getOperand(OpNo).isImm()) + O << (unsigned short)MI->getOperand(OpNo).getImm(); + else + printOperand(MI, OpNo, O); } void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 1c6adac..0657475 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -52,7 +52,7 @@ public: SmallVectorImpl &Fixups) const; unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; - unsigned getS16ImmEncoding(const MCInst &MI, unsigned OpNo, + unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; @@ -162,12 +162,12 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, return 0; } -unsigned PPCMCCodeEmitter::getS16ImmEncoding(const MCInst &MI, unsigned OpNo, +unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); - // Add a fixup for the branch target. + // Add a fixup for the immediate field. Fixups.push_back(MCFixup::Create(2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16)); return 0; diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index f006b49..382d709 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -67,7 +67,7 @@ namespace { unsigned OpNo) const; unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getS16ImmEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; @@ -209,8 +209,8 @@ unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI, llvm_unreachable("Absolute branch relocations unsupported on the old JIT."); } -unsigned PPCCodeEmitter::getS16ImmEncoding(const MachineInstr &MI, - unsigned OpNo) const { +unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI, + unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index d612fd9..f63ca24 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -17,11 +17,12 @@ // def s16imm64 : Operand { let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getS16ImmEncoding"; + let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; } def u16imm64 : Operand { let PrintMethod = "printU16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; } def tocentry : Operand { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a9cfd5e..a970696 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -434,7 +434,7 @@ def PPCS16ImmAsmOperand : AsmOperandClass { } def s16imm : Operand { let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getS16ImmEncoding"; + let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; } def PPCU16ImmAsmOperand : AsmOperandClass { @@ -443,6 +443,7 @@ def PPCU16ImmAsmOperand : AsmOperandClass { } def u16imm : Operand { let PrintMethod = "printU16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; } def PPCDirectBrAsmOperand : AsmOperandClass { -- cgit v1.1 From 8950dd127ad4cccd9dadf616b5057cf130f24ade Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 26 Jun 2013 13:49:53 +0000 Subject: [PowerPC] Accept 17-bit signed immediates for addis The assembler currently strictly verifies that immediates for s16imm operands are in range (-32768 ... 32767). This matches the behaviour of the GNU assembler, with one exception: gas allows, as a special case, operands in an extended range (-65536 .. 65535) for the addis instruction only (and its extended mnemonic lis). The main reason for this seems to be to allow using unsigned 16-bit operands for lis, e.g. like lis %r1, 0xfedc. Since this has been supported by gas for a long time, and assembler source code seen "in the wild" actually exploits this feature, this patch adds equivalent support to LLVM for compatibility reasons. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184946 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 2 ++ lib/Target/PowerPC/PPCInstr64Bit.td | 12 ++++++++++-- lib/Target/PowerPC/PPCInstrInfo.td | 16 ++++++++++++++-- 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 2310bb3..cbe1321 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -267,6 +267,8 @@ public: bool isS16ImmX4() const { return Kind == Expression || (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } + bool isS17Imm() const { return Kind == Expression || + (Kind == Immediate && isInt<17>(getImm())); } bool isDirectBr() const { return Kind == Expression || (Kind == Immediate && isInt<26>(getImm()) && (getImm() & 3) == 0); } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index f63ca24..b0386c3 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -25,6 +25,14 @@ def u16imm64 : Operand { let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; } +def s17imm64 : Operand { + // This operand type is used for addis/lis to allow the assembler parser + // to accept immediates in the range -65536..65535 for compatibility with + // the GNU assembler. The operand is treated as 16-bit otherwise. + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; + let ParserMatchClass = PPCS17ImmAsmOperand; +} def tocentry : Operand { let MIOperandInfo = (ops i64imm:$imm); } @@ -330,7 +338,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm), "li $rD, $imm", IntSimple, [(set i64:$rD, imm64SExt16:$imm)]>; -def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s16imm64:$imm), +def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm), "lis $rD, $imm", IntSimple, [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } @@ -406,7 +414,7 @@ def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm), "addi $rD, $rA, $imm", IntSimple, [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm), +def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm), "addis $rD, $rA, $imm", IntSimple, [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a970696..28396fd 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -446,6 +446,18 @@ def u16imm : Operand { let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; } +def PPCS17ImmAsmOperand : AsmOperandClass { + let Name = "S17Imm"; let PredicateMethod = "isS17Imm"; + let RenderMethod = "addImmOperands"; +} +def s17imm : Operand { + // This operand type is used for addis/lis to allow the assembler parser + // to accept immediates in the range -65536..65535 for compatibility with + // the GNU assembler. The operand is treated as 16-bit otherwise. + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; + let ParserMatchClass = PPCS17ImmAsmOperand; +} def PPCDirectBrAsmOperand : AsmOperandClass { let Name = "DirectBr"; let PredicateMethod = "isDirectBr"; let RenderMethod = "addBranchTargetOperands"; @@ -1519,7 +1531,7 @@ def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic. $rD, $rA, $imm", IntGeneral, []>, isDOT, RecFormRel; } -def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm), +def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm), "addis $rD, $rA, $imm", IntSimple, [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; let isCodeGenOnly = 1 in @@ -1539,7 +1551,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm), "li $rD, $imm", IntSimple, [(set i32:$rD, imm32SExt16:$imm)]>; - def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s16imm:$imm), + def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm), "lis $rD, $imm", IntSimple, [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } -- cgit v1.1 From fce567aec90610e81e0b23968d8935ecf5b04505 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 26 Jun 2013 16:39:06 +0000 Subject: Remove the 'generic' CPU from the ARM eabi attributes printer. Make v4 the default ARM architecture attribute, to match CodeGen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184962 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index dd7e20f..f8d4da5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -749,15 +749,6 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); - } else if (CPUString == "generic") { - // For a generic CPU, we assume a standard v7a architecture in Subtarget. - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, - ARMBuildAttrs::Allowed); - AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); } else if (Subtarget->hasV7Ops()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, @@ -772,6 +763,8 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T); else if (Subtarget->hasV4TOps()) AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + else + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4); if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of -- cgit v1.1 From c1a91dd97b000128189421eda6c5bb7905b1f467 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 26 Jun 2013 16:52:32 +0000 Subject: ARM: allow predicated barriers in Thumb mode The barrier instructions are only "always-execute" in ARM mode, they can quite happily sit inside an IT block in Thumb. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184964 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 22 ++++++++++------------ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 12 ++++++------ 2 files changed, 16 insertions(+), 18 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 5448ee3..3b18df0 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3136,26 +3136,24 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, - Requires<[IsThumb, HasDB]> { +def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, + "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, + Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f5; let Inst{3-0} = opt; } } -def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "dsb", "\t$opt", []>, - Requires<[IsThumb, HasDB]> { +def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, + "dsb", "\t$opt", []>, Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f4; let Inst{3-0} = opt; } -def t2ISB : AInoP<(outs), (ins instsyncb_opt:$opt), ThumbFrm, NoItinerary, - "isb", "\t$opt", - []>, Requires<[IsThumb, HasDB]> { +def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary, + "isb", "\t$opt", []>, Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f6; let Inst{3-0} = opt; @@ -4141,9 +4139,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm", (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; // Memory barriers -def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb, HasDB]>; -def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb, HasDB]>; -def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb, HasDB]>; +def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>; // Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional // width specifier. diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 647fdb3..f80fba6 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4966,12 +4966,12 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, } else CanAcceptCarrySet = false; - if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" || - Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" || - Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" || - Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" || - Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" || - (Mnemonic == "clrex" && !isThumb()) || + if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || + Mnemonic == "mcr2" || Mnemonic == "it" || Mnemonic == "mcrr2" || + Mnemonic == "cbz" || Mnemonic == "cdp2" || Mnemonic == "trap" || + Mnemonic == "mrc2" || Mnemonic == "mrrc2" || Mnemonic == "setend" || + ((Mnemonic == "clrex" || Mnemonic == "dmb" || Mnemonic == "dsb" || + Mnemonic == "isb") && !isThumb()) || (Mnemonic == "nop" && isThumbOne()) || ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" || Mnemonic == "ldc2" || Mnemonic == "ldc2l" || -- cgit v1.1 From c19bd321362166805194cbaf170e06a4790d2da9 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 26 Jun 2013 16:52:40 +0000 Subject: ARM: fix more cases where predication may or may not be allowed Unfortunately this addresses two issues (by the time I'd disentangled the logic it wasn't worth putting it back to half-broken): + Coprocessor instructions should all be predicable in Thumb mode. + BKPT should never be predicable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184965 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 5 +++-- lib/Target/ARM/ARMInstrInfo.td | 5 +++-- lib/Target/ARM/ARMInstrThumb2.td | 25 +++++++++++---------- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 36 +++++++++++++++---------------- 4 files changed, 35 insertions(+), 36 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index bd9a212..239632f 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1230,8 +1230,9 @@ class T2JTI; // Move to/from coprocessor instructions -class T2Cop opc, dag oops, dag iops, string asm, list pattern> - : T2XI , Requires<[IsThumb2]> { +class T2Cop opc, dag oops, dag iops, string opcstr, string asm, + list pattern> + : T2I , Requires<[IsThumb2]> { let Inst{31-28} = opc; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 8003e51..7f32c1f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1730,12 +1730,13 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", // The 16-bit operand $val can be used by a debugger to store more information // about the breakpoint. -def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, - "bkpt", "\t$val", []>, Requires<[IsARM]> { +def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "bkpt", "\t$val", []>, Requires<[IsARM]> { bits<16> val; let Inst{3-0} = val{3-0}; let Inst{19-8} = val{15-4}; let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0xe; // AL let Inst{7-4} = 0b0111; } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 3b18df0..fa87fb9 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3825,8 +3825,7 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), class t2MovRCopro Op, string opc, bit direction, dag oops, dag iops, list pattern> - : T2Cop { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -3851,7 +3850,7 @@ class t2MovRRCopro Op, string opc, bit direction, list pattern = []> : T2Cop { + opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { let Inst{27-24} = 0b1100; let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -3876,32 +3875,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; -def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm", +def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, 0)>; + c_imm:$CRm, 0, pred:$p)>; def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; -def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", +def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, 0)>; + c_imm:$CRm, 0, pred:$p)>; /* from coprocessor to ARM core register */ def t2MRC : t2MovRCopro<0b1110, "mrc", 1, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; -def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm", +def : t2InstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, 0)>; + c_imm:$CRm, 0, pred:$p)>; def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; -def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", +def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, 0)>; + c_imm:$CRm, 0, pred:$p)>; def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -3928,7 +3927,7 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>; def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { let Inst{27-24} = 0b1110; @@ -3951,7 +3950,7 @@ def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + "cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { let Inst{27-24} = 0b1110; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f80fba6..eda5550 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4966,28 +4966,26 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, } else CanAcceptCarrySet = false; - if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || - Mnemonic == "mcr2" || Mnemonic == "it" || Mnemonic == "mcrr2" || - Mnemonic == "cbz" || Mnemonic == "cdp2" || Mnemonic == "trap" || - Mnemonic == "mrc2" || Mnemonic == "mrrc2" || Mnemonic == "setend" || - ((Mnemonic == "clrex" || Mnemonic == "dmb" || Mnemonic == "dsb" || - Mnemonic == "isb") && !isThumb()) || - (Mnemonic == "nop" && isThumbOne()) || - ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" || - Mnemonic == "ldc2" || Mnemonic == "ldc2l" || - Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) || - ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) && - !isThumb()) || - Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) { + if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || + Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || + Mnemonic == "trap" || Mnemonic == "setend" || + Mnemonic.startswith("cps")) { + // These mnemonics are never predicable CanAcceptPredicationCode = false; + } else if (!isThumb()) { + // Some instructions are only predicable in Thumb mode + CanAcceptPredicationCode + = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && + Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" && + Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" && + Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" && + Mnemonic != "ldc2" && Mnemonic != "ldc2l" && + Mnemonic != "stc2" && Mnemonic != "stc2l" && + !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs"); + } else if (isThumbOne()) { + CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs"; } else CanAcceptPredicationCode = true; - - if (isThumb()) { - if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" || - Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp") - CanAcceptPredicationCode = false; - } } bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, -- cgit v1.1 From 849eedce9921eb8f285cd0df0ad69ee5133459d1 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 26 Jun 2013 16:58:26 +0000 Subject: Add a subtarget feature 'v8' to the ARM backend. This allows for targeting the ARMv8 AArch32 variant. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184967 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 6 ++++++ lib/Target/ARM/ARMAsmPrinter.cpp | 4 +++- lib/Target/ARM/ARMBuildAttrs.h | 3 ++- lib/Target/ARM/ARMInstrInfo.td | 2 ++ lib/Target/ARM/ARMSubtarget.cpp | 1 + lib/Target/ARM/ARMSubtarget.h | 5 ++++- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 3 +++ lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 5 ++++- 8 files changed, 25 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 1bc9d6b..134b83c 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -138,6 +138,9 @@ def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops, FeaturePerfMon]>; +def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", + "Support ARM v8 instructions", + [HasV7Ops]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -291,6 +294,9 @@ def : ProcessorModel<"swift", SwiftModel, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; +// V8 Processors +def : ProcNoItin<"cortex-a53", [HasV8Ops]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index f8d4da5..d5b2cf5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -749,7 +749,9 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); - } else if (Subtarget->hasV7Ops()) { + } else if (Subtarget->hasV8Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v8); + else if (Subtarget->hasV7Ops()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::AllowThumb32); diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h index 11bd6a4..3dfa7e7 100644 --- a/lib/Target/ARM/ARMBuildAttrs.h +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -89,7 +89,8 @@ namespace ARMBuildAttrs { v7 = 10, // e.g. Cortex A8, Cortex M3 v6_M = 11, // e.g. Cortex M1 v6S_M = 12, // v6_M with the System extensions - v7E_M = 13 // v7_M with DSP extensions + v7E_M = 13, // v7_M with DSP extensions + v8 = 14 // v8, AArch32 }; enum CPUArchProfile { // (=7), uleb128 diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 7f32c1f..ae87442 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -194,6 +194,8 @@ def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate<"HasV7Ops", "armv7">; +def HasV8 : Predicate<"Subtarget->hasV8Ops()">, + AssemblerPredicate<"HasV8Ops", "armv8">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2", "VFP2">; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 455fe1a..3a5d4bb 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -77,6 +77,7 @@ void ARMSubtarget::initializeEnvironment() { HasV6Ops = false; HasV6T2Ops = false; HasV7Ops = false; + HasV8Ops = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index bc5af96..ad9c0d0 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -37,7 +37,8 @@ protected: /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily; - /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops - + /// HasV4TOps, HasV5TOps, HasV5TEOps, + /// HasV6Ops, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. bool HasV4TOps; bool HasV5TOps; @@ -45,6 +46,7 @@ protected: bool HasV6Ops; bool HasV6T2Ops; bool HasV7Ops; + bool HasV8Ops; /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what /// floating point ISAs are supported. @@ -231,6 +233,7 @@ public: bool hasV6Ops() const { return HasV6Ops; } bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } + bool hasV8Ops() const { return HasV8Ops; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index eda5550..ba2bf8e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -161,6 +161,9 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasV7Ops() const { return STI.getFeatureBits() & ARM::HasV7Ops; } + bool hasV8Ops() const { + return STI.getFeatureBits() & ARM::HasV8Ops; + } bool hasARM() const { return !(STI.getFeatureBits() & ARM::FeatureNoARM); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 14fd03f..caa1949 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -59,7 +59,10 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { std::string ARMArchFeature; if (Idx) { unsigned SubVer = TT[Idx]; - if (SubVer >= '7' && SubVer <= '9') { + if (SubVer == '8') { + // FIXME: Parse v8 features + ARMArchFeature = "+v8"; + } else if (SubVer == '7') { if (Len >= Idx+2 && TT[Idx+1] == 'm') { isThumb = true; if (NoCPU) -- cgit v1.1 From 9a308df027b60057d0fe3ba7a3ee9648f6677879 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 26 Jun 2013 18:48:17 +0000 Subject: [mips] Improve code generation for constant multiplication using shifts, adds and subs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185011 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSEISelLowering.cpp | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index f640ecc..8b5a874 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -99,6 +99,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::ADDE); setTargetDAGCombine(ISD::SUBE); + setTargetDAGCombine(ISD::MUL); computeRegisterProperties(); } @@ -320,6 +321,57 @@ static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT, + EVT ShiftTy, SelectionDAG &DAG) { + // Clear the upper (64 - VT.sizeInBits) bits. + C &= ((uint64_t)-1) >> (64 - VT.getSizeInBits()); + + // Return 0. + if (C == 0) + return DAG.getConstant(0, VT); + + // Return x. + if (C == 1) + return X; + + // If c is power of 2, return (shl x, log2(c)). + if (isPowerOf2_64(C)) + return DAG.getNode(ISD::SHL, DL, VT, X, + DAG.getConstant(Log2_64(C), ShiftTy)); + + unsigned Log2Ceil = Log2_64_Ceil(C); + uint64_t Floor = 1LL << Log2_64(C); + uint64_t Ceil = Log2Ceil == 64 ? 0LL : 1LL << Log2Ceil; + + // If |c - floor_c| <= |c - ceil_c|, + // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), + // return (add constMult(x, floor_c), constMult(x, c - floor_c)). + if (C - Floor <= Ceil - C) { + SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); + SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); + return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); + } + + // If |c - floor_c| > |c - ceil_c|, + // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). + SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); + SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); + return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); +} + +static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, + const TargetLowering::DAGCombinerInfo &DCI, + const MipsSETargetLowering *TL) { + EVT VT = N->getValueType(0); + + if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) + if (!VT.isVector()) + return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), + VT, TL->getScalarShiftAmountTy(VT), DAG); + + return SDValue(N, 0); +} + static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, SelectionDAG &DAG, const MipsSubtarget *Subtarget) { @@ -432,6 +484,8 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { return performADDECombine(N, DAG, DCI, Subtarget); case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); + case ISD::MUL: + return performMULCombine(N, DAG, DCI, this); case ISD::SHL: return performSHLCombine(N, DAG, DCI, Subtarget); case ISD::SRA: -- cgit v1.1 From 842cfc91f29f6446bb675891f7abc127f9fbe768 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 26 Jun 2013 19:08:49 +0000 Subject: [mips] Do not emit ".option pic0" if target is mips64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185012 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsAsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index bab70af..c037c05 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -562,7 +562,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { if (OutStreamer.hasRawTextSupport()) { OutStreamer.EmitRawText(StringRef("\t.abicalls")); Reloc::Model RM = Subtarget->getRelocationModel(); - if (RM == Reloc::Static) + if (RM == Reloc::Static && !Subtarget->hasMips64()) OutStreamer.EmitRawText(StringRef("\t.option\tpic0")); } -- cgit v1.1 From 8b9962d514c1834c17254e53b169bf618079562c Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Wed, 26 Jun 2013 20:19:06 +0000 Subject: Minor formatting fix to ARMBaseRegisterInfo::getCalleeSavedRegs git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185016 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index f8ecc60..3ba3b62 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -58,13 +58,11 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); } - if (ghcCall) { - return CSR_GHC_SaveList; - } - else { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; - } + if (ghcCall) + return CSR_GHC_SaveList; + else + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; } const uint32_t* -- cgit v1.1 From 02e75021d80bb068d0178f1e4fdd0a4fb36b9811 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Wed, 26 Jun 2013 21:26:10 +0000 Subject: Debug Info: clean up usage of Verify. No functionality change. It should suffice to check the type of a debug info metadata, instead of calling Verify. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185020 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index ff73931..8ceddf3 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -279,7 +279,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { const LLVMContext &ctx = MF->getFunction()->getContext(); DIScope Scope(curLoc.getScope(ctx)); - if (!Scope.Verify()) + if (!Scope.isScope()) return; StringRef fileName(Scope.getFilename()); -- cgit v1.1 From 6b97ebe9a32342207b24a5f73ebbf3070ec8d189 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Wed, 26 Jun 2013 21:42:14 +0000 Subject: ARM: Proactively ensure that the LowerCallResult hack for 'this'-returns is not used for incompatible calling conventions. (Currently, ARM 'this'-returns are handled in the standard calling convention case by treating R0 as preserved and doing some extra magic in LowerCallResult; this may not apply to calling conventions added in the future so this patch provides and documents an interface for indicating such) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185024 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 3 +++ lib/Target/ARM/ARMBaseRegisterInfo.h | 11 ++++++++++- lib/Target/ARM/ARMISelLowering.cpp | 13 ++++++++++--- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 3ba3b62..0e1e50e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -75,6 +75,9 @@ const uint32_t* ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; + // This should return NULL in the case of any calling convention that does + // not use the same register for an i32 first argument and an i32 return + // value } const uint32_t* diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 03b3682..be3f136 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -94,9 +94,18 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; + // getThisReturnPreservedMask - Returns a call preserved mask specific to the + // case that 'returned' is an i32 first argument if the calling convention + // is one that can (partially) model this attribute with a preserved mask + // (i.e. it is a calling convention that uses the same register for the first + // i32 argument and an i32 return value) + // + // Should return NULL in the case that the calling convention does not have + // this property + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass* diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6b981d5..c8d73d7 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1711,10 +1711,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const uint32_t *Mask; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const ARMBaseRegisterInfo *ARI = static_cast(TRI); - if (isThisReturn) - // For 'this' returns, use the R0-preserving mask + if (isThisReturn) { + // For 'this' returns, use the R0-preserving mask if applicable Mask = ARI->getThisReturnPreservedMask(CallConv); - else + if (!Mask) { + // Set isThisReturn to false if the calling convention is not one that + // allows 'returned' to be modeled in this way, so LowerCallResult does + // not try to pass 'this' straight through + isThisReturn = false; + Mask = ARI->getCallPreservedMask(CallConv); + } + } else Mask = ARI->getCallPreservedMask(CallConv); assert(Mask && "Missing call preserved mask for calling convention"); -- cgit v1.1 From 096c0a03313ea43a1e4035645b02bf99fd35801a Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 26 Jun 2013 22:23:32 +0000 Subject: [Mips Disassembler] Have the DecodeCCRRegisterClass function use the getReg function to lookup the proper tablegen'ed register enumeration. Previously, it was using the encoded value directly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185026 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 4af6703..b6b265c 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -405,7 +405,10 @@ static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateReg(RegNo)); + if (RegNo > 31) + return MCDisassembler::Fail; + unsigned Reg = getReg(Decoder, Mips::CCRRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } -- cgit v1.1 From 165a7a925d73286abfc826b3d6339843b02c09e0 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Wed, 26 Jun 2013 22:27:50 +0000 Subject: Clarify and doxygen-ify comments git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185030 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 20 ++++++++++++-------- lib/Target/ARM/ARMBaseRegisterInfo.h | 16 ++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 0e1e50e..6a9bfc3 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -72,17 +72,21 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { } const uint32_t* -ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; - // This should return NULL in the case of any calling convention that does - // not use the same register for an i32 first argument and an i32 return - // value +ARMBaseRegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; } const uint32_t* -ARMBaseRegisterInfo::getNoPreservedMask() const { - return CSR_NoRegs_RegMask; +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { + // This should return a register mask that is the same as that returned by + // getCallPreservedMask but that additionally preserves the register used for + // the first i32 argument (which must also be the register used to return a + // single i32 return value) + // + // In case that the calling convention does not use the same register for + // both, the function should return NULL (does not currently apply) + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; } BitVector ARMBaseRegisterInfo:: diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index be3f136..cdaad05 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -96,14 +96,14 @@ public: const uint32_t *getCallPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; - // getThisReturnPreservedMask - Returns a call preserved mask specific to the - // case that 'returned' is an i32 first argument if the calling convention - // is one that can (partially) model this attribute with a preserved mask - // (i.e. it is a calling convention that uses the same register for the first - // i32 argument and an i32 return value) - // - // Should return NULL in the case that the calling convention does not have - // this property + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the + /// case that 'returned' is on an i32 first argument if the calling convention + /// is one that can (partially) model this attribute with a preserved mask + /// (i.e. it is a calling convention that uses the same register for the first + /// i32 argument and an i32 return value) + /// + /// Should return NULL in the case that the calling convention does not have + /// this property const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const; -- cgit v1.1 From 8479989ebe30f8fb9e14fbd5622fe0fd51988ff6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 26 Jun 2013 22:44:57 +0000 Subject: Revert "Debug Info: clean up usage of Verify." as it's breaking bots. This reverts commit r185020 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185032 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 8ceddf3..ff73931 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -279,7 +279,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { const LLVMContext &ctx = MF->getFunction()->getContext(); DIScope Scope(curLoc.getScope(ctx)); - if (!Scope.isScope()) + if (!Scope.Verify()) return; StringRef fileName(Scope.getFilename()); -- cgit v1.1 From 722e9e6d0a5b67d136be40bc015abc5b0b32f97b Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 27 Jun 2013 09:27:40 +0000 Subject: [SystemZ] Add conditional store patterns Add pseudo conditional store instructions, so that we use: branch foo: store foo: instead of: load branch foo: move foo: store z196 has real 32-bit and 64-bit conditional stores, but we don't use any z196 instructions yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185065 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 90 ++++++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZISelLowering.h | 3 + lib/Target/SystemZ/SystemZInstrFP.td | 7 ++- lib/Target/SystemZ/SystemZInstrFormats.td | 13 +++++ lib/Target/SystemZ/SystemZInstrInfo.td | 20 +++++++ lib/Target/SystemZ/SystemZOperators.td | 24 +++++++- lib/Target/SystemZ/SystemZPatterns.td | 8 +-- 7 files changed, 156 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 0b0dbea..955b88e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1696,6 +1696,59 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, return JoinMBB; } +// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. +// StoreOpcode is the store to use and Invert says whether the store should +// happen when the condition is false rather than true. +MachineBasicBlock * +SystemZTargetLowering::emitCondStore(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned StoreOpcode, bool Invert) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + + MachineOperand Base = MI->getOperand(0); + int64_t Disp = MI->getOperand(1).getImm(); + unsigned IndexReg = MI->getOperand(2).getReg(); + unsigned SrcReg = MI->getOperand(3).getReg(); + unsigned CCMask = MI->getOperand(4).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); + + // Get the condition needed to branch around the store. + if (!Invert) + CCMask = CCMask ^ SystemZ::CCMASK_ANY; + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // BRC CCMask, JoinMBB + // # fallthrough to FalseMBB + // + // The original DAG glues comparisons to their uses, both to ensure + // that no CC-clobbering instructions are inserted between them, and + // to ensure that comparison results are not reused. This means that + // this CondStore is the sole user of any preceding comparison instruction + // and that we can try to use a fused compare and branch instead. + MBB = StartMBB; + if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB)) + BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // store %SrcReg, %Disp(%Index,%Base) + // # fallthrough to JoinMBB + MBB = FalseMBB; + BuildMI(MBB, DL, TII->get(StoreOpcode)) + .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg); + MBB->addSuccessor(JoinMBB); + + MI->eraseFromParent(); + return JoinMBB; +} + // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. @@ -2100,6 +2153,43 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::SelectF128: return emitSelect(MI, MBB); + case SystemZ::CondStore8_32: + return emitCondStore(MI, MBB, SystemZ::STC32, false); + case SystemZ::CondStore8_32Inv: + return emitCondStore(MI, MBB, SystemZ::STC32, true); + case SystemZ::CondStore16_32: + return emitCondStore(MI, MBB, SystemZ::STH32, false); + case SystemZ::CondStore16_32Inv: + return emitCondStore(MI, MBB, SystemZ::STH32, true); + case SystemZ::CondStore32_32: + return emitCondStore(MI, MBB, SystemZ::ST32, false); + case SystemZ::CondStore32_32Inv: + return emitCondStore(MI, MBB, SystemZ::ST32, true); + case SystemZ::CondStore8: + return emitCondStore(MI, MBB, SystemZ::STC, false); + case SystemZ::CondStore8Inv: + return emitCondStore(MI, MBB, SystemZ::STC, true); + case SystemZ::CondStore16: + return emitCondStore(MI, MBB, SystemZ::STH, false); + case SystemZ::CondStore16Inv: + return emitCondStore(MI, MBB, SystemZ::STH, true); + case SystemZ::CondStore32: + return emitCondStore(MI, MBB, SystemZ::ST, false); + case SystemZ::CondStore32Inv: + return emitCondStore(MI, MBB, SystemZ::ST, true); + case SystemZ::CondStore64: + return emitCondStore(MI, MBB, SystemZ::STG, false); + case SystemZ::CondStore64Inv: + return emitCondStore(MI, MBB, SystemZ::STG, true); + case SystemZ::CondStoreF32: + return emitCondStore(MI, MBB, SystemZ::STE, false); + case SystemZ::CondStoreF32Inv: + return emitCondStore(MI, MBB, SystemZ::STE, true); + case SystemZ::CondStoreF64: + return emitCondStore(MI, MBB, SystemZ::STD, false); + case SystemZ::CondStoreF64Inv: + return emitCondStore(MI, MBB, SystemZ::STD, true); + case SystemZ::AEXT128_64: return emitExt128(MI, MBB, false, SystemZ::subreg_low); case SystemZ::ZEXT128_32: diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index f48cc4f..f6c49f0 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -203,6 +203,9 @@ private: // Implement EmitInstrWithCustomInserter for individual operation types. MachineBasicBlock *emitSelect(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitCondStore(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned StoreOpcode, bool Invert) const; MachineBasicBlock *emitExt128(MachineInstr *MI, MachineBasicBlock *MBB, bool ClearEven, unsigned SubReg) const; diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 86ef14c..7499d2f 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Control-flow instructions +// Select instructions //===----------------------------------------------------------------------===// // C's ?: operator for floating-point operands. @@ -16,6 +16,11 @@ def SelectF32 : SelectWrapper; def SelectF64 : SelectWrapper; def SelectF128 : SelectWrapper; +defm CondStoreF32 : CondStores; +defm CondStoreF64 : CondStores; + //===----------------------------------------------------------------------===// // Move instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index ad050fd..ac0300c 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -956,6 +956,19 @@ class SelectWrapper let Uses = [CC]; } +// Stores $new to $addr if $cc is true ("" case) or false (Inv case). +multiclass CondStores { + let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { + def "" : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc), + [(store (z_select_ccmask cls:$new, (load mode:$addr), + imm:$cc), mode:$addr)]>; + def Inv : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc), + [(store (z_select_ccmask (load mode:$addr), cls:$new, + imm:$cc), mode:$addr)]>; + } +} + // OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND // describe the second (non-memory) operand. class AtomicLoadBinary; defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">; defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">; +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + def Select32 : SelectWrapper; def Select64 : SelectWrapper; +defm CondStore8_32 : CondStores; +defm CondStore16_32 : CondStores; +defm CondStore32_32 : CondStores; + +defm CondStore8 : CondStores; +defm CondStore16 : CondStores; +defm CondStore32 : CondStores; +defm CondStore64 : CondStores; + //===----------------------------------------------------------------------===// // Call instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index ab01b25..021824e 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -120,6 +120,20 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; +// Extending loads in which the extension type doesn't matter. +def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + return cast(N)->getExtensionType() != ISD::NON_EXTLOAD; +}]>; +def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; +def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; +def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + // Aligned loads. class AlignedLoad : PatFrag<(ops node:$addr), (load node:$addr), [{ @@ -149,7 +163,10 @@ class NonvolatileLoad LoadSDNode *Load = cast(N); return !Load->isVolatile(); }]>; -def nonvolatile_load : NonvolatileLoad; +def nonvolatile_load : NonvolatileLoad; +def nonvolatile_anyextloadi8 : NonvolatileLoad; +def nonvolatile_anyextloadi16 : NonvolatileLoad; +def nonvolatile_anyextloadi32 : NonvolatileLoad; // Non-volatile stores. class NonvolatileStore @@ -157,7 +174,10 @@ class NonvolatileStore StoreSDNode *Store = cast(N); return !Store->isVolatile(); }]>; -def nonvolatile_store : NonvolatileStore; +def nonvolatile_store : NonvolatileStore; +def nonvolatile_truncstorei8 : NonvolatileStore; +def nonvolatile_truncstorei16 : NonvolatileStore; +def nonvolatile_truncstorei32 : NonvolatileStore; // Insertions. def inserti8 : PatFrag<(ops node:$src1, node:$src2), diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index 3689f74..fb6c221 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -50,12 +50,8 @@ class RMWI { - def : RMWI; - def : RMWI; - def : RMWI; - def : RMWI; - def : RMWI; - def : RMWI; + def : RMWI; + def : RMWI; } // Record that INSN performs insertion TYPE into a register of class CLS. -- cgit v1.1 From a6e12b575676875d9ff8f3c01e0cd44ab1210d3a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 27 Jun 2013 09:38:48 +0000 Subject: [SystemZ] Allow immediate moves to be rematerialized git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185068 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 44ff1d0..7debcdd 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -222,7 +222,8 @@ let neverHasSideEffects = 1 in { } // Immediate moves. -let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { // 16-bit sign-extended immediates. def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>; def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>; @@ -476,7 +477,8 @@ def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>; // full-width move. (We use IILF rather than something like LLILF // for 32-bit moves because IILF leaves the upper 32 bits of the // GR64 unchanged.) -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; } def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>; -- cgit v1.1 From 7d884e4636fcd9572c5a00aec9bfcb97a0244172 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 27 Jun 2013 09:42:10 +0000 Subject: [SystemZ] Allow LA and LARL to be rematerialized git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185069 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 7debcdd..ff0d566 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -417,7 +417,8 @@ def STRVG : StoreRXY<"strvg", 0xE32F, storeu, GR64>; //===----------------------------------------------------------------------===// // Load BDX-style addresses. -let neverHasSideEffects = 1, Function = "la" in { +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, + Function = "la" in { let PairType = "12" in def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2), "la\t$R1, $XBD2", @@ -430,7 +431,8 @@ let neverHasSideEffects = 1, Function = "la" in { // Load a PC-relative address. There's no version of this instruction // with a 16-bit offset, so there's no relaxation. -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2), "larl\t$R1, $I2", [(set GR64:$R1, pcrel32:$I2)]>; -- cgit v1.1 From 872bb3681009ab31965228305bc0452e84569355 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 27 Jun 2013 11:07:42 +0000 Subject: Don't cast away constness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185071 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 39b205e..b079281 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -500,7 +500,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // If the source and destination are SSE registers, then this is a legal // conversion that should not be lowered. - X86TargetLowering *X86Lowering = (X86TargetLowering*)getTargetLowering(); + const X86TargetLowering *X86Lowering = + static_cast(getTargetLowering()); bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); if (SrcIsSSE && DstIsSSE) -- cgit v1.1 From 31d2f08f8893f38d2d7293195f3707edfefbeeb6 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 27 Jun 2013 11:49:26 +0000 Subject: Add a Subtarget feature 'v8fp' to the ARM backend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185073 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 3 +++ lib/Target/ARM/ARMAsmPrinter.cpp | 18 ++++++++++++++---- lib/Target/ARM/ARMBuildAttrs.h | 3 +++ lib/Target/ARM/ARMSubtarget.cpp | 1 + lib/Target/ARM/ARMSubtarget.h | 4 +++- 5 files changed, 24 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 134b83c..46928dc 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -45,6 +45,9 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", "Enable VFP4 instructions", [FeatureVFP3, FeatureFP16]>; +def FeatureV8FP : SubtargetFeature<"v8fp", "HasV8FP", + "true", "Enable ARMv8 FP", + [FeatureVFP4]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict VFP3 to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index d5b2cf5..18c97f4 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -781,8 +781,14 @@ void ARMAsmPrinter::emitAttributes() { emitFPU = false; } - /* VFPv4 + .fpu */ - if (Subtarget->hasVFP4()) { + /* V8FP + .fpu */ + if (Subtarget->hasV8FP()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowV8FPA); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "v8fp"); + /* VFPv4 + .fpu */ + } else if (Subtarget->hasVFP4()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, ARMBuildAttrs::AllowFPv4A); if (emitFPU) @@ -806,8 +812,12 @@ void ARMAsmPrinter::emitAttributes() { /* TODO: ARMBuildAttrs::Allowed is not completely accurate, * since NEON can have 1 (allowed) or 2 (MAC operations) */ if (Subtarget->hasNEON()) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::Allowed); + if (Subtarget->hasV8Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowedNeonV8); + else + AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::Allowed); } // Signal various FP modes. diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h index 3dfa7e7..f614dca 100644 --- a/lib/Target/ARM/ARMBuildAttrs.h +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -106,6 +106,7 @@ namespace ARMBuildAttrs { //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128 Not_Allowed = 0, Allowed = 1, + AllowedNeonV8 = 3, // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10) AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA) @@ -113,6 +114,8 @@ namespace ARMBuildAttrs { AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31 + AllowV8FPA = 7, // Use of the ARM v8-A FP ISA was permitted + AllowV8FPB = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 // Tag_WMMX_arch, (=11), uleb128 AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 3a5d4bb..c592421 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -81,6 +81,7 @@ void ARMSubtarget::initializeEnvironment() { HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; + HasV8FP = false; HasNEON = false; UseNEONForSinglePrecisionFP = false; UseMulOps = UseFusedMulOps; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index ad9c0d0..def6fbb 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -48,11 +48,12 @@ protected: bool HasV7Ops; bool HasV8Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasV8FP, HasNEON - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; + bool HasV8FP; bool HasNEON; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been @@ -249,6 +250,7 @@ public: bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } + bool hasV8FP() const { return HasV8FP; } bool hasNEON() const { return HasNEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } -- cgit v1.1 From 16d36a5cd1a581dfac79a4616b6b9602a43b6cd1 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 27 Jun 2013 17:52:04 +0000 Subject: CostModel: improve the cost model for load/store of non power-of-two types such as <3 x float>, which are popular in graphics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185085 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 3bcdfc1..ac63db5 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -539,8 +539,51 @@ unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val, return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); } +unsigned X86TTI::getScalarizationOverhead(Type *Ty, bool Insert, + bool Extract) const { + assert (Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; +} + unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { + // Handle non power of two vectors such as <3 x float> + if (VectorType *VTy = dyn_cast(Src)) { + unsigned NumElem = VTy->getVectorNumElements(); + + // Handle a few common cases: + // <3 x float> + if (NumElem == 3 && VTy->getScalarSizeInBits() == 32) + // Cost = 64 bit store + extract + 32 bit store. + return 3; + + // <3 x double> + if (NumElem == 3 && VTy->getScalarSizeInBits() == 64) + // Cost = 128 bit store + unpack + 64 bit store. + return 3; + + // Assume that all other non power-of-two numbers are scalarized. + if (!isPowerOf2_32(NumElem)) { + unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode, + VTy->getScalarType(), + Alignment, + AddressSpace); + unsigned SplitCost = getScalarizationOverhead(Src, + Opcode == Instruction::Load, + Opcode==Instruction::Store); + return NumElem * Cost + SplitCost; + } + } + // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && -- cgit v1.1 From e6dc376eece3e48d7316b788846dac90181d2ffe Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 27 Jun 2013 17:54:10 +0000 Subject: Get rid of the unused class member. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185086 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index ac63db5..68e1a67 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -33,7 +33,6 @@ void initializeX86TTIPass(PassRegistry &); namespace { class X86TTI : public ImmutablePass, public TargetTransformInfo { - const X86TargetMachine *TM; const X86Subtarget *ST; const X86TargetLowering *TLI; @@ -42,12 +41,12 @@ class X86TTI : public ImmutablePass, public TargetTransformInfo { unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + X86TTI() : ImmutablePass(ID), ST(0), TLI(0) { llvm_unreachable("This pass cannot be directly constructed"); } X86TTI(const X86TargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + : ImmutablePass(ID), ST(TM->getSubtargetImpl()), TLI(TM->getTargetLowering()) { initializeX86TTIPass(*PassRegistry::getPassRegistry()); } -- cgit v1.1 From c084c0945b0530180e8969f5e2017d02d06db130 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 28 Jun 2013 04:24:32 +0000 Subject: Integrate Assembler: Support X86_64_DTPOFF64 relocations git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185131 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index de80dd8..b400b87 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -101,7 +101,18 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, } else { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); - case FK_Data_8: Type = ELF::R_X86_64_64; break; + case FK_Data_8: + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_X86_64_64; + break; + case MCSymbolRefExpr::VK_DTPOFF: + Type = ELF::R_X86_64_DTPOFF64; + break; + } + break; case X86::reloc_signed_4byte: switch (Modifier) { default: -- cgit v1.1 From cbafae6d33031a72ba8219c28cb0e852511f79a3 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 28 Jun 2013 05:43:10 +0000 Subject: Debug Info: clean up usage of Verify. No functionality change. It should suffice to check the type of a debug info metadata, instead of calling Verify. For cases where we know the type of a DI metadata, use assert. Also update testing cases to make them conform to the format of DI classes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185135 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index ff73931..84b0884 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -279,8 +279,10 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { const LLVMContext &ctx = MF->getFunction()->getContext(); DIScope Scope(curLoc.getScope(ctx)); - if (!Scope.Verify()) - return; + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); + if (!Scope) + return; StringRef fileName(Scope.getFilename()); StringRef dirName(Scope.getDirectory()); -- cgit v1.1 From a744d41a3f8af25938e12617abe2a8d32f6eabf6 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Fri, 28 Jun 2013 15:09:46 +0000 Subject: ARM: Fix pseudo-instructions for SRS (Store Return State). The mapping between SRS pseudo-instructions and SRS native instructions was incorrect, the correct mapping is: srsfa -> srsib srsea -> srsia srsfd -> srsdb srsed -> srsda This fixes . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185155 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ae87442..2492c4e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -5185,10 +5185,10 @@ def : MnemonicAlias<"rfeed", "rfeib">; def : MnemonicAlias<"rfe", "rfeia">; // SRS aliases -def : MnemonicAlias<"srsfa", "srsda">; -def : MnemonicAlias<"srsea", "srsdb">; -def : MnemonicAlias<"srsfd", "srsia">; -def : MnemonicAlias<"srsed", "srsib">; +def : MnemonicAlias<"srsfa", "srsib">; +def : MnemonicAlias<"srsea", "srsia">; +def : MnemonicAlias<"srsfd", "srsdb">; +def : MnemonicAlias<"srsed", "srsda">; def : MnemonicAlias<"srs", "srsia">; // QSAX == QSUBADDX -- cgit v1.1 From bcd8e7ad4d1dd486675e774778b3409464380f62 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 28 Jun 2013 15:29:25 +0000 Subject: ARM: ensure fixed-point conversions have sane types We were generating intrinsics for NEON fixed-point conversions that didn't exist (e.g. float -> i16). There are two cases to consider: + iN is smaller than float. In this case we can do the conversion but need an extend or truncate as well. + iN is larger than float. In this case using the NEON conversion would be incorrect so we don't perform any combining. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185158 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 41 +++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c8d73d7..ff8571b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9141,12 +9141,27 @@ static SDValue PerformVCVTCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); + MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from f32 to i32. We can handle + // smaller integers by generating an extra truncate, but larger ones would + // be lossy. + return SDValue(); + } + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - N->getValueType(0), - DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, - DAG.getConstant(Log2_64(C), MVT::i32)); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), + NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, + DAG.getConstant(Log2_64(C), MVT::i32)); + + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv); + + return FixConv; } /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) @@ -9177,12 +9192,28 @@ static SDValue PerformVDIVCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); + MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from i32 to f32. We can handle + // smaller integers by generating an extra extend, but larger ones would + // be lossy. + return SDValue(); + } + + SDValue ConvInput = Op.getOperand(0); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + ConvInput); + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : Intrinsic::arm_neon_vcvtfxu2fp; return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), Op.getValueType(), DAG.getConstant(IntrinsicOpcode, MVT::i32), - Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); + ConvInput, DAG.getConstant(Log2_64(C), MVT::i32)); } /// Getvshiftimm - Check if this is a valid build_vector for the immediate -- cgit v1.1 From 7e9381951eb4dadf9c59257786416ac51a6a6c09 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 28 Jun 2013 15:46:53 +0000 Subject: R600: Add ALUInst bit to tablegen definitions v2 v2: - Remove functions left over from a previous rebase. Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185160 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Defines.h | 3 ++- lib/Target/R600/R600InstrFormats.td | 2 ++ lib/Target/R600/R600InstrInfo.cpp | 4 +--- lib/Target/R600/R600Instructions.td | 3 +++ 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index e30ea27..6bcf8ae 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -41,7 +41,8 @@ namespace R600_InstFlag { OP1 = (1 << 10), OP2 = (1 << 11), VTX_INST = (1 << 12), - TEX_INST = (1 << 13) + TEX_INST = (1 << 13), + ALU_INST = (1 << 14) }; } diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td index d31f18c..2c98fb9 100644 --- a/lib/Target/R600/R600InstrFormats.td +++ b/lib/Target/R600/R600InstrFormats.td @@ -26,6 +26,7 @@ class InstR600 pattern, bit HasNativeOperands = 0; bit VTXInst = 0; bit TEXInst = 0; + bit ALUInst = 0; let Namespace = "AMDGPU"; let OutOperandList = outs; @@ -47,6 +48,7 @@ class InstR600 pattern, let TSFlags{11} = Op2; let TSFlags{12} = VTXInst; let TSFlags{13} = TEXInst; + let TSFlags{14} = ALUInst; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index d17425f..f267ee9 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -133,9 +133,7 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const { bool R600InstrInfo::isALUInstr(unsigned Opcode) const { unsigned TargetFlags = get(Opcode).TSFlags; - return ((TargetFlags & R600_InstFlag::OP1) | - (TargetFlags & R600_InstFlag::OP2) | - (TargetFlags & R600_InstFlag::OP3)); + return (TargetFlags & R600_InstFlag::ALU_INST); } bool R600InstrInfo::isTransOnly(unsigned Opcode) const { diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index d819d44..b0a82ff 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -114,6 +114,7 @@ class R600_1OP inst, string opName, list pattern, let update_pred = 0; let HasNativeOperands = 1; let Op1 = 1; + let ALUInst = 1; let DisableEncoding = "$literal"; let UseNamedOperandTable = 1; @@ -151,6 +152,7 @@ class R600_2OP inst, string opName, list pattern, let HasNativeOperands = 1; let Op2 = 1; + let ALUInst = 1; let DisableEncoding = "$literal"; let UseNamedOperandTable = 1; @@ -193,6 +195,7 @@ class R600_3OP inst, string opName, list pattern, let DisableEncoding = "$literal"; let Op3 = 1; let UseNamedOperandTable = 1; + let ALUInst = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; -- cgit v1.1 From cedcfee405a22b245e869abe8609f094df34085a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 28 Jun 2013 15:46:59 +0000 Subject: R600: Add support for GROUP_BARRIER instruction Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185161 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUIntrinsics.td | 2 ++ lib/Target/R600/R600EmitClauseMarkers.cpp | 9 ++++++++- lib/Target/R600/R600InstrInfo.cpp | 10 ++++++++++ lib/Target/R600/R600InstrInfo.h | 2 ++ lib/Target/R600/R600Instructions.td | 30 ++++++++++++++++++++++++++++++ lib/Target/R600/R600MachineScheduler.cpp | 6 +++++- lib/Target/R600/R600Packetizer.cpp | 8 +++++++- 7 files changed, 64 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index eecb25b..9f975bf 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -50,6 +50,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + + def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; } let TargetPrefix = "TGSI", isTarget = 1 in { diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp index ff5ce5a..0aea2d7 100644 --- a/lib/Target/R600/R600EmitClauseMarkers.cpp +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -177,7 +177,14 @@ private: AluInstCount ++; continue; } - if (I->getOpcode() == AMDGPU::KILLGT) { + // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as: + // + // * KILL or INTERP instructions + // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits + // * Uses waterfalling (i.e. INDEX_MODE = AR.X) + // + // XXX: These checks have not been implemented yet. + if (TII->mustBeLastInClause(I->getOpcode())) { I++; break; } diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index f267ee9..3b1a240 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -163,6 +163,16 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { usesTextureCache(MI->getOpcode()); } +bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { + switch (Opcode) { + case AMDGPU::KILLGT: + case AMDGPU::GROUP_BARRIER: + return true; + default: + return false; + } +} + SmallVector, 3> R600InstrInfo::getSrcs(MachineInstr *MI) const { SmallVector, 3> Result; diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index f06abf6..3c2e50b 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -72,6 +72,8 @@ namespace llvm { bool usesTextureCache(unsigned Opcode) const; bool usesTextureCache(const MachineInstr *MI) const; + bool mustBeLastInClause(unsigned Opcode) const; + /// \returns a pair for each src of an ALU instructions. /// The first member of a pair is the register id. /// If register is ALU_CONST, second member is SEL. diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index b0a82ff..f42501a 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1499,6 +1499,36 @@ let hasSideEffects = 1 in { def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; +def GROUP_BARRIER : InstR600 < + (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>, + R600ALU_Word0, + R600ALU_Word1_OP2 <0x54> { + + let dst = 0; + let dst_rel = 0; + let src0 = 0; + let src0_rel = 0; + let src0_neg = 0; + let src0_abs = 0; + let src1 = 0; + let src1_rel = 0; + let src1_neg = 0; + let src1_abs = 0; + let write = 0; + let omod = 0; + let clamp = 0; + let last = 1; + let bank_swizzle = 0; + let pred_sel = 0; + let update_exec_mask = 0; + let update_pred = 0; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + + let ALUInst = 1; +} + // TRUNC is used for the FLT_TO_INT instructions to work around a // perceived problem where the rounding modes are applied differently // depending on the instruction and the slot they are in. diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index a330d88..acc1b4d 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -269,10 +269,14 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { } // Does the instruction take a whole IG ? + // XXX: Is it possible to add a helper function in R600InstrInfo that can + // be used here and in R600PacketizerList::isSoloInstruction() ? if(TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()) || - TII->isReductionOp(MI->getOpcode())) + TII->isReductionOp(MI->getOpcode()) || + MI->getOpcode() == AMDGPU::GROUP_BARRIER) { return AluT_XYZW; + } // Is the result already assigned to a channel ? unsigned DestSubReg = MI->getOperand(0).getSubReg(); diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 6024fd5..4c72d22 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -82,7 +82,11 @@ private: int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) continue; - unsigned Dst = BI->getOperand(0).getReg(); + int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst); + if (DstIdx == -1) { + continue; + } + unsigned Dst = BI->getOperand(DstIdx).getReg(); if (BI->getOpcode() == AMDGPU::DOT4_r600 || BI->getOpcode() == AMDGPU::DOT4_eg) { Result[Dst] = AMDGPU::PV_X; @@ -154,6 +158,8 @@ public: return true; if (TII->isTransOnly(MI)) return true; + if (MI->getOpcode() == AMDGPU::GROUP_BARRIER) + return true; return false; } -- cgit v1.1 From e3d4cbc7d25061441adafa47450a31571c87bf85 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 28 Jun 2013 15:47:08 +0000 Subject: R600: Add local memory support via LDS Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185162 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUAsmPrinter.cpp | 6 +++ lib/Target/R600/AMDGPUISelLowering.cpp | 23 ++++++++++ lib/Target/R600/AMDGPUISelLowering.h | 3 ++ lib/Target/R600/AMDGPUInstructions.td | 9 ++++ lib/Target/R600/AMDGPUMachineFunction.cpp | 1 + lib/Target/R600/AMDGPUMachineFunction.h | 2 + lib/Target/R600/AMDILISelDAGToDAG.cpp | 15 ++++--- lib/Target/R600/R600Defines.h | 6 ++- lib/Target/R600/R600ISelLowering.cpp | 18 +++++++- lib/Target/R600/R600InstrFormats.td | 43 +++++++++++++++--- lib/Target/R600/R600InstrInfo.cpp | 46 +++++++++++++++---- lib/Target/R600/R600InstrInfo.h | 11 +++++ lib/Target/R600/R600Instructions.td | 75 +++++++++++++++++++++++++++++++ lib/Target/R600/R600MachineScheduler.cpp | 12 ++++- lib/Target/R600/R600Packetizer.cpp | 3 ++ lib/Target/R600/R600RegisterInfo.td | 3 +- lib/Target/R600/R600Schedule.td | 2 + 17 files changed, 254 insertions(+), 24 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index f720c7e..996d2a6 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -130,6 +131,11 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { S_STACK_SIZE(MFI->StackSize), 4); OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); + + if (MFI->ShaderType == ShaderType::COMPUTE) { + OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4); + OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4); + } } void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 6d73590..4019a1f 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/DataLayout.h" using namespace llvm; @@ -71,6 +72,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + int types[] = { (int)MVT::v2i32, (int)MVT::v4i32 @@ -138,6 +141,26 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) return Op; } +SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, + SDValue Op, + SelectionDAG &DAG) const { + + const DataLayout *TD = getTargetMachine().getDataLayout(); + GlobalAddressSDNode *G = cast(Op); + // XXX: What does the value of G->getOffset() mean? + assert(G->getOffset() == 0 && + "Do not know what to do with an non-zero offset"); + + unsigned Offset = MFI->LDSSize; + const GlobalValue *GV = G->getGlobal(); + uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + + // XXX: Account for alignment? + MFI->LDSSize += Size; + + return DAG.getConstant(Offset, MVT::i32); +} + SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 69a0ac9..d739a01 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -20,6 +20,7 @@ namespace llvm { +class AMDGPUMachineFunction; class MachineRegisterInfo; class AMDGPUTargetLowering : public TargetLowering { @@ -36,6 +37,8 @@ protected: virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const; + SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, + SelectionDAG &DAG) const; bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 29df374..234bb99 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -94,6 +94,15 @@ def zextloadi8_constant : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast(N)); }]>; +def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + +def local_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; + class Constants { int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp index 0461025..9a1e344 100644 --- a/lib/Target/R600/AMDGPUMachineFunction.cpp +++ b/lib/Target/R600/AMDGPUMachineFunction.cpp @@ -10,6 +10,7 @@ const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType"; AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo() { ShaderType = ShaderType::COMPUTE; + LDSSize = 0; AttributeSet Set = MF.getFunction()->getAttributes(); Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, ShaderTypeAttribute); diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h index 21c8c51..5d5df12 100644 --- a/lib/Target/R600/AMDGPUMachineFunction.h +++ b/lib/Target/R600/AMDGPUMachineFunction.h @@ -23,6 +23,8 @@ private: public: AMDGPUMachineFunction(const MachineFunction &MF); unsigned ShaderType; + /// Number of bytes in the LDS that are being used. + unsigned LDSSize; }; } diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 9f077b9..e79ab3c 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -282,11 +282,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), AMDGPU::OpName::literal); - assert(ImmIdx != -1); + if (ImmIdx == -1) { + continue; + } - // subtract one from ImmIdx, because the DST operand is usually index - // 0 for MachineInstrs, but we have no DST in the Ops vector. - ImmIdx--; + if (TII->getOperandIdx(Use->getMachineOpcode(), + AMDGPU::OpName::dst) != -1) { + // subtract one from ImmIdx, because the DST operand is usually index + // 0 for MachineInstrs, but we have no DST in the Ops vector. + ImmIdx--; + } // Check that we aren't already using an immediate. // XXX: It's possible for an instruction to have more than one @@ -336,7 +341,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } if (Result && Result->isMachineOpcode() && !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) - && TII->isALUInstr(Result->getMachineOpcode())) { + && TII->hasInstrModifiers(Result->getMachineOpcode())) { // Fold FNEG/FABS/CONST_ADDRESS // TODO: Isel can generate multiple MachineInst, we need to recursively // parse Result diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index 6bcf8ae..90fc29c 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -42,7 +42,9 @@ namespace R600_InstFlag { OP2 = (1 << 11), VTX_INST = (1 << 12), TEX_INST = (1 << 13), - ALU_INST = (1 << 14) + ALU_INST = (1 << 14), + LDS_1A = (1 << 15), + LDS_1A1D = (1 << 16) }; } @@ -162,4 +164,6 @@ namespace OpName { #define R_028878_SQ_PGM_RESOURCES_GS 0x028878 #define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4 +#define R_0288E8_SQ_LDS_ALLOC 0x0288E8 + #endif // R600DEFINES_H_ diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index b898af1..ce2aa92 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -138,6 +138,19 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } + case AMDGPU::LDS_READ_RET: { + MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), + TII->get(MI->getOpcode()), + AMDGPU::OQAP); + for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { + NewMI.addOperand(MI->getOperand(i)); + } + TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, + MI->getOperand(0).getReg(), + AMDGPU::OQAP); + break; + } + case AMDGPU::MOV_IMM_F32: TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), MI->getOperand(1).getFPImm()->getValueAPF() @@ -456,6 +469,8 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( //===----------------------------------------------------------------------===// SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + R600MachineFunctionInfo *MFI = MF.getInfo(); switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -463,14 +478,13 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = cast(Op.getOperand(1))->getZExtValue(); switch (IntrinsicID) { case AMDGPUIntrinsic::AMDGPU_store_output: { - MachineFunction &MF = DAG.getMachineFunction(); - R600MachineFunctionInfo *MFI = MF.getInfo(); int64_t RegIndex = cast(Op.getOperand(3))->getZExtValue(); unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); MFI->LiveOuts.push_back(Reg); diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td index 2c98fb9..2d72404 100644 --- a/lib/Target/R600/R600InstrFormats.td +++ b/lib/Target/R600/R600InstrFormats.td @@ -23,6 +23,8 @@ class InstR600 pattern, bits<2> FlagOperandIdx = 0; bit Op1 = 0; bit Op2 = 0; + bit LDS_1A = 0; + bit LDS_1A1D = 0; bit HasNativeOperands = 0; bit VTXInst = 0; bit TEXInst = 0; @@ -49,21 +51,21 @@ class InstR600 pattern, let TSFlags{12} = VTXInst; let TSFlags{13} = TEXInst; let TSFlags{14} = ALUInst; + let TSFlags{15} = LDS_1A; + let TSFlags{16} = LDS_1A1D; } //===----------------------------------------------------------------------===// // ALU instructions //===----------------------------------------------------------------------===// -class R600ALU_Word0 { +class R600_ALU_LDS_Word0 { field bits<32> Word0; bits<11> src0; - bits<1> src0_neg; bits<1> src0_rel; bits<11> src1; bits<1> src1_rel; - bits<1> src1_neg; bits<3> index_mode = 0; bits<2> pred_sel; bits<1> last; @@ -76,16 +78,23 @@ class R600ALU_Word0 { let Word0{8-0} = src0_sel; let Word0{9} = src0_rel; let Word0{11-10} = src0_chan; - let Word0{12} = src0_neg; let Word0{21-13} = src1_sel; let Word0{22} = src1_rel; let Word0{24-23} = src1_chan; - let Word0{25} = src1_neg; let Word0{28-26} = index_mode; let Word0{30-29} = pred_sel; let Word0{31} = last; } +class R600ALU_Word0 : R600_ALU_LDS_Word0 { + + bits<1> src0_neg; + bits<1> src1_neg; + + let Word0{12} = src0_neg; + let Word0{25} = src1_neg; +} + class R600ALU_Word1 { field bits<32> Word1; @@ -138,6 +147,30 @@ class R600ALU_Word1_OP3 alu_inst> : R600ALU_Word1{ let Word1{17-13} = alu_inst; } +class R600LDS_Word1 { + field bits<32> Word1; + + bits<11> src2; + bits<9> src2_sel = src2{8-0}; + bits<2> src2_chan = src2{10-9}; + bits<1> src2_rel; + // offset specifies the stride offset to the second set of data to be read + // from. This is a dword offset. + bits<5> alu_inst = 17; // OP3_INST_LDS_IDX_OP + bits<3> bank_swizzle; + bits<6> lds_op; + bits<2> dst_chan = 0; + + let Word1{8-0} = src2_sel; + let Word1{9} = src2_rel; + let Word1{11-10} = src2_chan; + let Word1{17-13} = alu_inst; + let Word1{20-18} = bank_swizzle; + let Word1{26-21} = lds_op; + let Word1{30-29} = dst_chan; +} + + /* XXX: R600 subtarget uses a slightly different encoding than the other subtargets. We currently handle this in R600MCCodeEmitter, but we may diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 3b1a240..f05390e 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -136,6 +136,21 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { return (TargetFlags & R600_InstFlag::ALU_INST); } +bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const { + unsigned TargetFlags = get(Opcode).TSFlags; + + return ((TargetFlags & R600_InstFlag::OP1) | + (TargetFlags & R600_InstFlag::OP2) | + (TargetFlags & R600_InstFlag::OP3)); +} + +bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { + unsigned TargetFlags = get(Opcode).TSFlags; + + return ((TargetFlags & R600_InstFlag::LDS_1A) | + (TargetFlags & R600_InstFlag::LDS_1A1D)); +} + bool R600InstrInfo::isTransOnly(unsigned Opcode) const { return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY); } @@ -245,6 +260,9 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI, unsigned Reg = Srcs[i].first->getReg(); unsigned Index = RI.getEncodingValue(Reg) & 0xff; unsigned Chan = RI.getHWRegChan(Reg); + if (Reg == AMDGPU::OQAP) { + Result.push_back(std::pair(Index, 0)); + } if (Index > 127) { Result.push_back(DummyPair); continue; @@ -287,10 +305,11 @@ Swizzle(std::vector > Src, return Src; } -static bool -isLegal(const std::vector > > &IGSrcs, - const std::vector &Swz, - unsigned CheckedSize) { +bool +R600InstrInfo::isLegal( + const std::vector > > &IGSrcs, + const std::vector &Swz, + unsigned CheckedSize) const { int Vector[4][3]; memset(Vector, -1, sizeof(Vector)); for (unsigned i = 0; i < CheckedSize; i++) { @@ -300,6 +319,16 @@ isLegal(const std::vector > > &IGSrcs, const std::pair &Src = Srcs[j]; if (Src.first < 0) continue; + if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { + if (Swz[i] != R600InstrInfo::ALU_VEC_012 && + Swz[i] != R600InstrInfo::ALU_VEC_021) { + // The value from output queue A (denoted by register OQAP) can + // only be fetched during the first cycle. + return false; + } + // OQAP does not count towards the normal read port restrictions + continue; + } if (Vector[Src.second][j] < 0) Vector[Src.second][j] = Src.first; if (Vector[Src.second][j] != Src.first) @@ -309,10 +338,11 @@ isLegal(const std::vector > > &IGSrcs, return true; } -static bool recursiveFitsFPLimitation( -const std::vector > > &IGSrcs, -std::vector &SwzCandidate, -unsigned Depth = 0) { +bool +R600InstrInfo::recursiveFitsFPLimitation( + const std::vector > > &IGSrcs, + std::vector &SwzCandidate, + unsigned Depth) const { if (!isLegal(IGSrcs, SwzCandidate, Depth)) return false; if (IGSrcs.size() == Depth) diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 3c2e50b..a375288 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -63,6 +63,8 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool hasInstrModifiers(unsigned Opcode) const; + bool isLDSInstr(unsigned Opcode) const; bool isTransOnly(unsigned Opcode) const; bool isTransOnly(const MachineInstr *MI) const; @@ -82,6 +84,15 @@ namespace llvm { SmallVector, 3> getSrcs(MachineInstr *MI) const; + bool isLegal( + const std::vector > > &IGSrcs, + const std::vector &Swz, + unsigned CheckedSize) const; + bool recursiveFitsFPLimitation( + const std::vector > > &IGSrcs, + std::vector &SwzCandidate, + unsigned Depth = 0) const; + /// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210 /// returns true and the first (in lexical order) BankSwizzle affectation /// starting from the one already provided in the Instruction Group MIs that diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index f42501a..fd585f8 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1529,6 +1529,81 @@ def GROUP_BARRIER : InstR600 < let ALUInst = 1; } +//===----------------------------------------------------------------------===// +// LDS Instructions +//===----------------------------------------------------------------------===// +class R600_LDS op, dag outs, dag ins, string asm, + list pattern = []> : + + InstR600 , + R600_ALU_LDS_Word0, + R600LDS_Word1 { + + bits<6> offset = 0; + let lds_op = op; + + let Word1{27} = offset{0}; + let Word1{12} = offset{1}; + let Word1{28} = offset{2}; + let Word1{31} = offset{3}; + let Word0{12} = offset{4}; + let Word0{25} = offset{5}; + + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + + let ALUInst = 1; + let HasNativeOperands = 1; + let UseNamedOperandTable = 1; +} + +class R600_LDS_1A lds_op, string name, list pattern> : R600_LDS < + lds_op, + (outs R600_Reg32:$dst), + (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, + LAST:$last, R600_Pred:$pred_sel, + BANK_SWIZZLE:$bank_swizzle), + " "#name#" $last OQAP, $src0$src0_rel $pred_sel", + pattern + > { + + let src1 = 0; + let src1_rel = 0; + let src2 = 0; + let src2_rel = 0; + + let Defs = [OQAP]; + let usesCustomInserter = 1; + let LDS_1A = 1; + let DisableEncoding = "$dst"; +} + +class R600_LDS_1A1D lds_op, string name, list pattern> : + R600_LDS < + lds_op, + (outs), + (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, + LAST:$last, R600_Pred:$pred_sel, + BANK_SWIZZLE:$bank_swizzle), + " "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel", + pattern + > { + + let src2 = 0; + let src2_rel = 0; + let LDS_1A1D = 1; +} + +def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", + [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] +>; + +def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE", + [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] +>; + // TRUNC is used for the FLT_TO_INT instructions to work around a // perceived problem where the rounding modes are applied differently // depending on the instruction and the slot they are in. diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index acc1b4d..7e28f9d 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -278,6 +278,10 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { return AluT_XYZW; } + if (TII->isLDSInstr(MI->getOpcode())) { + return AluT_X; + } + // Is the result already assigned to a channel ? unsigned DestSubReg = MI->getOperand(0).getSubReg(); switch (DestSubReg) { @@ -371,14 +375,18 @@ void R600SchedStrategy::PrepareNextSlot() { } void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { - unsigned DestReg = MI->getOperand(0).getReg(); + int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + if (DstIndex == -1) { + return; + } + unsigned DestReg = MI->getOperand(DstIndex).getReg(); // PressureRegister crashes if an operand is def and used in the same inst // and we try to constraint its regclass for (MachineInstr::mop_iterator It = MI->operands_begin(), E = MI->operands_end(); It != E; ++It) { MachineOperand &MO = *It; if (MO.isReg() && !MO.isDef() && - MO.getReg() == MI->getOperand(0).getReg()) + MO.getReg() == DestReg) return; } // Constrains the regclass of DestReg to assign it to Slot diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 4c72d22..6fc15de 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -92,6 +92,9 @@ private: Result[Dst] = AMDGPU::PV_X; continue; } + if (Dst == AMDGPU::OQAP) { + continue; + } unsigned PVReg = 0; switch (TRI.getHWRegChan(Dst)) { case 0: diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index a8b9b70..60a93e3 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -101,6 +101,7 @@ def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; def AR_X : R600Reg<"AR.x", 0>; +def OQAP : R600Reg<"OQAP", 221>; def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "ArrayBase%u", 448, 480))>; @@ -170,7 +171,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_ArrayBase, R600_Addr, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF, - ALU_CONST, ALU_PARAM + ALU_CONST, ALU_PARAM, OQAP )>; def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add diff --git a/lib/Target/R600/R600Schedule.td b/lib/Target/R600/R600Schedule.td index 207233d..df62bf8 100644 --- a/lib/Target/R600/R600Schedule.td +++ b/lib/Target/R600/R600Schedule.td @@ -23,6 +23,7 @@ def TRANS : FuncUnit; def AnyALU : InstrItinClass; def VecALU : InstrItinClass; def TransALU : InstrItinClass; +def XALU : InstrItinClass; def R600_VLIW5_Itin : ProcessorItineraries < [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL], @@ -31,6 +32,7 @@ def R600_VLIW5_Itin : ProcessorItineraries < InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]> ] >; -- cgit v1.1 From 10ddc4d7f232507933c266180d0052f12e65c4ab Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Fri, 28 Jun 2013 17:26:02 +0000 Subject: Bug 13662: Enable GPRPair for all i64 operands of inline asm on ARM This patch assigns paired GPRs for inline asm with 64-bit data on ARM. It's enabled for both ARM and Thumb to support modifiers like %H, %Q, %R. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185169 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAsmPrinter.cpp | 24 ++++++++++++++++++++++-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 33 +++++++++++++++++++++++---------- 2 files changed, 45 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 18c97f4..13a22b1 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -464,8 +464,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // This takes advantage of the 2 operand-ness of ldm/stm and that we've // already got the operands in registers that are operands to the // inline asm statement. - - O << "{" << ARMInstPrinter::getRegisterName(RegBegin); + O << "{"; + if (ARM::GPRPairRegClass.contains(RegBegin)) { + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); + O << ARMInstPrinter::getRegisterName(Reg0) << ", ";; + RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); + } + O << ARMInstPrinter::getRegisterName(RegBegin); // FIXME: The register allocator not only may not have given us the // registers in sequence, but may not be in ascending registers. This @@ -491,6 +497,20 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return true; unsigned Flags = FlagsOP.getImm(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + unsigned RC; + InlineAsm::hasRegClassConstraint(Flags, RC); + if (RC == ARM::GPRPairRegClassID) { + if (NumVals != 1) + return true; + const MachineOperand &MO = MI->getOperand(OpNum); + if (!MO.isReg()) + return true; + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ? + ARM::gsub_0 : ARM::gsub_1); + O << ARMInstPrinter::getRegisterName(Reg); + return false; + } if (NumVals != 2) return true; unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 3e23253..03a7e5d 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3472,16 +3472,16 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs // respectively. Since there is no constraint to explicitly specify a - // reg pair, we search %H operand inside the asm string. If it is found, the - // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. - if (AsmString.find(":H}") == StringRef::npos) - return NULL; + // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, + // the 64-bit data may be referred by H, Q, R modifiers, so we still pack + // them into a GPRPair. SDLoc dl(N); - SDValue Glue = N->getOperand(NumOps-1); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0); + SmallVector OpChanged; // Glue node will be appended late. - for(unsigned i = 0; i < NumOps -1; ++i) { + for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { SDValue op = N->getOperand(i); AsmNodeOperands.push_back(op); @@ -3495,17 +3495,28 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ else continue; + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. + if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef && Kind != InlineAsm::Kind_RegDefEarlyClobber) continue; - unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); unsigned RC; bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); - if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) + || NumRegs != 2) continue; - assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); SDValue V0 = N->getOperand(i+1); SDValue V1 = N->getOperand(i+2); unsigned Reg0 = cast(V0)->getReg(); @@ -3566,6 +3577,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ Changed = true; if(PairedReg.getNode()) { + OpChanged[OpChanged.size() -1 ] = true; Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); // Replace the current flag. @@ -3578,7 +3590,8 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ } } - AsmNodeOperands.push_back(Glue); + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); if (!Changed) return NULL; -- cgit v1.1 From 00df1252286eb71b093d8e45eb4c9cee4ca5761d Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:57:51 +0000 Subject: [NVPTX] Add infrastructure for vector loads/stores of parameters git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185171 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 12 +++ lib/Target/NVPTX/NVPTXISelLowering.h | 6 ++ lib/Target/NVPTX/NVPTXInstrInfo.td | 137 +++++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index be8e130..5fa9e84 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -210,8 +210,16 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::PrintCall"; case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam"; + case NVPTXISD::LoadParamV2: + return "NVPTXISD::LoadParamV2"; + case NVPTXISD::LoadParamV4: + return "NVPTXISD::LoadParamV4"; case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam"; + case NVPTXISD::StoreParamV2: + return "NVPTXISD::StoreParamV2"; + case NVPTXISD::StoreParamV4: + return "NVPTXISD::StoreParamV4"; case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32"; case NVPTXISD::StoreParamU32: @@ -242,6 +250,10 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::MoveToRetval"; case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; + case NVPTXISD::StoreRetvalV2: + return "NVPTXISD::StoreRetvalV2"; + case NVPTXISD::StoreRetvalV4: + return "NVPTXISD::StoreRetvalV4"; case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam"; case NVPTXISD::RETURN: diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 2ec9436..b0dad0f 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -36,7 +36,11 @@ enum NodeType { DeclareRet, DeclareScalarRet, LoadParam, + LoadParamV2, + LoadParamV4, StoreParam, + StoreParamV2, + StoreParamV4, StoreParamS32, // to sext and store a <32bit value, not used currently StoreParamU32, // to zext and store a <32bit value, not used currently MoveToParam, @@ -54,6 +58,8 @@ enum NodeType { MoveRetval, MoveToRetval, StoreRetval, + StoreRetvalV2, + StoreRetvalV4, PseudoUseParam, RETURN, CallSeqBegin, diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index da6dd39..c980237 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1751,9 +1751,13 @@ def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>; +def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>; def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>; def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; @@ -1762,6 +1766,8 @@ def SDTCallValProfile : SDTypeProfile<1, 0, []>; def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>; def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; +def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; +def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, @@ -1776,12 +1782,20 @@ def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def LoadParamV2 : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def LoadParamV4 : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamV2 : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, @@ -1808,6 +1822,10 @@ def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile, [SDNPHasChain, SDNPSideEffect]>; def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, [SDNPHasChain, SDNPSideEffect]>; +def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, + [SDNPHasChain, SDNPSideEffect]>; +def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, + [SDNPHasChain, SDNPSideEffect]>; def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile, [SDNPHasChain, SDNPSideEffect]>; def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", @@ -1828,12 +1846,43 @@ class LoadParamRegInst : "\t$dst, retval$b;"), [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; +// FIXME: A bug in tablegen currently prevents us from using multi-output +// patterns here, so we have to custom select these in C++. +class LoadParamV2MemInst : + NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b), + !strconcat(!strconcat("ld.param.v2", opstr), + "\t{{$dst, $dst2}}, [retval0+$b];"), []>; + +class LoadParamV4MemInst : + NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3, + regclass:$dst4), + (ins i32imm:$b), + !strconcat(!strconcat("ld.param.v4", opstr), + "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), []>; + class StoreParamInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), !strconcat(!strconcat("st.param", opstr), "\t[param$a+$b], $val;"), [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; +class StoreParamV2Inst : + NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, + i32imm:$a, i32imm:$b), + !strconcat(!strconcat("st.param.v2", opstr), + "\t[param$a+$b], {{$val, $val2}};"), + [(StoreParamV2 (i32 imm:$a), (i32 imm:$b), regclass:$val, + regclass:$val2)]>; + +class StoreParamV4Inst : + NVPTXInst<(outs), (ins regclass:$val, regclass:$val1, regclass:$val2, + regclass:$val3, i32imm:$a, i32imm:$b), + !strconcat(!strconcat("st.param.v4", opstr), + "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"), + [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), regclass:$val, + regclass:$val2, regclass:$val3, + regclass:$val4)]>; + class MoveToParamInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), !strconcat(!strconcat("mov", opstr), @@ -1846,6 +1895,21 @@ class StoreRetvalInst : "\t[func_retval0+$a], $val;"), [(StoreRetval (i32 imm:$a), regclass:$val)]>; +class StoreRetvalV2Inst : + NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a), + !strconcat(!strconcat("st.param.v2", opstr), + "\t[func_retval0+$a], {{$val, $val2}};"), + [(StoreRetvalV2 (i32 imm:$a), regclass:$val, regclass:$val2)]>; + +class StoreRetvalV4Inst : + NVPTXInst<(outs), + (ins regclass:$val, regclass:$val2, regclass:$val3, + regclass:$val4, i32imm:$a), + !strconcat(!strconcat("st.param.v4", opstr), + "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), + [(StoreRetvalV4 (i32 imm:$a), regclass:$val, regclass:$val2, + regclass:$val3, regclass:$val4)]>; + class MoveToRetvalInst : NVPTXInst<(outs), (ins i32imm:$num, regclass:$val), !strconcat(!strconcat("mov", opstr), @@ -1920,6 +1984,13 @@ def LoadParamMemI64 : LoadParamMemInst; def LoadParamMemI32 : LoadParamMemInst; def LoadParamMemI16 : LoadParamMemInst; def LoadParamMemI8 : LoadParamMemInst; +def LoadParamMemV2I64 : LoadParamV2MemInst; +def LoadParamMemV2I32 : LoadParamV2MemInst; +def LoadParamMemV2I16 : LoadParamV2MemInst; +def LoadParamMemV2I8 : LoadParamV2MemInst; +def LoadParamMemV4I32 : LoadParamV4MemInst; +def LoadParamMemV4I16 : LoadParamV4MemInst; +def LoadParamMemV4I8 : LoadParamV4MemInst; //def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), // !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", @@ -1932,6 +2003,9 @@ def LoadParamMemI8 : LoadParamMemInst; def LoadParamMemF32 : LoadParamMemInst; def LoadParamMemF64 : LoadParamMemInst; +def LoadParamMemV2F32 : LoadParamV2MemInst; +def LoadParamMemV2F64 : LoadParamV2MemInst; +def LoadParamMemV4F32 : LoadParamV4MemInst; def LoadParamRegI64 : LoadParamRegInst; def LoadParamRegI32 : LoadParamRegInst; @@ -1961,6 +2035,47 @@ def StoreParamI8 : NVPTXInst<(outs), [(StoreParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; +def StoreParamV2I64 : StoreParamV2Inst; +def StoreParamV2I32 : StoreParamV2Inst; + +def StoreParamV2I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, + i32imm:$a, i32imm:$b), + "st.param.v2.b16\t[param$a+$b], {{$val, $val2}};", + [(StoreParamV2 (i32 imm:$a), (i32 imm:$b), + Int16Regs:$val, Int16Regs:$val2)]>; + +def StoreParamV2I8 : NVPTXInst<(outs), (ins Int8Regs:$val, Int8Regs:$val2, + i32imm:$a, i32imm:$b), + "st.param.v2.b8\t[param$a+$b], {{$val, $val2}};", + [(StoreParamV2 (i32 imm:$a), (i32 imm:$b), + Int8Regs:$val, Int8Regs:$val2)]>; + +// FIXME: StoreParamV4Inst crashes llvm-tblgen :( +//def StoreParamV4I32 : StoreParamV4Inst; +def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, + Int32Regs:$val3, Int32Regs:$val4, + i32imm:$a, i32imm:$b), + "st.param.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), + Int32Regs:$val, Int32Regs:$val2, + Int32Regs:$val3, Int32Regs:$val4)]>; + +def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, + Int16Regs:$val3, Int16Regs:$val4, + i32imm:$a, i32imm:$b), + "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), + Int16Regs:$val, Int16Regs:$val2, + Int16Regs:$val3, Int16Regs:$val4)]>; + +def StoreParamV4I8 : NVPTXInst<(outs), (ins Int8Regs:$val, Int8Regs:$val2, + Int8Regs:$val3, Int8Regs:$val4, + i32imm:$a, i32imm:$b), + "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), + Int8Regs:$val, Int8Regs:$val2, + Int8Regs:$val3, Int8Regs:$val4)]>; + def StoreParamS32I16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b), !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t", @@ -1985,6 +2100,18 @@ def StoreParamS32I8 : NVPTXInst<(outs), def StoreParamF32 : StoreParamInst; def StoreParamF64 : StoreParamInst; +def StoreParamV2F32 : StoreParamV2Inst; +def StoreParamV2F64 : StoreParamV2Inst; +// FIXME: StoreParamV4Inst crashes llvm-tblgen :( +//def StoreParamV4F32 : StoreParamV4Inst; +def StoreParamV4F32 : NVPTXInst<(outs), + (ins Float32Regs:$val, Float32Regs:$val2, + Float32Regs:$val3, Float32Regs:$val4, + i32imm:$a, i32imm:$b), + "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), + Float32Regs:$val, Float32Regs:$val2, + Float32Regs:$val3, Float32Regs:$val4)]>; def MoveToParamI64 : MoveToParamInst; def MoveToParamI32 : MoveToParamInst; @@ -2005,6 +2132,13 @@ def StoreRetvalI64 : StoreRetvalInst; def StoreRetvalI32 : StoreRetvalInst; def StoreRetvalI16 : StoreRetvalInst; def StoreRetvalI8 : StoreRetvalInst; +def StoreRetvalV2I64 : StoreRetvalV2Inst; +def StoreRetvalV2I32 : StoreRetvalV2Inst; +def StoreRetvalV2I16 : StoreRetvalV2Inst; +def StoreRetvalV2I8 : StoreRetvalV2Inst; +def StoreRetvalV4I32 : StoreRetvalV4Inst; +def StoreRetvalV4I16 : StoreRetvalV4Inst; +def StoreRetvalV4I8 : StoreRetvalV4Inst; //def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a), // !strconcat("\{\n\t", @@ -2021,6 +2155,9 @@ def StoreRetvalI8 : StoreRetvalInst; def StoreRetvalF64 : StoreRetvalInst; def StoreRetvalF32 : StoreRetvalInst; +def StoreRetvalV2F64 : StoreRetvalV2Inst; +def StoreRetvalV2F32 : StoreRetvalV2Inst; +def StoreRetvalV4F32 : StoreRetvalV4Inst; def MoveRetvalI64 : MoveRetvalInst; def MoveRetvalI32 : MoveRetvalInst; -- cgit v1.1 From b67366514316bbb3cc3cb57f72f2d1439ec474bc Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:57:53 +0000 Subject: [NVPTX] Clean up handling of formal arguments and enable generation of vector parameter loads git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185172 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 316 +++++++++++++++++++++------------ 1 file changed, 202 insertions(+), 114 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 5fa9e84..42bfab1 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1066,12 +1066,16 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); + const TargetLowering *TLI = nvTM->getTargetLowering(); SDValue Root = DAG.getRoot(); std::vector OutChains; bool isKernel = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + assert(isABI && "Non-ABI compilation is not supported"); + if (!isABI) + return Chain; std::vector argTypes; std::vector theArgs; @@ -1080,15 +1084,20 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( theArgs.push_back(I); argTypes.push_back(I->getType()); } - //assert(argTypes.size() == Ins.size() && - // "Ins types and function types did not match"); + // argTypes.size() (or theArgs.size()) and Ins.size() need not match. + // Ins.size() will be larger + // * if there is an aggregate argument with multiple fields (each field + // showing up separately in Ins) + // * if there is a vector argument with more than typical vector-length + // elements (generally if more than 4) where each vector element is + // individually present in Ins. + // So a different index should be used for indexing into Ins. + // See similar issue in LowerCall. + unsigned InsIdx = 0; int idx = 0; - for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) { + for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { Type *Ty = argTypes[i]; - EVT ObjectVT = getValueType(Ty); - //assert(ObjectVT == Ins[i].VT && - // "Ins type did not match function type"); // If the kernel argument is image*_t or sampler_t, convert it to // a i32 constant holding the parameter position. This can later @@ -1104,142 +1113,220 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (theArgs[i]->use_empty()) { // argument is dead - if (ObjectVT.isVector()) { - EVT EltVT = ObjectVT.getVectorElementType(); - unsigned NumElts = ObjectVT.getVectorNumElements(); - for (unsigned vi = 0; vi < NumElts; ++vi) { - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT)); + if (Ty->isAggregateType()) { + SmallVector vtparts; + + ComputeValueVTs(*this, Ty, vtparts); + assert(vtparts.size() > 0 && "empty aggregate type not expected"); + for (unsigned parti = 0, parte = vtparts.size(); parti != parte; + ++parti) { + EVT partVT = vtparts[parti]; + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, partVT)); + ++InsIdx; } - } else { - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + if (vtparts.size() > 0) + --InsIdx; + continue; } + if (Ty->isVectorTy()) { + EVT ObjectVT = getValueType(Ty); + unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); + for (unsigned parti = 0; parti < NumRegs; ++parti) { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); + ++InsIdx; + } + if (NumRegs > 0) + --InsIdx; + continue; + } + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); continue; } // In the following cases, assign a node order of "idx+1" - // to newly created nodes. The SDNOdes for params have to + // to newly created nodes. The SDNodes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { - if (ObjectVT.isVector()) { + if (Ty->isAggregateType()) { + SmallVector vtparts; + SmallVector offsets; + + ComputeValueVTs(*this, Ty, vtparts, &offsets, 0); + assert(vtparts.size() > 0 && "empty aggregate type not expected"); + bool aggregateIsPacked = false; + if (StructType *STy = llvm::dyn_cast(Ty)) + aggregateIsPacked = STy->isPacked(); + + SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + for (unsigned parti = 0, parte = vtparts.size(); parti != parte; + ++parti) { + EVT partVT = vtparts[parti]; + Value *srcValue = Constant::getNullValue( + PointerType::get(partVT.getTypeForEVT(F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, + DAG.getConstant(offsets[parti], getPointerTy())); + unsigned partAlign = + aggregateIsPacked ? 1 + : TD->getABITypeAlignment( + partVT.getTypeForEVT(F->getContext())); + SDValue p = DAG.getLoad(partVT, dl, Root, srcAddr, + MachinePointerInfo(srcValue), false, false, + true, partAlign); + if (p.getNode()) + p.getNode()->setIROrder(idx + 1); + InVals.push_back(p); + ++InsIdx; + } + if (vtparts.size() > 0) + --InsIdx; + continue; + } + if (Ty->isVectorTy()) { + EVT ObjectVT = getValueType(Ty); + SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); unsigned NumElts = ObjectVT.getVectorNumElements(); + assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && + "Vector was not scalarized"); + unsigned Ofst = 0; EVT EltVT = ObjectVT.getVectorElementType(); - unsigned Offset = 0; - for (unsigned vi = 0; vi < NumElts; ++vi) { - SDValue A = getParamSymbol(DAG, idx, getPointerTy()); - SDValue B = DAG.getIntPtrConstant(Offset); - SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), - //getParamSymbol(DAG, idx, EltVT), - //DAG.getConstant(Offset, getPointerTy())); - A, B); + + // V1 load + // f32 = load ... + if (NumElts == 1) { + // We only have one element, so just directly load it Value *SrcValue = Constant::getNullValue(PointerType::get( EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue Ld = DAG.getLoad( - EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false, - false, + SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, + DAG.getConstant(Ofst, getPointerTy())); + SDValue P = DAG.getLoad( + EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + false, true, TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); - Offset += EltVT.getStoreSizeInBits() / 8; - InVals.push_back(Ld); + if (P.getNode()) + P.getNode()->setIROrder(idx + 1); + + InVals.push_back(P); + Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); + ++InsIdx; + } else if (NumElts == 2) { + // V2 load + // f32,f32 = load ... + EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); + Value *SrcValue = Constant::getNullValue(PointerType::get( + VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); + SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, + DAG.getConstant(Ofst, getPointerTy())); + SDValue P = DAG.getLoad( + VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + false, true, + TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); + if (P.getNode()) + P.getNode()->setIROrder(idx + 1); + + SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, + DAG.getIntPtrConstant(0)); + SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, + DAG.getIntPtrConstant(1)); + InVals.push_back(Elt0); + InVals.push_back(Elt1); + Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + InsIdx += 2; + } else { + // V4 loads + // We have at least 4 elements (<3 x Ty> expands to 4 elements) and + // the + // vector will be expanded to a power of 2 elements, so we know we can + // always round up to the next multiple of 4 when creating the vector + // loads. + // e.g. 4 elem => 1 ld.v4 + // 6 elem => 2 ld.v4 + // 8 elem => 2 ld.v4 + // 11 elem => 3 ld.v4 + unsigned VecSize = 4; + if (EltVT.getSizeInBits() == 64) { + VecSize = 2; + } + EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); + for (unsigned i = 0; i < NumElts; i += VecSize) { + Value *SrcValue = Constant::getNullValue( + PointerType::get(VecVT.getTypeForEVT(F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); + SDValue SrcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, + DAG.getConstant(Ofst, getPointerTy())); + SDValue P = DAG.getLoad( + VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + false, true, + TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); + if (P.getNode()) + P.getNode()->setIROrder(idx + 1); + + for (unsigned j = 0; j < VecSize; ++j) { + if (i + j >= NumElts) + break; + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, + DAG.getIntPtrConstant(j)); + InVals.push_back(Elt); + } + Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + InsIdx += VecSize; + } } + + if (NumElts > 0) + --InsIdx; continue; } - // A plain scalar. - if (isABI || isKernel) { - // If ABI, load from the param symbol - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); - // Conjure up a value that we can get the address space from. - // FIXME: Using a constant here is a hack. - Value *srcValue = Constant::getNullValue( - PointerType::get(ObjectVT.getTypeForEVT(F->getContext()), - llvm::ADDRESS_SPACE_PARAM)); - SDValue p = DAG.getLoad( - ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, - false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); - if (p.getNode()) - p.getNode()->setIROrder(idx + 1); - InVals.push_back(p); - } else { - // If no ABI, just move the param symbol - SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); - SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); - if (p.getNode()) - p.getNode()->setIROrder(idx + 1); - InVals.push_back(p); - } + EVT ObjectVT = getValueType(Ty); + assert(ObjectVT == Ins[InsIdx].VT && + "Ins type did not match function type"); + // If ABI, load from the param symbol + SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + Value *srcValue = Constant::getNullValue(PointerType::get( + ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); + SDValue p = DAG.getLoad( + ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, + true, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + if (p.getNode()) + p.getNode()->setIROrder(idx + 1); + InVals.push_back(p); continue; } // Param has ByVal attribute - if (isABI || isKernel) { - // Return MoveParam(param symbol). - // Ideally, the param symbol can be returned directly, - // but when SDNode builder decides to use it in a CopyToReg(), - // machine instruction fails because TargetExternalSymbol - // (not lowered) is target dependent, and CopyToReg assumes - // the source is lowered. - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); - SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); - if (p.getNode()) - p.getNode()->setIROrder(idx + 1); - if (isKernel) - InVals.push_back(p); - else { - SDValue p2 = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, - DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); - InVals.push_back(p2); - } - } else { - // Have to move a set of param symbols to registers and - // store them locally and return the local pointer in InVals - const PointerType *elemPtrType = dyn_cast(argTypes[i]); - assert(elemPtrType && "Byval parameter should be a pointer type"); - Type *elemType = elemPtrType->getElementType(); - // Compute the constituent parts - SmallVector vtparts; - SmallVector offsets; - ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); - unsigned totalsize = 0; - for (unsigned j = 0, je = vtparts.size(); j != je; ++j) - totalsize += vtparts[j].getStoreSizeInBits(); - SDValue localcopy = DAG.getFrameIndex( - MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false), - getPointerTy()); - unsigned sizesofar = 0; - std::vector theChains; - for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { - unsigned numElems = 1; - if (vtparts[j].isVector()) - numElems = vtparts[j].getVectorNumElements(); - for (unsigned k = 0, ke = numElems; k != ke; ++k) { - EVT tmpvt = vtparts[j]; - if (tmpvt.isVector()) - tmpvt = tmpvt.getVectorElementType(); - SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, - getParamSymbol(DAG, idx, tmpvt)); - SDValue addr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, - DAG.getConstant(sizesofar, getPointerTy())); - theChains.push_back(DAG.getStore( - Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0)); - sizesofar += tmpvt.getStoreSizeInBits() / 8; - ++idx; - } - } - --idx; - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0], - theChains.size()); - InVals.push_back(localcopy); + // Return MoveParam(param symbol). + // Ideally, the param symbol can be returned directly, + // but when SDNode builder decides to use it in a CopyToReg(), + // machine instruction fails because TargetExternalSymbol + // (not lowered) is target dependent, and CopyToReg assumes + // the source is lowered. + EVT ObjectVT = getValueType(Ty); + assert(ObjectVT == Ins[InsIdx].VT && + "Ins type did not match function type"); + SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); + if (p.getNode()) + p.getNode()->setIROrder(idx + 1); + if (isKernel) + InVals.push_back(p); + else { + SDValue p2 = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, + DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); + InVals.push_back(p2); } } // Clang will check explicit VarArg and issue error if any. However, Clang // will let code with - // implicit var arg like f() pass. + // implicit var arg like f() pass. See bug 617733. // We treat this case as if the arg list is empty. - //if (F.isVarArg()) { + // if (F.isVarArg()) { // assert(0 && "VarArg not supported yet!"); //} @@ -1250,6 +1337,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( return Chain; } + SDValue NVPTXTargetLowering::LowerReturn( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, -- cgit v1.1 From bc48ce87ef608730616c3250b18c013b1b4a39fc Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:57:55 +0000 Subject: [NVPTX] Add support for vectorized function return values git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185173 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 164 +++++++++++++++++++++++++++------ 1 file changed, 137 insertions(+), 27 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 42bfab1..9679b05 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1338,37 +1338,147 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( } -SDValue NVPTXTargetLowering::LowerReturn( - SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, SDLoc dl, - SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + SDLoc dl, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + const Type *RetTy = F->getReturnType(); + const DataLayout *TD = getDataLayout(); bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + assert(isABI && "Non-ABI compilation is not supported"); + if (!isABI) + return Chain; - unsigned sizesofar = 0; - unsigned idx = 0; - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - SDValue theVal = OutVals[i]; - EVT theValType = theVal.getValueType(); - unsigned numElems = 1; - if (theValType.isVector()) - numElems = theValType.getVectorNumElements(); - for (unsigned j = 0, je = numElems; j != je; ++j) { - SDValue tmpval = theVal; - if (theValType.isVector()) - tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - theValType.getVectorElementType(), tmpval, - DAG.getIntPtrConstant(j)); - Chain = DAG.getNode( - isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl, - MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), - tmpval); + if (const VectorType *VTy = dyn_cast(RetTy)) { + // If we have a vector type, the OutVals array will be the scalarized + // components and we have combine them into 1 or more vector stores. + unsigned NumElts = VTy->getNumElements(); + assert(NumElts == Outs.size() && "Bad scalarization of return value"); + + // V1 store + if (NumElts == 1) { + SDValue StoreVal = OutVals[0]; + // We only have one element, so just directly store it + if (StoreVal.getValueType().getSizeInBits() < 8) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + Chain = DAG.getNode(NVPTXISD::StoreRetval, dl, MVT::Other, Chain, + DAG.getConstant(0, MVT::i32), StoreVal); + } else if (NumElts == 2) { + // V2 store + SDValue StoreVal0 = OutVals[0]; + SDValue StoreVal1 = OutVals[1]; + + if (StoreVal0.getValueType().getSizeInBits() < 8) { + StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal0); + StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal1); + } + + Chain = DAG.getNode(NVPTXISD::StoreRetvalV2, dl, MVT::Other, Chain, + DAG.getConstant(0, MVT::i32), StoreVal0, StoreVal1); + } else { + // V4 stores + // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the + // vector will be expanded to a power of 2 elements, so we know we can + // always round up to the next multiple of 4 when creating the vector + // stores. + // e.g. 4 elem => 1 st.v4 + // 6 elem => 2 st.v4 + // 8 elem => 2 st.v4 + // 11 elem => 3 st.v4 + + unsigned VecSize = 4; + if (OutVals[0].getValueType().getSizeInBits() == 64) + VecSize = 2; + + unsigned Offset = 0; + + EVT VecVT = + EVT::getVectorVT(F->getContext(), OutVals[0].getValueType(), VecSize); + unsigned PerStoreOffset = + TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + + bool Extend = false; + if (OutVals[0].getValueType().getSizeInBits() < 8) + Extend = true; + + for (unsigned i = 0; i < NumElts; i += VecSize) { + // Get values + SDValue StoreVal; + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getConstant(Offset, MVT::i32)); + unsigned Opc = NVPTXISD::StoreRetvalV2; + EVT ExtendedVT = (Extend) ? MVT::i8 : OutVals[0].getValueType(); + + StoreVal = OutVals[i]; + if (Extend) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + Ops.push_back(StoreVal); + + if (i + 1 < NumElts) { + StoreVal = OutVals[i + 1]; + if (Extend) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + } else { + StoreVal = DAG.getUNDEF(ExtendedVT); + } + Ops.push_back(StoreVal); + + if (VecSize == 4) { + Opc = NVPTXISD::StoreRetvalV4; + if (i + 2 < NumElts) { + StoreVal = OutVals[i + 2]; + if (Extend) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + } else { + StoreVal = DAG.getUNDEF(ExtendedVT); + } + Ops.push_back(StoreVal); + + if (i + 3 < NumElts) { + StoreVal = OutVals[i + 3]; + if (Extend) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + } else { + StoreVal = DAG.getUNDEF(ExtendedVT); + } + Ops.push_back(StoreVal); + } + + Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); + Offset += PerStoreOffset; + } + } + } else { + unsigned sizesofar = 0; + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + SDValue theVal = OutVals[i]; + EVT theValType = theVal.getValueType(); + unsigned numElems = 1; if (theValType.isVector()) - sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8; - else - sizesofar += theValType.getStoreSizeInBits() / 8; - ++idx; + numElems = theValType.getVectorNumElements(); + for (unsigned j = 0, je = numElems; j != je; ++j) { + SDValue tmpval = theVal; + if (theValType.isVector()) + tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + theValType.getVectorElementType(), tmpval, + DAG.getIntPtrConstant(j)); + EVT theStoreType = tmpval.getValueType(); + if (theStoreType.getSizeInBits() < 8) + tmpval = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, tmpval); + Chain = DAG.getNode(NVPTXISD::StoreRetval, dl, MVT::Other, Chain, + DAG.getConstant(sizesofar, MVT::i32), tmpval); + if (theValType.isVector()) + sizesofar += + theValType.getVectorElementType().getStoreSizeInBits() / 8; + else + sizesofar += theValType.getStoreSizeInBits() / 8; + } } } -- cgit v1.1 From 1c07dae9fcd04469779edf7b86fef37fecc9466c Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:57:59 +0000 Subject: [NVPTX] Remove i8 register class. PTX support for i8 (.b8, .u8, .s8) is rather poor and we're better off just ignoring it and letting LLVM expand all i8 ops out to i16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185174 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 3 - lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 413 +++++++++++++- lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 5 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 995 ++++++++++++++++++++++++--------- lib/Target/NVPTX/NVPTXISelLowering.h | 30 +- lib/Target/NVPTX/NVPTXInstrInfo.cpp | 3 - lib/Target/NVPTX/NVPTXInstrInfo.td | 609 ++------------------ lib/Target/NVPTX/NVPTXIntrinsics.td | 36 +- lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 6 - lib/Target/NVPTX/NVPTXRegisterInfo.td | 2 - 10 files changed, 1243 insertions(+), 859 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 84b0884..9188262 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -2016,7 +2016,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { case NVPTX::CallArgI32: case NVPTX::CallArgI32imm: case NVPTX::CallArgI64: - case NVPTX::CallArgI8: case NVPTX::CallArgParam: case NVPTX::CallVoidInst: case NVPTX::CallVoidInstReg: @@ -2050,7 +2049,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { case NVPTX::LastCallArgI32: case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: - case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: @@ -2063,7 +2061,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: case NVPTX::LoadParamRegI64: - case NVPTX::LoadParamRegI8: case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: return true; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index ac6dbb9..7a0a59f 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -116,6 +116,23 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::StoreV4: ResNode = SelectStoreVector(N); break; + case NVPTXISD::LoadParam: + case NVPTXISD::LoadParamV2: + case NVPTXISD::LoadParamV4: + ResNode = SelectLoadParam(N); + break; + case NVPTXISD::StoreRetval: + case NVPTXISD::StoreRetvalV2: + case NVPTXISD::StoreRetvalV4: + ResNode = SelectStoreRetval(N); + break; + case NVPTXISD::StoreParam: + case NVPTXISD::StoreParamV2: + case NVPTXISD::StoreParamV4: + case NVPTXISD::StoreParamS32: + case NVPTXISD::StoreParamU32: + ResNode = SelectStoreParam(N); + break; default: break; } @@ -771,7 +788,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { SDLoc DL(N); SDNode *LD; - EVT RetVT = N->getValueType(0); + MemSDNode *Mem = cast(N); + + EVT RetVT = Mem->getMemoryVT().getVectorElementType(); // Select opcode if (Subtarget.is64Bit()) { @@ -1571,6 +1590,398 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { return ST; } +SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { + SDValue Chain = Node->getOperand(0); + SDValue Offset = Node->getOperand(2); + SDValue Flag = Node->getOperand(3); + SDLoc DL(Node); + MemSDNode *Mem = cast(Node); + + unsigned VecSize; + switch (Node->getOpcode()) { + default: + return NULL; + case NVPTXISD::LoadParam: + VecSize = 1; + break; + case NVPTXISD::LoadParamV2: + VecSize = 2; + break; + case NVPTXISD::LoadParamV4: + VecSize = 4; + break; + } + + EVT EltVT = Node->getValueType(0); + EVT MemVT = Mem->getMemoryVT(); + + unsigned Opc = 0; + + switch (VecSize) { + default: + return NULL; + case 1: + switch (MemVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opc = NVPTX::LoadParamMemI8; + break; + case MVT::i8: + Opc = NVPTX::LoadParamMemI8; + break; + case MVT::i16: + Opc = NVPTX::LoadParamMemI16; + break; + case MVT::i32: + Opc = NVPTX::LoadParamMemI32; + break; + case MVT::i64: + Opc = NVPTX::LoadParamMemI64; + break; + case MVT::f32: + Opc = NVPTX::LoadParamMemF32; + break; + case MVT::f64: + Opc = NVPTX::LoadParamMemF64; + break; + } + break; + case 2: + switch (MemVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opc = NVPTX::LoadParamMemV2I8; + break; + case MVT::i8: + Opc = NVPTX::LoadParamMemV2I8; + break; + case MVT::i16: + Opc = NVPTX::LoadParamMemV2I16; + break; + case MVT::i32: + Opc = NVPTX::LoadParamMemV2I32; + break; + case MVT::i64: + Opc = NVPTX::LoadParamMemV2I64; + break; + case MVT::f32: + Opc = NVPTX::LoadParamMemV2F32; + break; + case MVT::f64: + Opc = NVPTX::LoadParamMemV2F64; + break; + } + break; + case 4: + switch (MemVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opc = NVPTX::LoadParamMemV4I8; + break; + case MVT::i8: + Opc = NVPTX::LoadParamMemV4I8; + break; + case MVT::i16: + Opc = NVPTX::LoadParamMemV4I16; + break; + case MVT::i32: + Opc = NVPTX::LoadParamMemV4I32; + break; + case MVT::f32: + Opc = NVPTX::LoadParamMemV4F32; + break; + } + break; + } + + SDVTList VTs; + if (VecSize == 1) { + VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue); + } else if (VecSize == 2) { + VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); + } else { + EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; + VTs = CurDAG->getVTList(&EVTs[0], 5); + } + + unsigned OffsetVal = cast(Offset)->getZExtValue(); + + SmallVector Ops; + Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); + Ops.push_back(Chain); + Ops.push_back(Flag); + + SDNode *Ret = + CurDAG->getMachineNode(Opc, DL, Node->getVTList(), Ops); + return Ret; +} + +SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { + SDLoc DL(N); + SDValue Chain = N->getOperand(0); + SDValue Offset = N->getOperand(1); + unsigned OffsetVal = cast(Offset)->getZExtValue(); + MemSDNode *Mem = cast(N); + + // How many elements do we have? + unsigned NumElts = 1; + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::StoreRetval: + NumElts = 1; + break; + case NVPTXISD::StoreRetvalV2: + NumElts = 2; + break; + case NVPTXISD::StoreRetvalV4: + NumElts = 4; + break; + } + + // Build vector of operands + SmallVector Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(N->getOperand(i + 2)); + Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); + Ops.push_back(Chain); + + // Determine target opcode + // If we have an i1, use an 8-bit store. The lowering code in + // NVPTXISelLowering will have already emitted an upcast. + unsigned Opcode = 0; + switch (NumElts) { + default: + return NULL; + case 1: + switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opcode = NVPTX::StoreRetvalI8; + break; + case MVT::i8: + Opcode = NVPTX::StoreRetvalI8; + break; + case MVT::i16: + Opcode = NVPTX::StoreRetvalI16; + break; + case MVT::i32: + Opcode = NVPTX::StoreRetvalI32; + break; + case MVT::i64: + Opcode = NVPTX::StoreRetvalI64; + break; + case MVT::f32: + Opcode = NVPTX::StoreRetvalF32; + break; + case MVT::f64: + Opcode = NVPTX::StoreRetvalF64; + break; + } + break; + case 2: + switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opcode = NVPTX::StoreRetvalV2I8; + break; + case MVT::i8: + Opcode = NVPTX::StoreRetvalV2I8; + break; + case MVT::i16: + Opcode = NVPTX::StoreRetvalV2I16; + break; + case MVT::i32: + Opcode = NVPTX::StoreRetvalV2I32; + break; + case MVT::i64: + Opcode = NVPTX::StoreRetvalV2I64; + break; + case MVT::f32: + Opcode = NVPTX::StoreRetvalV2F32; + break; + case MVT::f64: + Opcode = NVPTX::StoreRetvalV2F64; + break; + } + break; + case 4: + switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opcode = NVPTX::StoreRetvalV4I8; + break; + case MVT::i8: + Opcode = NVPTX::StoreRetvalV4I8; + break; + case MVT::i16: + Opcode = NVPTX::StoreRetvalV4I16; + break; + case MVT::i32: + Opcode = NVPTX::StoreRetvalV4I32; + break; + case MVT::f32: + Opcode = NVPTX::StoreRetvalV4F32; + break; + } + break; + } + + SDNode *Ret = + CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast(N)->getMemOperand(); + cast(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return Ret; +} + +SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { + SDLoc DL(N); + SDValue Chain = N->getOperand(0); + SDValue Param = N->getOperand(1); + unsigned ParamVal = cast(Param)->getZExtValue(); + SDValue Offset = N->getOperand(2); + unsigned OffsetVal = cast(Offset)->getZExtValue(); + MemSDNode *Mem = cast(N); + SDValue Flag = N->getOperand(N->getNumOperands() - 1); + + // How many elements do we have? + unsigned NumElts = 1; + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::StoreParamU32: + case NVPTXISD::StoreParamS32: + case NVPTXISD::StoreParam: + NumElts = 1; + break; + case NVPTXISD::StoreParamV2: + NumElts = 2; + break; + case NVPTXISD::StoreParamV4: + NumElts = 4; + break; + } + + // Build vector of operands + SmallVector Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(N->getOperand(i + 3)); + Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); + Ops.push_back(Chain); + Ops.push_back(Flag); + + // Determine target opcode + // If we have an i1, use an 8-bit store. The lowering code in + // NVPTXISelLowering will have already emitted an upcast. + unsigned Opcode = 0; + switch (N->getOpcode()) { + default: + switch (NumElts) { + default: + return NULL; + case 1: + switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opcode = NVPTX::StoreParamI8; + break; + case MVT::i8: + Opcode = NVPTX::StoreParamI8; + break; + case MVT::i16: + Opcode = NVPTX::StoreParamI16; + break; + case MVT::i32: + Opcode = NVPTX::StoreParamI32; + break; + case MVT::i64: + Opcode = NVPTX::StoreParamI64; + break; + case MVT::f32: + Opcode = NVPTX::StoreParamF32; + break; + case MVT::f64: + Opcode = NVPTX::StoreParamF64; + break; + } + break; + case 2: + switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opcode = NVPTX::StoreParamV2I8; + break; + case MVT::i8: + Opcode = NVPTX::StoreParamV2I8; + break; + case MVT::i16: + Opcode = NVPTX::StoreParamV2I16; + break; + case MVT::i32: + Opcode = NVPTX::StoreParamV2I32; + break; + case MVT::i64: + Opcode = NVPTX::StoreParamV2I64; + break; + case MVT::f32: + Opcode = NVPTX::StoreParamV2F32; + break; + case MVT::f64: + Opcode = NVPTX::StoreParamV2F64; + break; + } + break; + case 4: + switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i1: + Opcode = NVPTX::StoreParamV4I8; + break; + case MVT::i8: + Opcode = NVPTX::StoreParamV4I8; + break; + case MVT::i16: + Opcode = NVPTX::StoreParamV4I16; + break; + case MVT::i32: + Opcode = NVPTX::StoreParamV4I32; + break; + case MVT::f32: + Opcode = NVPTX::StoreParamV4F32; + break; + } + break; + } + break; + case NVPTXISD::StoreParamU32: + Opcode = NVPTX::StoreParamU32I16; + break; + case NVPTXISD::StoreParamS32: + Opcode = NVPTX::StoreParamS32I16; + break; + } + + SDNode *Ret = + CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast(N)->getMemOperand(); + cast(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return Ret; +} + // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index ed16d44..428e7b2 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -80,7 +80,10 @@ private: SDNode *SelectLDGLDUVector(SDNode *N); SDNode *SelectStore(SDNode *N); SDNode *SelectStoreVector(SDNode *N); - + SDNode *SelectLoadParam(SDNode *N); + SDNode *SelectStoreRetval(SDNode *N); + SDNode *SelectStoreParam(SDNode *N); + inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 9679b05..0396a64 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -51,6 +51,8 @@ static bool IsPTXVectorType(MVT VT) { switch (VT.SimpleTy) { default: return false; + case MVT::v2i1: + case MVT::v4i1: case MVT::v2i8: case MVT::v4i8: case MVT::v2i16: @@ -65,6 +67,37 @@ static bool IsPTXVectorType(MVT VT) { } } +/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive +/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors +/// into their primitive components. +/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the +/// same number of types as the Ins/Outs arrays in LowerFormalArguments, +/// LowerCall, and LowerReturn. +static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, + SmallVectorImpl &ValueVTs, + SmallVectorImpl *Offsets = 0, + uint64_t StartingOffset = 0) { + SmallVector TempVTs; + SmallVector TempOffsets; + + ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); + for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { + EVT VT = TempVTs[i]; + uint64_t Off = TempOffsets[i]; + if (VT.isVector()) + for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { + ValueVTs.push_back(VT.getVectorElementType()); + if (Offsets) + Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); + } + else { + ValueVTs.push_back(VT); + if (Offsets) + Offsets->push_back(Off); + } + } +} + // NVPTXTargetLowering Constructor. NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), @@ -90,7 +123,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setSchedulingPreference(Sched::Source); addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); - addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass); addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); @@ -181,6 +213,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) } } + // Custom handling for i8 intrinsics + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + // Now deduce the information based on the above mentioned // actions computeRegisterProperties(); @@ -293,6 +328,7 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); } +/* std::string NVPTXTargetLowering::getPrototype( Type *retTy, const ArgListTy &Args, const SmallVectorImpl &Outs, unsigned retAlignment) const { @@ -442,6 +478,152 @@ std::string NVPTXTargetLowering::getPrototype( } O << ");"; return O.str(); +}*/ + +std::string +NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, + const SmallVectorImpl &Outs, + unsigned retAlignment, + const ImmutableCallSite *CS) const { + + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + assert(isABI && "Non-ABI compilation is not supported"); + if (!isABI) + return ""; + + std::stringstream O; + O << "prototype_" << uniqueCallSite << " : .callprototype "; + + if (retTy->getTypeID() == Type::VoidTyID) { + O << "()"; + } else { + O << "("; + if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { + unsigned size = 0; + if (const IntegerType *ITy = dyn_cast(retTy)) { + size = ITy->getBitWidth(); + if (size < 32) + size = 32; + } else { + assert(retTy->isFloatingPointTy() && + "Floating point type expected here"); + size = retTy->getPrimitiveSizeInBits(); + } + + O << ".param .b" << size << " _"; + } else if (isa(retTy)) { + O << ".param .b" << getPointerTy().getSizeInBits() << " _"; + } else { + if ((retTy->getTypeID() == Type::StructTyID) || isa(retTy)) { + SmallVector vtparts; + ComputeValueVTs(*this, retTy, vtparts); + unsigned totalsz = 0; + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + // TODO: no need to loop + for (unsigned j = 0, je = elems; j != je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; + } + } + O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; + } else { + assert(false && "Unknown return type"); + } + } + O << ") "; + } + O << "_ ("; + + bool first = true; + MVT thePointerTy = getPointerTy(); + + unsigned OIdx = 0; + for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { + Type *Ty = Args[i].Ty; + if (!first) { + O << ", "; + } + first = false; + + if (Outs[OIdx].Flags.isByVal() == false) { + if (Ty->isAggregateType() || Ty->isVectorTy()) { + unsigned align = 0; + const CallInst *CallI = cast(CS->getInstruction()); + const DataLayout *TD = getDataLayout(); + // +1 because index 0 is reserved for return type alignment + if (!llvm::getAlign(*CallI, i + 1, align)) + align = TD->getABITypeAlignment(Ty); + unsigned sz = TD->getTypeAllocSize(Ty); + O << ".param .align " << align << " .b8 "; + O << "_"; + O << "[" << sz << "]"; + // update the index for Outs + SmallVector vtparts; + ComputeValueVTs(*this, Ty, vtparts); + if (unsigned len = vtparts.size()) + OIdx += len - 1; + continue; + } + assert(getValueType(Ty) == Outs[OIdx].VT && + "type mismatch between callee prototype and arguments"); + // scalar type + unsigned sz = 0; + if (isa(Ty)) { + sz = cast(Ty)->getBitWidth(); + if (sz < 32) + sz = 32; + } else if (isa(Ty)) + sz = thePointerTy.getSizeInBits(); + else + sz = Ty->getPrimitiveSizeInBits(); + O << ".param .b" << sz << " "; + O << "_"; + continue; + } + const PointerType *PTy = dyn_cast(Ty); + assert(PTy && "Param with byval attribute should be a pointer type"); + Type *ETy = PTy->getElementType(); + + unsigned align = Outs[OIdx].Flags.getByValAlign(); + unsigned sz = getDataLayout()->getTypeAllocSize(ETy); + O << ".param .align " << align << " .b8 "; + O << "_"; + O << "[" << sz << "]"; + } + O << ");"; + return O.str(); +} + +unsigned +NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, + const ImmutableCallSite *CS, + Type *Ty, + unsigned Idx) const { + const DataLayout *TD = getDataLayout(); + unsigned align = 0; + GlobalAddressSDNode *Func = dyn_cast(Callee.getNode()); + + if (Func) { // direct call + assert(CS->getCalledFunction() && + "direct call cannot find callee"); + if (!llvm::getAlign(*(CS->getCalledFunction()), Idx, align)) + align = TD->getABITypeAlignment(Ty); + } + else { // indirect call + const CallInst *CallI = dyn_cast(CS->getInstruction()); + if (!llvm::getAlign(*CallI, Idx, align)) + align = TD->getABITypeAlignment(Ty); + } + + return align; } SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -459,54 +641,257 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite *CS = CLI.CS; bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + assert(isABI && "Non-ABI compilation is not supported"); + if (!isABI) + return Chain; + const DataLayout *TD = getDataLayout(); + MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + const TargetLowering *TLI = nvTM->getTargetLowering(); SDValue tempChain = Chain; - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true), - dl); + Chain = + DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), + dl); SDValue InFlag = Chain.getValue(1); - assert((Outs.size() == Args.size()) && - "Unexpected number of arguments to function call"); unsigned paramCount = 0; + // Args.size() and Outs.size() need not match. + // Outs.size() will be larger + // * if there is an aggregate argument with multiple fields (each field + // showing up separately in Outs) + // * if there is a vector argument with more than typical vector-length + // elements (generally if more than 4) where each vector element is + // individually present in Outs. + // So a different index should be used for indexing into Outs/OutVals. + // See similar issue in LowerFormalArguments. + unsigned OIdx = 0; // Declare the .params or .reg need to pass values // to the function - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].VT; + for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { + EVT VT = Outs[OIdx].VT; + Type *Ty = Args[i].Ty; - if (Outs[i].Flags.isByVal() == false) { + if (Outs[OIdx].Flags.isByVal() == false) { + if (Ty->isAggregateType()) { + // aggregate + SmallVector vtparts; + ComputeValueVTs(*this, Ty, vtparts); + + unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); + // declare .param .align .b8 .param[]; + unsigned sz = TD->getTypeAllocSize(Ty); + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); + unsigned curOffset = 0; + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { + unsigned elems = 1; + EVT elemtype = vtparts[j]; + if (vtparts[j].isVector()) { + elems = vtparts[j].getVectorNumElements(); + elemtype = vtparts[j].getVectorElementType(); + } + for (unsigned k = 0, ke = elems; k != ke; ++k) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + SDValue StVal = OutVals[OIdx]; + if (elemtype.getSizeInBits() < 16) { + StVal = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, StVal); + } + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(curOffset, MVT::i32), + StVal, InFlag }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, + CopyParamVTs, &CopyParamOps[0], 5, + elemtype, MachinePointerInfo()); + InFlag = Chain.getValue(1); + curOffset += sz / 8; + ++OIdx; + } + } + if (vtparts.size() > 0) + --OIdx; + ++paramCount; + continue; + } + if (Ty->isVectorTy()) { + EVT ObjectVT = getValueType(Ty); + unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); + // declare .param .align .b8 .param[]; + unsigned sz = TD->getTypeAllocSize(Ty); + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); + unsigned NumElts = ObjectVT.getVectorNumElements(); + EVT EltVT = ObjectVT.getVectorElementType(); + EVT MemVT = EltVT; + bool NeedExtend = false; + if (EltVT.getSizeInBits() < 16) { + NeedExtend = true; + EltVT = MVT::i16; + } + + // V1 store + if (NumElts == 1) { + SDValue Elt = OutVals[OIdx++]; + if (NeedExtend) + Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt); + + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(0, MVT::i32), Elt, + InFlag }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, + CopyParamVTs, &CopyParamOps[0], 5, + MemVT, MachinePointerInfo()); + InFlag = Chain.getValue(1); + } else if (NumElts == 2) { + SDValue Elt0 = OutVals[OIdx++]; + SDValue Elt1 = OutVals[OIdx++]; + if (NeedExtend) { + Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0); + Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1); + } + + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(0, MVT::i32), Elt0, Elt1, + InFlag }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, + CopyParamVTs, &CopyParamOps[0], 6, + MemVT, MachinePointerInfo()); + InFlag = Chain.getValue(1); + } else { + unsigned curOffset = 0; + // V4 stores + // We have at least 4 elements (<3 x Ty> expands to 4 elements) and + // the + // vector will be expanded to a power of 2 elements, so we know we can + // always round up to the next multiple of 4 when creating the vector + // stores. + // e.g. 4 elem => 1 st.v4 + // 6 elem => 2 st.v4 + // 8 elem => 2 st.v4 + // 11 elem => 3 st.v4 + unsigned VecSize = 4; + if (EltVT.getSizeInBits() == 64) + VecSize = 2; + + // This is potentially only part of a vector, so assume all elements + // are packed together. + unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize; + + for (unsigned i = 0; i < NumElts; i += VecSize) { + // Get values + SDValue StoreVal; + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getConstant(paramCount, MVT::i32)); + Ops.push_back(DAG.getConstant(curOffset, MVT::i32)); + + unsigned Opc = NVPTXISD::StoreParamV2; + + StoreVal = OutVals[OIdx++]; + if (NeedExtend) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); + Ops.push_back(StoreVal); + + if (i + 1 < NumElts) { + StoreVal = OutVals[OIdx++]; + if (NeedExtend) + StoreVal = + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); + } else { + StoreVal = DAG.getUNDEF(EltVT); + } + Ops.push_back(StoreVal); + + if (VecSize == 4) { + Opc = NVPTXISD::StoreParamV4; + if (i + 2 < NumElts) { + StoreVal = OutVals[OIdx++]; + if (NeedExtend) + StoreVal = + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); + } else { + StoreVal = DAG.getUNDEF(EltVT); + } + Ops.push_back(StoreVal); + + if (i + 3 < NumElts) { + StoreVal = OutVals[OIdx++]; + if (NeedExtend) + StoreVal = + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); + } else { + StoreVal = DAG.getUNDEF(EltVT); + } + Ops.push_back(StoreVal); + } + + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0], + Ops.size(), MemVT, + MachinePointerInfo()); + InFlag = Chain.getValue(1); + curOffset += PerStoreOffset; + } + } + ++paramCount; + --OIdx; + continue; + } // Plain scalar // for ABI, declare .param .b .param; - // for nonABI, declare .reg .b .param; - unsigned isReg = 1; - if (isABI) - isReg = 0; unsigned sz = VT.getSizeInBits(); - if (VT.isInteger() && (sz < 32)) - sz = 32; + bool needExtend = false; + if (VT.isInteger()) { + if (sz < 16) + needExtend = true; + if (sz < 32) + sz = 32; + } SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), - DAG.getConstant(isReg, MVT::i32), InFlag }; + DAG.getConstant(0, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); + SDValue OutV = OutVals[OIdx]; + if (needExtend) { + // zext/sext i1 to i16 + unsigned opc = ISD::ZERO_EXTEND; + if (Outs[OIdx].Flags.isSExt()) + opc = ISD::SIGN_EXTEND; + OutV = DAG.getNode(opc, dl, MVT::i16, OutV); + } SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), OutVals[i], - InFlag }; + DAG.getConstant(0, MVT::i32), OutV, InFlag }; unsigned opcode = NVPTXISD::StoreParam; - if (isReg) - opcode = NVPTXISD::MoveToParam; - else { - if (Outs[i].Flags.isZExt()) - opcode = NVPTXISD::StoreParamU32; - else if (Outs[i].Flags.isSExt()) - opcode = NVPTXISD::StoreParamS32; - } - Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5); + if (Outs[OIdx].Flags.isZExt()) + opcode = NVPTXISD::StoreParamU32; + else if (Outs[OIdx].Flags.isSExt()) + opcode = NVPTXISD::StoreParamS32; + Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, 5, + VT, MachinePointerInfo()); InFlag = Chain.getValue(1); ++paramCount; @@ -518,55 +903,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(PTy && "Type of a byval parameter should be pointer"); ComputeValueVTs(*this, PTy->getElementType(), vtparts); - if (isABI) { - // declare .param .align 16 .b8 .param[]; - unsigned sz = Outs[i].Flags.getByValSize(); - SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - // The ByValAlign in the Outs[i].Flags is alway set at this point, so we - // don't need to - // worry about natural alignment or not. See TargetLowering::LowerCallTo() - SDValue DeclareParamOps[] = { - Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), - DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), - InFlag - }; - Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, - DeclareParamOps, 5); - InFlag = Chain.getValue(1); - unsigned curOffset = 0; - for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { - unsigned elems = 1; - EVT elemtype = vtparts[j]; - if (vtparts[j].isVector()) { - elems = vtparts[j].getVectorNumElements(); - elemtype = vtparts[j].getVectorElementType(); - } - for (unsigned k = 0, ke = elems; k != ke; ++k) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) - sz = 8; - SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], - DAG.getConstant(curOffset, getPointerTy())); - SDValue theVal = - DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); - SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(curOffset, MVT::i32), - theVal, InFlag }; - Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs, - CopyParamOps, 5); - InFlag = Chain.getValue(1); - curOffset += sz / 8; - } - } - ++paramCount; - continue; - } - // Non-abi, struct or vector - // Declare a bunch or .reg .b .param + // declare .param .align .b8 .param[]; + unsigned sz = Outs[OIdx].Flags.getByValSize(); + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, + // so we don't need to worry about natural alignment or not. + // See TargetLowering::LowerCallTo(). + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32), + DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), + InFlag + }; + Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); unsigned curOffset = 0; for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; @@ -577,107 +927,66 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; - SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareParamOps[] = { Chain, - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(sz, MVT::i32), - DAG.getConstant(1, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, - DeclareParamOps, 5); - InFlag = Chain.getValue(1); + if (elemtype.isInteger() && (sz < 8)) + sz = 8; SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], DAG.getConstant(curOffset, getPointerTy())); - SDValue theVal = - DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), - false, false, false, 0); + SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, + 0); + if (elemtype.getSizeInBits() < 16) { + theVal = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, theVal); + } SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), theVal, + DAG.getConstant(curOffset, MVT::i32), theVal, InFlag }; - Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs, - CopyParamOps, 5); + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, + CopyParamOps, 5, elemtype, + MachinePointerInfo()); + InFlag = Chain.getValue(1); - ++paramCount; + curOffset += sz / 8; } } + ++paramCount; } GlobalAddressSDNode *Func = dyn_cast(Callee.getNode()); unsigned retAlignment = 0; // Handle Result - unsigned retCount = 0; if (Ins.size() > 0) { SmallVector resvtparts; ComputeValueVTs(*this, retTy, resvtparts); - // Declare one .param .align 16 .b8 func_retval0[] for ABI or - // individual .reg .b func_retval<0..> for non ABI - unsigned resultsz = 0; - for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = resvtparts[i]; - if (resvtparts[i].isVector()) { - elems = resvtparts[i].getVectorNumElements(); - elemtype = resvtparts[i].getVectorElementType(); - } - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (isABI == false) { - if (elemtype.isInteger() && (sz < 32)) - sz = 32; - } else { - if (elemtype.isInteger() && (sz < 8)) - sz = 8; - } - if (isABI == false) { - SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32), - DAG.getConstant(sz, MVT::i32), - DAG.getConstant(retCount, MVT::i32), - InFlag }; - Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, - DeclareRetOps, 5); - InFlag = Chain.getValue(1); - ++retCount; - } - resultsz += sz; - } - } - if (isABI) { - if (retTy->isPrimitiveType() || retTy->isIntegerTy() || - retTy->isPointerTy()) { - // Scalar needs to be at least 32bit wide - if (resultsz < 32) - resultsz = 32; - SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), - DAG.getConstant(resultsz, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, - DeclareRetOps, 5); - InFlag = Chain.getValue(1); - } else { - if (Func) { // direct call - if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) - retAlignment = getDataLayout()->getABITypeAlignment(retTy); - } else { // indirect call - const CallInst *CallI = dyn_cast(CS->getInstruction()); - if (!llvm::getAlign(*CallI, 0, retAlignment)) - retAlignment = getDataLayout()->getABITypeAlignment(retTy); - } - SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, - DAG.getConstant(retAlignment, MVT::i32), - DAG.getConstant(resultsz / 8, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, - DeclareRetOps, 5); - InFlag = Chain.getValue(1); - } + // Declare + // .param .align 16 .b8 retval0[], or + // .param .b retval0 + unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); + if (retTy->isPrimitiveType() || retTy->isIntegerTy() || + retTy->isPointerTy()) { + // Scalar needs to be at least 32bit wide + if (resultsz < 32) + resultsz = 32; + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(resultsz, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, + DeclareRetOps, 5); + InFlag = Chain.getValue(1); + } else { + retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, + DAG.getConstant(retAlignment, MVT::i32), + DAG.getConstant(resultsz / 8, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, + DeclareRetOps, 5); + InFlag = Chain.getValue(1); } } @@ -690,7 +999,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The prototype is embedded in a string and put as the operand for an // INLINEASM SDNode. SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue); - std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment); + std::string proto_string = + getPrototype(retTy, Args, Outs, retAlignment, CS); const char *asmstr = nvTM->getManagedStrPool() ->getManagedString(proto_string.c_str())->c_str(); SDValue InlineAsmOps[] = { @@ -703,9 +1013,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Op to just print "call" SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue PrintCallOps[] = { - Chain, - DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32), - InFlag + Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag }; Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), dl, PrintCallVTs, PrintCallOps, 3); @@ -753,59 +1061,172 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Generate loads from param memory/moves from registers for result if (Ins.size() > 0) { - if (isABI) { - unsigned resoffset = 0; - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - unsigned sz = Ins[i].VT.getSizeInBits(); - if (Ins[i].VT.isInteger() && (sz < 8)) - sz = 8; - EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue }; - SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), - DAG.getConstant(resoffset, MVT::i32), InFlag }; - SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, - LoadRetOps, array_lengthof(LoadRetOps)); + unsigned resoffset = 0; + if (retTy && retTy->isVectorTy()) { + EVT ObjectVT = getValueType(retTy); + unsigned NumElts = ObjectVT.getVectorNumElements(); + EVT EltVT = ObjectVT.getVectorElementType(); + assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && + "Vector was not scalarized"); + unsigned sz = EltVT.getSizeInBits(); + bool needTruncate = sz < 16 ? true : false; + + if (NumElts == 1) { + // Just a simple load + std::vector LoadRetVTs; + if (needTruncate) { + // If loading i1 result, generate + // load i16 + // trunc i16 to i1 + LoadRetVTs.push_back(MVT::i16); + } else + LoadRetVTs.push_back(EltVT); + LoadRetVTs.push_back(MVT::Other); + LoadRetVTs.push_back(MVT::Glue); + std::vector LoadRetOps; + LoadRetOps.push_back(Chain); + LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); + LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); + LoadRetOps.push_back(InFlag); + SDValue retval = DAG.getMemIntrinsicNode( + NVPTXISD::LoadParam, dl, + DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], + LoadRetOps.size(), EltVT, MachinePointerInfo()); Chain = retval.getValue(1); InFlag = retval.getValue(2); - InVals.push_back(retval); - resoffset += sz / 8; + SDValue Ret0 = retval; + if (needTruncate) + Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); + InVals.push_back(Ret0); + } else if (NumElts == 2) { + // LoadV2 + std::vector LoadRetVTs; + if (needTruncate) { + // If loading i1 result, generate + // load i16 + // trunc i16 to i1 + LoadRetVTs.push_back(MVT::i16); + LoadRetVTs.push_back(MVT::i16); + } else { + LoadRetVTs.push_back(EltVT); + LoadRetVTs.push_back(EltVT); + } + LoadRetVTs.push_back(MVT::Other); + LoadRetVTs.push_back(MVT::Glue); + std::vector LoadRetOps; + LoadRetOps.push_back(Chain); + LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); + LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); + LoadRetOps.push_back(InFlag); + SDValue retval = DAG.getMemIntrinsicNode( + NVPTXISD::LoadParamV2, dl, + DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], + LoadRetOps.size(), EltVT, MachinePointerInfo()); + Chain = retval.getValue(2); + InFlag = retval.getValue(3); + SDValue Ret0 = retval.getValue(0); + SDValue Ret1 = retval.getValue(1); + if (needTruncate) { + Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); + InVals.push_back(Ret0); + Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); + InVals.push_back(Ret1); + } else { + InVals.push_back(Ret0); + InVals.push_back(Ret1); + } + } else { + // Split into N LoadV4 + unsigned Ofst = 0; + unsigned VecSize = 4; + unsigned Opc = NVPTXISD::LoadParamV4; + if (EltVT.getSizeInBits() == 64) { + VecSize = 2; + Opc = NVPTXISD::LoadParamV2; + } + EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); + for (unsigned i = 0; i < NumElts; i += VecSize) { + SmallVector LoadRetVTs; + if (needTruncate) { + // If loading i1 result, generate + // load i16 + // trunc i16 to i1 + for (unsigned j = 0; j < VecSize; ++j) + LoadRetVTs.push_back(MVT::i16); + } else { + for (unsigned j = 0; j < VecSize; ++j) + LoadRetVTs.push_back(EltVT); + } + LoadRetVTs.push_back(MVT::Other); + LoadRetVTs.push_back(MVT::Glue); + SmallVector LoadRetOps; + LoadRetOps.push_back(Chain); + LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); + LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); + LoadRetOps.push_back(InFlag); + SDValue retval = DAG.getMemIntrinsicNode( + Opc, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), + &LoadRetOps[0], LoadRetOps.size(), EltVT, MachinePointerInfo()); + if (VecSize == 2) { + Chain = retval.getValue(2); + InFlag = retval.getValue(3); + } else { + Chain = retval.getValue(4); + InFlag = retval.getValue(5); + } + + for (unsigned j = 0; j < VecSize; ++j) { + if (i + j >= NumElts) + break; + SDValue Elt = retval.getValue(j); + if (needTruncate) + Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); + InVals.push_back(Elt); + } + Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + } } } else { - SmallVector resvtparts; - ComputeValueVTs(*this, retTy, resvtparts); - - assert(Ins.size() == resvtparts.size() && - "Unexpected number of return values in non-ABI case"); - unsigned paramNum = 0; + SmallVector VTs; + ComputePTXValueVTs(*this, retTy, VTs); + assert(VTs.size() == Ins.size() && "Bad value decomposition"); for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert(EVT(Ins[i].VT) == resvtparts[i] && - "Unexpected EVT type in non-ABI case"); - unsigned numelems = 1; - EVT elemtype = Ins[i].VT; - if (Ins[i].VT.isVector()) { - numelems = Ins[i].VT.getVectorNumElements(); - elemtype = Ins[i].VT.getVectorElementType(); - } - std::vector tempRetVals; - for (unsigned j = 0; j < numelems; ++j) { - EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue }; - SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32), - DAG.getConstant(paramNum, MVT::i32), - InFlag }; - SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, - MoveRetOps, array_lengthof(MoveRetOps)); - Chain = retval.getValue(1); - InFlag = retval.getValue(2); - tempRetVals.push_back(retval); - ++paramNum; - } - if (Ins[i].VT.isVector()) - InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT, - &tempRetVals[0], tempRetVals.size())); - else - InVals.push_back(tempRetVals[0]); + unsigned sz = VTs[i].getSizeInBits(); + bool needTruncate = sz < 8 ? true : false; + if (VTs[i].isInteger() && (sz < 8)) + sz = 8; + + SmallVector LoadRetVTs; + if (sz < 16) { + // If loading i1/i8 result, generate + // load i8 (-> i16) + // trunc i16 to i1/i8 + LoadRetVTs.push_back(MVT::i16); + } else + LoadRetVTs.push_back(Ins[i].VT); + LoadRetVTs.push_back(MVT::Other); + LoadRetVTs.push_back(MVT::Glue); + + SmallVector LoadRetOps; + LoadRetOps.push_back(Chain); + LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); + LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32)); + LoadRetOps.push_back(InFlag); + SDValue retval = DAG.getMemIntrinsicNode( + NVPTXISD::LoadParam, dl, + DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], + LoadRetOps.size(), VTs[i], MachinePointerInfo()); + Chain = retval.getValue(1); + InFlag = retval.getValue(2); + SDValue Ret0 = retval.getValue(0); + if (needTruncate) + Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); + InVals.push_back(Ret0); + resoffset += sz / 8; } } } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), DAG.getIntPtrConstant(uniqueCallSite + 1, true), InFlag, dl); @@ -874,8 +1295,8 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // v = ld i1* addr // => -// v1 = ld i8* addr -// v = trunc v1 to i1 +// v1 = ld i8* addr (-> i16) +// v = trunc i16 to i1 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); LoadSDNode *LD = cast(Node); @@ -884,7 +1305,7 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { assert(Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only"); SDValue newLD = - DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); @@ -942,9 +1363,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { // Since StoreV2 is a target node, we cannot rely on DAG type legalization. // Therefore, we must ensure the type is legal. For i1 and i8, we set the // stored type to i16 and propogate the "real" type as the memory type. - bool NeedExt = false; + bool NeedSExt = false; if (EltVT.getSizeInBits() < 16) - NeedExt = true; + NeedSExt = true; switch (NumElts) { default: @@ -967,10 +1388,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { for (unsigned i = 0; i < NumElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, DAG.getIntPtrConstant(i)); - if (NeedExt) - // ANY_EXTEND is correct here since the store will only look at the - // lower-order bits anyway. - ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); + if (NeedSExt) + ExtVal = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i16, ExtVal); Ops.push_back(ExtVal); } @@ -994,8 +1413,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { // st i1 v, addr // => -// v1 = zxt v to i8 -// st i8, addr +// v1 = zxt v to i16 +// st.u8 i16, addr SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); SDLoc dl(Node); @@ -1007,9 +1426,10 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3); - SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); + SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), MVT::i8, isNonTemporal, + isVolatile, Alignment); return Result; } @@ -1116,7 +1536,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (Ty->isAggregateType()) { SmallVector vtparts; - ComputeValueVTs(*this, Ty, vtparts); + ComputePTXValueVTs(*this, Ty, vtparts); assert(vtparts.size() > 0 && "empty aggregate type not expected"); for (unsigned parti = 0, parte = vtparts.size(); parti != parte; ++parti) { @@ -1152,7 +1572,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( SmallVector vtparts; SmallVector offsets; - ComputeValueVTs(*this, Ty, vtparts, &offsets, 0); + // NOTE: Here, we lose the ability to issue vector loads for vectors + // that are a part of a struct. This should be investigated in the + // future. + ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); assert(vtparts.size() > 0 && "empty aggregate type not expected"); bool aggregateIsPacked = false; if (StructType *STy = llvm::dyn_cast(Ty)) @@ -1172,9 +1595,15 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( aggregateIsPacked ? 1 : TD->getABITypeAlignment( partVT.getTypeForEVT(F->getContext())); - SDValue p = DAG.getLoad(partVT, dl, Root, srcAddr, - MachinePointerInfo(srcValue), false, false, - true, partAlign); + SDValue p; + if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) + p = DAG.getExtLoad(ISD::SEXTLOAD, dl, Ins[InsIdx].VT, Root, srcAddr, + MachinePointerInfo(srcValue), partVT, false, + false, partAlign); + else + p = DAG.getLoad(partVT, dl, Root, srcAddr, + MachinePointerInfo(srcValue), false, false, false, + partAlign); if (p.getNode()) p.getNode()->setIROrder(idx + 1); InVals.push_back(p); @@ -1208,6 +1637,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (P.getNode()) P.getNode()->setIROrder(idx + 1); + if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) + P = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, P); InVals.push_back(P); Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); ++InsIdx; @@ -1230,6 +1661,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( DAG.getIntPtrConstant(0)); SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, DAG.getIntPtrConstant(1)); + + if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { + Elt0 = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt0); + Elt1 = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt1); + } + InVals.push_back(Elt0); InVals.push_back(Elt1); Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); @@ -1269,6 +1706,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( break; SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, DAG.getIntPtrConstant(j)); + if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) + Elt = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt); InVals.push_back(Elt); } Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); @@ -1282,16 +1721,19 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( } // A plain scalar. EVT ObjectVT = getValueType(Ty); - assert(ObjectVT == Ins[InsIdx].VT && - "Ins type did not match function type"); // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); Value *srcValue = Constant::getNullValue(PointerType::get( ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue p = DAG.getLoad( - ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, - true, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + SDValue p; + if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) + p = DAG.getExtLoad(ISD::SEXTLOAD, dl, Ins[InsIdx].VT, Root, Arg, + MachinePointerInfo(srcValue), ObjectVT, false, false, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + else + p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, + MachinePointerInfo(srcValue), false, false, false, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); if (p.getNode()) p.getNode()->setIROrder(idx + 1); InVals.push_back(p); @@ -1360,26 +1802,38 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, unsigned NumElts = VTy->getNumElements(); assert(NumElts == Outs.size() && "Bad scalarization of return value"); + // const_cast can be removed in later LLVM versions + EVT EltVT = getValueType(const_cast(RetTy)).getVectorElementType(); + bool NeedExtend = false; + if (EltVT.getSizeInBits() < 16) + NeedExtend = true; + // V1 store if (NumElts == 1) { SDValue StoreVal = OutVals[0]; // We only have one element, so just directly store it - if (StoreVal.getValueType().getSizeInBits() < 8) - StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); - Chain = DAG.getNode(NVPTXISD::StoreRetval, dl, MVT::Other, Chain, - DAG.getConstant(0, MVT::i32), StoreVal); + if (NeedExtend) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); + SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, + DAG.getVTList(MVT::Other), &Ops[0], 3, + EltVT, MachinePointerInfo()); + } else if (NumElts == 2) { // V2 store SDValue StoreVal0 = OutVals[0]; SDValue StoreVal1 = OutVals[1]; - if (StoreVal0.getValueType().getSizeInBits() < 8) { - StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal0); - StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal1); + if (NeedExtend) { + StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); + StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); } - Chain = DAG.getNode(NVPTXISD::StoreRetvalV2, dl, MVT::Other, Chain, - DAG.getConstant(0, MVT::i32), StoreVal0, StoreVal1); + SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0, + StoreVal1 }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, + DAG.getVTList(MVT::Other), &Ops[0], 4, + EltVT, MachinePointerInfo()); } else { // V4 stores // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the @@ -1402,10 +1856,6 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, unsigned PerStoreOffset = TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); - bool Extend = false; - if (OutVals[0].getValueType().getSizeInBits() < 8) - Extend = true; - for (unsigned i = 0; i < NumElts; i += VecSize) { // Get values SDValue StoreVal; @@ -1413,17 +1863,17 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, Ops.push_back(Chain); Ops.push_back(DAG.getConstant(Offset, MVT::i32)); unsigned Opc = NVPTXISD::StoreRetvalV2; - EVT ExtendedVT = (Extend) ? MVT::i8 : OutVals[0].getValueType(); + EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType(); StoreVal = OutVals[i]; - if (Extend) - StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + if (NeedExtend) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); Ops.push_back(StoreVal); if (i + 1 < NumElts) { StoreVal = OutVals[i + 1]; - if (Extend) - StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + if (NeedExtend) + StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); } else { StoreVal = DAG.getUNDEF(ExtendedVT); } @@ -1433,8 +1883,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, Opc = NVPTXISD::StoreRetvalV4; if (i + 2 < NumElts) { StoreVal = OutVals[i + 2]; - if (Extend) - StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + if (NeedExtend) + StoreVal = + DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); } else { StoreVal = DAG.getUNDEF(ExtendedVT); } @@ -1442,19 +1893,29 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (i + 3 < NumElts) { StoreVal = OutVals[i + 3]; - if (Extend) - StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal); + if (NeedExtend) + StoreVal = + DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); } else { StoreVal = DAG.getUNDEF(ExtendedVT); } Ops.push_back(StoreVal); } - Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); + // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); + Chain = + DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), &Ops[0], + Ops.size(), EltVT, MachinePointerInfo()); Offset += PerStoreOffset; } } } else { + SmallVector ValVTs; + // const_cast is necessary since we are still using an LLVM version from + // before the type system re-write. + ComputePTXValueVTs(*this, const_cast(RetTy), ValVTs); + assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); + unsigned sizesofar = 0; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { SDValue theVal = OutVals[i]; @@ -1471,13 +1932,15 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, EVT theStoreType = tmpval.getValueType(); if (theStoreType.getSizeInBits() < 8) tmpval = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, tmpval); - Chain = DAG.getNode(NVPTXISD::StoreRetval, dl, MVT::Other, Chain, - DAG.getConstant(sizesofar, MVT::i32), tmpval); + SDValue Ops[] = { Chain, DAG.getConstant(sizesofar, MVT::i32), tmpval }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, + DAG.getVTList(MVT::Other), &Ops[0], 3, + ValVTs[i], MachinePointerInfo()); if (theValType.isVector()) sizesofar += - theValType.getVectorElementType().getStoreSizeInBits() / 8; + ValVTs[i].getVectorElementType().getStoreSizeInBits() / 8; else - sizesofar += theValType.getStoreSizeInBits() / 8; + sizesofar += ValVTs[i].getStoreSizeInBits() / 8; } } } @@ -1485,6 +1948,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); } + void NVPTXTargetLowering::LowerAsmOperandForConstraint( SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { @@ -1548,9 +2012,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.opc = ISD::INTRINSIC_W_CHAIN; if (Intrinsic == Intrinsic::nvvm_ldu_global_i) - Info.memVT = MVT::i32; + Info.memVT = getValueType(I.getType()); else if (Intrinsic == Intrinsic::nvvm_ldu_global_p) - Info.memVT = getPointerTy(); + Info.memVT = getValueType(I.getType()); else Info.memVT = MVT::f32; Info.ptrVal = I.getArgOperand(0); @@ -1635,7 +2099,7 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, if (Constraint.size() == 1) { switch (Constraint[0]) { case 'c': - return std::make_pair(0U, &NVPTX::Int8RegsRegClass); + return std::make_pair(0U, &NVPTX::Int16RegsRegClass); case 'h': return std::make_pair(0U, &NVPTX::Int16RegsRegClass); case 'r': @@ -1775,7 +2239,8 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, unsigned NumElts = ResVT.getVectorNumElements(); EVT EltVT = ResVT.getVectorElementType(); - // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization. + // Since LDU/LDG are target nodes, we cannot rely on DAG type + // legalization. // Therefore, we must ensure the type is legal. For i1 and i8, we set the // loaded type to i16 and propogate the "real" type as the memory type. bool NeedTrunc = false; @@ -1834,7 +2299,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, OtherOps.push_back(Chain); // Chain // Skip operand 1 (intrinsic ID) - // Others + // Others for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) OtherOps.push_back(N->getOperand(i)); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index b0dad0f..43c63ae 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -35,14 +35,6 @@ enum NodeType { DeclareRetParam, DeclareRet, DeclareScalarRet, - LoadParam, - LoadParamV2, - LoadParamV4, - StoreParam, - StoreParamV2, - StoreParamV4, - StoreParamS32, // to sext and store a <32bit value, not used currently - StoreParamU32, // to zext and store a <32bit value, not used currently MoveToParam, PrintCall, PrintCallUni, @@ -57,9 +49,6 @@ enum NodeType { MoveParam, MoveRetval, MoveToRetval, - StoreRetval, - StoreRetvalV2, - StoreRetvalV4, PseudoUseParam, RETURN, CallSeqBegin, @@ -73,7 +62,18 @@ enum NodeType { LDUV2, // LDU.v2 LDUV4, // LDU.v4 StoreV2, - StoreV4 + StoreV4, + LoadParam, + LoadParamV2, + LoadParamV4, + StoreParam, + StoreParamV2, + StoreParamV4, + StoreParamS32, // to sext and store a <32bit value, not used currently + StoreParamU32, // to zext and store a <32bit value, not used currently + StoreRetval, + StoreRetvalV2, + StoreRetvalV4 }; } @@ -126,7 +126,8 @@ public: std::string getPrototype(Type *, const ArgListTy &, const SmallVectorImpl &, - unsigned retAlignment) const; + unsigned retAlignment, + const ImmutableCallSite *CS) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -164,6 +165,9 @@ private: virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; + + unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, + Type *Ty, unsigned Idx) const; }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 80af163..b406aa9 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -51,9 +51,6 @@ void NVPTXInstrInfo::copyPhysReg( else if (DestRC == &NVPTX::Int16RegsRegClass) BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int8RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); else if (DestRC == &NVPTX::Int64RegsRegClass) BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index c980237..965af51 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -82,101 +82,6 @@ def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; def true : Predicate<"1">; -//===----------------------------------------------------------------------===// -// Special Handling for 8-bit Operands and Operations -// -// PTX supports 8-bit signed and unsigned types, but does not support 8-bit -// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have -// 8-bit registers. -// -// PTX ld, st and cvt instructions permit source and destination data operands -// to be wider than the instruction-type size, so that narrow values may be -// loaded, stored, and converted using regular-width registers. -// -// So in PTX generation, we -// - always use 16-bit registers in place in 8-bit registers. -// (8-bit variables should stay as 8-bit as they represent memory layout.) -// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values -// before operation -// . div -// . rem -// . neg (sign) -// . set, setp -// . shr -// -// We are patching the operations by inserting the cvt instructions in the -// asm strings of the affected instructions. -// -// Since vector operations, except for ld/st, are eventually elementized. We -// do not need to special-hand the vector 8-bit operations. -// -// -//===----------------------------------------------------------------------===// - -// Generate string block like -// { -// .reg .s16 %temp1; -// .reg .s16 %temp2; -// cvt.s16.s8 %temp1, %a; -// cvt.s16.s8 %temp2, %b; -// opc.s16 %dst, %temp1, %temp2; -// } -// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 -class Handle_i8rr { - string s = !strconcat("{{\n\t", - !strconcat(".reg .", !strconcat(TypeStr, - !strconcat(" \t%temp1;\n\t", - !strconcat(".reg .", !strconcat(TypeStr, - !strconcat(" \t%temp2;\n\t", - !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t", - !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t", - !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))); -} - -// Generate string block like -// { -// .reg .s16 %temp1; -// .reg .s16 %temp2; -// cvt.s16.s8 %temp1, %a; -// mov.b16 %temp2, %b; -// cvt.s16.s8 %temp2, %temp2; -// opc.s16 %dst, %temp1, %temp2; -// } -// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 -class Handle_i8ri { - string s = !strconcat("{{\n\t", - !strconcat(".reg .", !strconcat(TypeStr, - !strconcat(" \t%temp1;\n\t", - !strconcat(".reg .", - !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t", - !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t", - !strconcat("mov.b16 \t%temp2, $b;\n\t", - !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t", - !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))))); -} - -// Generate string block like -// { -// .reg .s16 %temp1; -// .reg .s16 %temp2; -// mov.b16 %temp1, %b; -// cvt.s16.s8 %temp1, %temp1; -// cvt.s16.s8 %temp2, %a; -// opc.s16 %dst, %temp1, %temp2; -// } -// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 -class Handle_i8ir { - string s = !strconcat("{{\n\t", - !strconcat(".reg .", !strconcat(TypeStr, - !strconcat(" \t%temp1;\n\t", - !strconcat(".reg .", !strconcat(TypeStr, - !strconcat(" \t%temp2;\n\t", - !strconcat("mov.b16 \t%temp1, $a;\n\t", - !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t", - !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t", - !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))))); -} - //===----------------------------------------------------------------------===// // Some Common Instruction Class Templates @@ -204,66 +109,6 @@ multiclass I3 { def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), !strconcat(OpcStr, "16 \t$dst, $a, $b;"), [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; - def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; - def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>; -} - -multiclass I3_i8 { - def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, - Int64Regs:$b))]>; - def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; - def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - Int32Regs:$b))]>; - def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, - Int16Regs:$b))]>; - def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; - def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), - Handle_i8rr.s, - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; - def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), - Handle_i8ri.s, - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>; -} - -multiclass I3_noi8 { - def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, - Int64Regs:$b))]>; - def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; - def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - Int32Regs:$b))]>; - def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, - Int16Regs:$b))]>; - def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; } multiclass ADD_SUB_INT_32 { @@ -522,81 +367,17 @@ def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), defm MULT : I3<"mul.lo.s", mul>; -defm MULTHS : I3_noi8<"mul.hi.s", mulhs>; -defm MULTHU : I3_noi8<"mul.hi.u", mulhu>; -def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), - !strconcat("{{ \n\t", - !strconcat(".reg \t.s16 temp1; \n\t", - !strconcat(".reg \t.s16 temp2; \n\t", - !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t", - !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t", - !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t", - !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", - !strconcat("}}", "")))))))), - [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>; -def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), - !strconcat("{{ \n\t", - !strconcat(".reg \t.s16 temp1; \n\t", - !strconcat(".reg \t.s16 temp2; \n\t", - !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t", - !strconcat("mov.b16 \ttemp2, $b; \n\t", - !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t", - !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t", - !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", - !strconcat("}}", ""))))))))), - [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>; -def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), - !strconcat("{{ \n\t", - !strconcat(".reg \t.u16 temp1; \n\t", - !strconcat(".reg \t.u16 temp2; \n\t", - !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t", - !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t", - !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t", - !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", - !strconcat("}}", "")))))))), - [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>; -def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), - !strconcat("{{ \n\t", - !strconcat(".reg \t.u16 temp1; \n\t", - !strconcat(".reg \t.u16 temp2; \n\t", - !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t", - !strconcat("mov.b16 \ttemp2, $b; \n\t", - !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t", - !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t", - !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", - !strconcat("}}", ""))))))))), - [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>; - - -defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">; -defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">; - -defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">; +defm MULTHS : I3<"mul.hi.s", mulhs>; +defm MULTHU : I3<"mul.hi.u", mulhu>; + +defm SDIV : I3<"div.s", sdiv>; +defm UDIV : I3<"div.u", udiv>; + +defm SREM : I3<"rem.s", srem>; // The ri version will not be selected as DAGCombiner::visitSREM will lower it. -defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">; +defm UREM : I3<"rem.u", urem>; // The ri version will not be selected as DAGCombiner::visitUREM will lower it. -def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst), - (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b), - Int8Regs:$c))]>; -def MAD8rri : NVPTXInst<(outs Int8Regs:$dst), - (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b), - imm:$c))]>; -def MAD8rir : NVPTXInst<(outs Int8Regs:$dst), - (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b), - Int8Regs:$c))]>; -def MAD8rii : NVPTXInst<(outs Int8Regs:$dst), - (ins Int8Regs:$a, i8imm:$b, i8imm:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b), - imm:$c))]>; - def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", @@ -661,10 +442,6 @@ def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), (mul Int64Regs:$a, imm:$b), imm:$c))]>; -def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), - !strconcat("cvt.s16.s8 \t$dst, $src;\n\t", - "neg.s16 \t$dst, $dst;"), - [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>; def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), "neg.s16 \t$dst, $src;", [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; @@ -974,12 +751,6 @@ multiclass LOG_FORMAT { def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; - def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), - !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; - def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), - !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), [(set Int16Regs:$dst, (OpNode Int16Regs:$a, @@ -1010,9 +781,6 @@ defm XOR : LOG_FORMAT<"xor", xor>; def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), "not.pred \t$dst, $src;", [(set Int1Regs:$dst, (not Int1Regs:$src))]>; -def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), - "not.b16 \t$dst, $src;", - [(set Int8Regs:$dst, (not Int8Regs:$src))]>; def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), "not.b16 \t$dst, $src;", [(set Int16Regs:$dst, (not Int16Regs:$src))]>; @@ -1056,14 +824,6 @@ multiclass LSHIFT_FORMAT { !strconcat(OpcStr, "16 \t$dst, $a, $b;"), [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>; - def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, - Int32Regs:$b))]>; - def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, - (i32 imm:$b)))]>; } defm SHL : LSHIFT_FORMAT<"shl.b", shl>; @@ -1102,16 +862,6 @@ multiclass RSHIFT_FORMAT { !strconcat(OpcStr, "16 \t$dst, $a, $b;"), [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>; - def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b), - !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t", - !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))), - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, - Int32Regs:$b))]>; - def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b), - !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t", - !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))), - [(set Int8Regs:$dst, (OpNode Int8Regs:$a, - (i32 imm:$b)))]>; } defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">; @@ -1257,8 +1007,6 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), let IsSimpleMove=1 in { def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), "mov.pred \t$dst, $sss;", []>; -def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss), - "mov.u16 \t$dst, $sss;", []>; def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), "mov.u16 \t$dst, $sss;", []>; def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), @@ -1274,9 +1022,6 @@ def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), "mov.pred \t$dst, $src;", [(set Int1Regs:$dst, imm:$src)]>; -def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src), - "mov.u16 \t$dst, $src;", - [(set Int8Regs:$dst, imm:$src)]>; def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), "mov.u16 \t$dst, $src;", [(set Int16Regs:$dst, imm:$src)]>; @@ -1331,47 +1076,8 @@ class Set_Str { - string t1 = "{{\n\t.reg .pred p;\n\t"; - string t2 = !strconcat(t1, ".reg ."); - string t3 = !strconcat(t2, type); - string t4 = !strconcat(t3, " %temp1;\n\t"); - string t5 = !strconcat(t4, ".reg ."); - string t6 = !strconcat(t5, type); - string t7 = !strconcat(t6, " %temp2;\n\t"); - string t8 = !strconcat(t7, cvt); - string t9 = !strconcat(t8, " \t%temp1, "); - string t10 = !strconcat(t9, a); - string t11 = !strconcat(t10, ";\n\t"); - string t12 = !strconcat(t11, cvt); - string t13 = !strconcat(t12, " \t%temp2, "); - string t14 = !strconcat(t13, b); - string t15 = !strconcat(t14, ";\n\t"); - string t16 = !strconcat(t15, OpcStr); - string t17 = !strconcat(t16, "16"); - string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t"); - string t19 = !strconcat(t18, "selp.s16 \t"); - string t20 = !strconcat(t19, d); - string s = !strconcat(t20, ", -1, 0, p;\n\t}}"); -} - multiclass ISET_FORMAT { - def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), - Set_Stri8.s, - []>; def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), Set_Str.s, @@ -1385,15 +1091,6 @@ multiclass ISET_FORMAT.s, []>; - def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), - Handle_i8rr.s, - [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; - def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b), - Handle_i8ri.s, - [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; - def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b), - Handle_i8ir.s, - [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>; def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, "16 \t$dst, $a, $b;"), [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; @@ -1422,15 +1119,6 @@ multiclass ISET_FORMAT; - def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), - Handle_i8rr.s, - [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; - def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b), - Handle_i8ri.s, - [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; - def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b), - Handle_i8ir.s, - [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>; def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), @@ -1639,22 +1327,6 @@ defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>; def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; -def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst), - (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>; -def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst), - (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>; -def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst), - (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>; -def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst), - (ins i8imm:$a, i8imm:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p), @@ -1838,7 +1510,7 @@ class LoadParamMemInst : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), !strconcat(!strconcat("ld.param", opstr), "\t$dst, [retval0+$b];"), - [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; + []>; class LoadParamRegInst : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), @@ -1846,8 +1518,6 @@ class LoadParamRegInst : "\t$dst, retval$b;"), [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; -// FIXME: A bug in tablegen currently prevents us from using multi-output -// patterns here, so we have to custom select these in C++. class LoadParamV2MemInst : NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b), !strconcat(!strconcat("ld.param.v2", opstr), @@ -1864,24 +1534,21 @@ class StoreParamInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), !strconcat(!strconcat("st.param", opstr), "\t[param$a+$b], $val;"), - [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; + []>; class StoreParamV2Inst : NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a, i32imm:$b), !strconcat(!strconcat("st.param.v2", opstr), "\t[param$a+$b], {{$val, $val2}};"), - [(StoreParamV2 (i32 imm:$a), (i32 imm:$b), regclass:$val, - regclass:$val2)]>; + []>; class StoreParamV4Inst : NVPTXInst<(outs), (ins regclass:$val, regclass:$val1, regclass:$val2, regclass:$val3, i32imm:$a, i32imm:$b), !strconcat(!strconcat("st.param.v4", opstr), "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"), - [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), regclass:$val, - regclass:$val2, regclass:$val3, - regclass:$val4)]>; + []>; class MoveToParamInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), @@ -1893,13 +1560,13 @@ class StoreRetvalInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), !strconcat(!strconcat("st.param", opstr), "\t[func_retval0+$a], $val;"), - [(StoreRetval (i32 imm:$a), regclass:$val)]>; + []>; class StoreRetvalV2Inst : NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a), !strconcat(!strconcat("st.param.v2", opstr), "\t[func_retval0+$a], {{$val, $val2}};"), - [(StoreRetvalV2 (i32 imm:$a), regclass:$val, regclass:$val2)]>; + []>; class StoreRetvalV4Inst : NVPTXInst<(outs), @@ -1907,8 +1574,7 @@ class StoreRetvalV4Inst : regclass:$val4, i32imm:$a), !strconcat(!strconcat("st.param.v4", opstr), "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), - [(StoreRetvalV4 (i32 imm:$a), regclass:$val, regclass:$val2, - regclass:$val3, regclass:$val4)]>; + []>; class MoveToRetvalInst : NVPTXInst<(outs), (ins i32imm:$num, regclass:$val), @@ -1983,29 +1649,19 @@ def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ", def LoadParamMemI64 : LoadParamMemInst; def LoadParamMemI32 : LoadParamMemInst; def LoadParamMemI16 : LoadParamMemInst; -def LoadParamMemI8 : LoadParamMemInst; -def LoadParamMemV2I64 : LoadParamV2MemInst; -def LoadParamMemV2I32 : LoadParamV2MemInst; -def LoadParamMemV2I16 : LoadParamV2MemInst; -def LoadParamMemV2I8 : LoadParamV2MemInst; -def LoadParamMemV4I32 : LoadParamV4MemInst; -def LoadParamMemV4I16 : LoadParamV4MemInst; -def LoadParamMemV4I8 : LoadParamV4MemInst; - -//def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), -// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", -// "cvt.u16.u32\t$dst, temp_param_reg;"), -// [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; -//def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b), -// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", -// "cvt.u16.u32\t$dst, temp_param_reg;"), -// [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; - +def LoadParamMemI8 : LoadParamMemInst; +def LoadParamMemV2I64 : LoadParamV2MemInst; +def LoadParamMemV2I32 : LoadParamV2MemInst; +def LoadParamMemV2I16 : LoadParamV2MemInst; +def LoadParamMemV2I8 : LoadParamV2MemInst; +def LoadParamMemV4I32 : LoadParamV4MemInst; +def LoadParamMemV4I16 : LoadParamV4MemInst; +def LoadParamMemV4I8 : LoadParamV4MemInst; def LoadParamMemF32 : LoadParamMemInst; def LoadParamMemF64 : LoadParamMemInst; -def LoadParamMemV2F32 : LoadParamV2MemInst; -def LoadParamMemV2F64 : LoadParamV2MemInst; -def LoadParamMemV4F32 : LoadParamV4MemInst; +def LoadParamMemV2F32 : LoadParamV2MemInst; +def LoadParamMemV2F64 : LoadParamV2MemInst; +def LoadParamMemV4F32 : LoadParamV4MemInst; def LoadParamRegI64 : LoadParamRegInst; def LoadParamRegI32 : LoadParamRegInst; @@ -2013,10 +1669,6 @@ def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), "cvt.u16.u32\t$dst, retval$b;", [(set Int16Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; -def LoadParamRegI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b), - "cvt.u16.u32\t$dst, retval$b;", - [(set Int8Regs:$dst, - (LoadParam (i32 0), (i32 imm:$b)))]>; def LoadParamRegF32 : LoadParamRegInst; def LoadParamRegF64 : LoadParamRegInst; @@ -2024,31 +1676,12 @@ def LoadParamRegF64 : LoadParamRegInst; def StoreParamI64 : StoreParamInst; def StoreParamI32 : StoreParamInst; -def StoreParamI16 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - "st.param.b16\t[param$a+$b], $val;", - [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; - -def StoreParamI8 : NVPTXInst<(outs), - (ins Int8Regs:$val, i32imm:$a, i32imm:$b), - "st.param.b8\t[param$a+$b], $val;", - [(StoreParam - (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; - -def StoreParamV2I64 : StoreParamV2Inst; -def StoreParamV2I32 : StoreParamV2Inst; - -def StoreParamV2I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, - i32imm:$a, i32imm:$b), - "st.param.v2.b16\t[param$a+$b], {{$val, $val2}};", - [(StoreParamV2 (i32 imm:$a), (i32 imm:$b), - Int16Regs:$val, Int16Regs:$val2)]>; - -def StoreParamV2I8 : NVPTXInst<(outs), (ins Int8Regs:$val, Int8Regs:$val2, - i32imm:$a, i32imm:$b), - "st.param.v2.b8\t[param$a+$b], {{$val, $val2}};", - [(StoreParamV2 (i32 imm:$a), (i32 imm:$b), - Int8Regs:$val, Int8Regs:$val2)]>; +def StoreParamI16 : StoreParamInst; +def StoreParamI8 : StoreParamInst; +def StoreParamV2I64 : StoreParamV2Inst; +def StoreParamV2I32 : StoreParamV2Inst; +def StoreParamV2I16 : StoreParamV2Inst; +def StoreParamV2I8 : StoreParamV2Inst; // FIXME: StoreParamV4Inst crashes llvm-tblgen :( //def StoreParamV4I32 : StoreParamV4Inst; @@ -2056,47 +1689,41 @@ def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, Int32Regs:$val3, Int32Regs:$val4, i32imm:$a, i32imm:$b), "st.param.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", - [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), - Int32Regs:$val, Int32Regs:$val2, - Int32Regs:$val3, Int32Regs:$val4)]>; + []>; def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3, Int16Regs:$val4, i32imm:$a, i32imm:$b), "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};", - [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), - Int16Regs:$val, Int16Regs:$val2, - Int16Regs:$val3, Int16Regs:$val4)]>; + []>; -def StoreParamV4I8 : NVPTXInst<(outs), (ins Int8Regs:$val, Int8Regs:$val2, - Int8Regs:$val3, Int8Regs:$val4, +def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, + Int16Regs:$val3, Int16Regs:$val4, i32imm:$a, i32imm:$b), "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};", - [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), - Int8Regs:$val, Int8Regs:$val2, - Int8Regs:$val3, Int8Regs:$val4)]>; + []>; def StoreParamS32I16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b), !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t", "st.param.b32\t[param$a+$b], temp_param_reg;"), - [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; + []>; def StoreParamU32I16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b), !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", "st.param.b32\t[param$a+$b], temp_param_reg;"), - [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; + []>; def StoreParamU32I8 : NVPTXInst<(outs), - (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t", "st.param.b32\t[param$a+$b], temp_param_reg;"), - [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; + []>; def StoreParamS32I8 : NVPTXInst<(outs), - (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t", "st.param.b32\t[param$a+$b], temp_param_reg;"), - [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; + []>; def StoreParamF32 : StoreParamInst; def StoreParamF64 : StoreParamInst; @@ -2109,9 +1736,7 @@ def StoreParamV4F32 : NVPTXInst<(outs), Float32Regs:$val3, Float32Regs:$val4, i32imm:$a, i32imm:$b), "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", - [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), - Float32Regs:$val, Float32Regs:$val2, - Float32Regs:$val3, Float32Regs:$val4)]>; + []>; def MoveToParamI64 : MoveToParamInst; def MoveToParamI32 : MoveToParamInst; @@ -2122,36 +1747,18 @@ def MoveToParamI16 : NVPTXInst<(outs), !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", "mov.b32\tparam$a, temp_param_reg;"), [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; -def MoveToParamI8 : NVPTXInst<(outs), - (ins Int8Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", - "mov.b32\tparam$a, temp_param_reg;"), - [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; def StoreRetvalI64 : StoreRetvalInst; def StoreRetvalI32 : StoreRetvalInst; def StoreRetvalI16 : StoreRetvalInst; -def StoreRetvalI8 : StoreRetvalInst; +def StoreRetvalI8 : StoreRetvalInst; def StoreRetvalV2I64 : StoreRetvalV2Inst; def StoreRetvalV2I32 : StoreRetvalV2Inst; def StoreRetvalV2I16 : StoreRetvalV2Inst; -def StoreRetvalV2I8 : StoreRetvalV2Inst; +def StoreRetvalV2I8 : StoreRetvalV2Inst; def StoreRetvalV4I32 : StoreRetvalV4Inst; def StoreRetvalV4I16 : StoreRetvalV4Inst; -def StoreRetvalV4I8 : StoreRetvalV4Inst; - -//def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a), -// !strconcat("\{\n\t", -// !strconcat(".reg .b32 temp_retval_reg;\n\t", -// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t", -// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))), -// [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>; -//def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a), -// !strconcat("\{\n\t", -// !strconcat(".reg .b32 temp_retval_reg;\n\t", -// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t", -// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))), -// [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>; +def StoreRetvalV4I8 : StoreRetvalV4Inst; def StoreRetvalF64 : StoreRetvalInst; def StoreRetvalF32 : StoreRetvalInst; @@ -2162,7 +1769,7 @@ def StoreRetvalV4F32 : StoreRetvalV4Inst; def MoveRetvalI64 : MoveRetvalInst; def MoveRetvalI32 : MoveRetvalInst; def MoveRetvalI16 : MoveRetvalInst; -def MoveRetvalI8 : MoveRetvalInst; +def MoveRetvalI8 : MoveRetvalInst; def MoveRetvalF64 : MoveRetvalInst; def MoveRetvalF32 : MoveRetvalInst; @@ -2173,9 +1780,6 @@ def MoveToRetvalF32 : MoveToRetvalInst; def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val), "cvt.u32.u16\tfunc_retval$num, $val;", [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>; -def MoveToRetvalI8 : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val), - "cvt.u32.u16\tfunc_retval$num, $val;", - [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>; def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; @@ -2193,7 +1797,6 @@ class LastCallArgInst : def CallArgI64 : CallArgInst; def CallArgI32 : CallArgInst; def CallArgI16 : CallArgInst; -def CallArgI8 : CallArgInst; def CallArgF64 : CallArgInst; def CallArgF32 : CallArgInst; @@ -2201,7 +1804,6 @@ def CallArgF32 : CallArgInst; def LastCallArgI64 : LastCallArgInst; def LastCallArgI32 : LastCallArgInst; def LastCallArgI16 : LastCallArgInst; -def LastCallArgI8 : LastCallArgInst; def LastCallArgF64 : LastCallArgInst; def LastCallArgF32 : LastCallArgInst; @@ -2261,9 +1863,6 @@ def MoveParamI32 : MoveParamInst; def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), "cvt.u16.u32\t$dst, $src;", [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; -def MoveParamI8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), - "cvt.u16.u32\t$dst, $src;", - [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>; def MoveParamF64 : MoveParamInst; def MoveParamF32 : MoveParamInst; @@ -2275,7 +1874,6 @@ class PseudoUseParamInst : def PseudoUseParamI64 : PseudoUseParamInst; def PseudoUseParamI32 : PseudoUseParamInst; def PseudoUseParamI16 : PseudoUseParamInst; -def PseudoUseParamI8 : PseudoUseParamInst; def PseudoUseParamF64 : PseudoUseParamInst; def PseudoUseParamF32 : PseudoUseParamInst; @@ -2317,7 +1915,7 @@ multiclass LD { } let mayLoad=1, neverHasSideEffects=1 in { -defm LD_i8 : LD; +defm LD_i8 : LD; defm LD_i16 : LD; defm LD_i32 : LD; defm LD_i64 : LD; @@ -2359,7 +1957,7 @@ multiclass ST { } let mayStore=1, neverHasSideEffects=1 in { -defm ST_i8 : ST; +defm ST_i8 : ST; defm ST_i16 : ST; defm ST_i32 : ST; defm ST_i64 : ST; @@ -2443,7 +2041,7 @@ multiclass LD_VEC { []>; } let mayLoad=1, neverHasSideEffects=1 in { -defm LDV_i8 : LD_VEC; +defm LDV_i8 : LD_VEC; defm LDV_i16 : LD_VEC; defm LDV_i32 : LD_VEC; defm LDV_i64 : LD_VEC; @@ -2526,7 +2124,7 @@ multiclass ST_VEC { []>; } let mayStore=1, neverHasSideEffects=1 in { -defm STV_i8 : ST_VEC; +defm STV_i8 : ST_VEC; defm STV_i16 : ST_VEC; defm STV_i32 : ST_VEC; defm STV_i64 : ST_VEC; @@ -2539,10 +2137,6 @@ defm STV_f64 : ST_VEC; multiclass CVT_INT_TO_FP { // FIXME: need to add f16 support -// def CVTf16i8 : -// NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a), -// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"), -// [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>; // def CVTf16i16 : // NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a), // !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"), @@ -2560,10 +2154,6 @@ multiclass CVT_INT_TO_FP { NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a), "selp.f32 \t$d, 1.0, 0.0, $a;", [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>; - def CVTf32i8 : - NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a), - !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"), - [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>; def CVTf32i16 : NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a), !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"), @@ -2581,10 +2171,6 @@ multiclass CVT_INT_TO_FP { NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a), "selp.f64 \t$d, 1.0, 0.0, $a;", [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>; - def CVTf64i8 : - NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a), - !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"), - [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>; def CVTf64i16 : NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a), !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"), @@ -2604,24 +2190,6 @@ defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>; multiclass CVT_FP_TO_INT { // FIXME: need to add f16 support -// def CVTi8f16: -// NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a), -// !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"), -// [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>; - def CVTi8f32_ftz: - NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), - [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; - def CVTi8f32: - NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), - [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>; - def CVTi8f64: - NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), - [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>; - -// FIXME: need to add f16 support // def CVTi16f16: // NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a), // !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"), @@ -2680,10 +2248,6 @@ defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>; defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>; multiclass INT_EXTEND_UNSIGNED_1 { - def ext1to8: - NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a), - "selp.u16 \t$d, 1, 0, $a;", - [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>; def ext1to16: NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), "selp.u16 \t$d, 1, 0, $a;", @@ -2699,10 +2263,6 @@ multiclass INT_EXTEND_UNSIGNED_1 { } multiclass INT_EXTEND_SIGNED_1 { - def ext1to8: - NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a), - "selp.s16 \t$d, -1, 0, $a;", - [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>; def ext1to16: NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), "selp.s16 \t$d, -1, 0, $a;", @@ -2718,23 +2278,6 @@ multiclass INT_EXTEND_SIGNED_1 { } multiclass INT_EXTEND { - // All Int8Regs are emiited as 16bit registers in ptx. - // And there is no selp.u8 in ptx. - def ext8to16: - NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.", - !strconcat(OpStr, "8 \t$d, $a;")))), - [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>; - def ext8to32: - NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", - !strconcat(OpStr, "8 \t$d, $a;")))), - [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>; - def ext8to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", - !strconcat(OpStr, "8 \t$d, $a;")))), - [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>; def ext16to32: NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a), !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", @@ -2778,18 +2321,9 @@ def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a), "cvt.u16.u64 \t$d, $a;", [(set Int16Regs:$d, (trunc Int64Regs:$a))]>; -def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a), - "cvt.u8.u64 \t$d, $a;", - [(set Int8Regs:$d, (trunc Int64Regs:$a))]>; def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a), "cvt.u16.u32 \t$d, $a;", [(set Int16Regs:$d, (trunc Int32Regs:$a))]>; -def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a), - "cvt.u8.u32 \t$d, $a;", - [(set Int8Regs:$d, (trunc Int32Regs:$a))]>; -def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a), - "cvt.u8.u16 \t$d, $a;", - [(set Int8Regs:$d, (trunc Int16Regs:$a))]>; def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), TRUNC_to1_asm<".b64">.s, [(set Int1Regs:$d, (trunc Int64Regs:$a))]>; @@ -2799,13 +2333,8 @@ def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a), TRUNC_to1_asm<".b16">.s, [(set Int1Regs:$d, (trunc Int16Regs:$a))]>; -def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a), - TRUNC_to1_asm<".b16">.s, - [(set Int1Regs:$d, (trunc Int8Regs:$a))]>; // Select instructions -def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b), - (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>; def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), (SELECTi16rr Int16Regs:$a, Int16Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>; @@ -2834,28 +2363,11 @@ def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; // pack a set of smaller int registers to a larger int register -def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d), - (ins Int8Regs:$s1, Int8Regs:$s2, - Int8Regs:$s3, Int8Regs:$s4), - !strconcat("{{\n\t.reg .b8\t%t<4>;", - !strconcat("\n\tcvt.u8.u8\t%t0, $s1;", - !strconcat("\n\tcvt.u8.u8\t%t1, $s2;", - !strconcat("\n\tcvt.u8.u8\t%t2, $s3;", - !strconcat("\n\tcvt.u8.u8\t%t3, $s4;", - "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))), - []>; def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4), "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>; -def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d), - (ins Int8Regs:$s1, Int8Regs:$s2), - !strconcat("{{\n\t.reg .b8\t%t<2>;", - !strconcat("\n\tcvt.u8.u8\t%t0, $s1;", - !strconcat("\n\tcvt.u8.u8\t%t1, $s2;", - "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))), - []>; def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$s1, Int16Regs:$s2), "mov.b32\t$d, {{$s1, $s2}};", @@ -2870,28 +2382,11 @@ def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), []>; // unpack a larger int register to a set of smaller int registers -def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2, - Int8Regs:$d3, Int8Regs:$d4), - (ins Int32Regs:$s), - !strconcat("{{\n\t.reg .b8\t%t<4>;", - !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;", - !strconcat("\n\tcvt.u8.u8\t$d1, %t0;", - !strconcat("\n\tcvt.u8.u8\t$d2, %t1;", - !strconcat("\n\tcvt.u8.u8\t$d3, %t2;", - "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))), - []>; def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, Int16Regs:$d3, Int16Regs:$d4), (ins Int64Regs:$s), "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>; -def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2), - (ins Int16Regs:$s), - !strconcat("{{\n\t.reg .b8\t%t<2>;", - !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;", - !strconcat("\n\tcvt.u8.u8\t$d1, %t0;", - "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))), - []>; def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), (ins Int32Regs:$s), "mov.b32\t{{$d1, $d2}}, $s;", diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 24037ca..caa7775 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1270,6 +1270,11 @@ def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs, // Support for ldu on sm_20 or later //----------------------------------- +def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{ + MemIntrinsicSDNode *M = cast(N); + return M->getMemoryVT() == MVT::i8; +}]>; + // Scalar // @TODO: Revisit this, Changed imemAny to imem multiclass LDU_G { @@ -1291,8 +1296,27 @@ multiclass LDU_G { [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>; } -defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int8Regs, -int_nvvm_ldu_global_i>; +multiclass LDU_G_NOINTRIN { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDU]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>; +} + +defm INT_PTX_LDU_GLOBAL_i8 : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, + ldu_i8>; defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldu_global_i>; defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs, @@ -1330,7 +1354,7 @@ multiclass VLDU_G_ELE_V4 { } defm INT_PTX_LDU_G_v2i8_ELE - : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int8Regs>; + : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDU_G_v2i32_ELE @@ -1342,7 +1366,7 @@ defm INT_PTX_LDU_G_v2i64_ELE defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; defm INT_PTX_LDU_G_v4i8_ELE - : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int8Regs>; + : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; @@ -1542,10 +1566,6 @@ def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), // nvvm.move intrinsicc -def nvvm_move_i8 : NVPTXInst<(outs Int8Regs:$r), (ins Int8Regs:$s), - "mov.b16 \t$r, $s;", - [(set Int8Regs:$r, - (int_nvvm_move_i8 Int8Regs:$s))]>; def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), "mov.b16 \t$r, $s;", [(set Int16Regs:$r, diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index b749b05..4d3a1d9 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -38,10 +38,6 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { return ".s32"; } else if (RC == &NVPTX::Int16RegsRegClass) { return ".s16"; - } - // Int8Regs become 16-bit registers in PTX - else if (RC == &NVPTX::Int8RegsRegClass) { - return ".s16"; } else if (RC == &NVPTX::Int1RegsRegClass) { return ".pred"; } else if (RC == &NVPTX::SpecialRegsRegClass) { @@ -64,8 +60,6 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { return "%r"; } else if (RC == &NVPTX::Int16RegsRegClass) { return "%rs"; - } else if (RC == &NVPTX::Int8RegsRegClass) { - return "%rc"; } else if (RC == &NVPTX::Int1RegsRegClass) { return "%p"; } else if (RC == &NVPTX::SpecialRegsRegClass) { diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td index 8d100d6..bc705b8 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -31,7 +31,6 @@ def VRDepot : NVPTXReg<"%Depot">; foreach i = 0-395 in { def P#i : NVPTXReg<"%p"#i>; // Predicate - def RC#i : NVPTXReg<"%rc"#i>; // 8-bit def RS#i : NVPTXReg<"%rs"#i>; // 16-bit def R#i : NVPTXReg<"%r"#i>; // 32-bit def RL#i : NVPTXReg<"%rl"#i>; // 64-bit @@ -49,7 +48,6 @@ foreach i = 0-395 in { // Register classes //===----------------------------------------------------------------------===// def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 395))>; -def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%u", 0, 395))>; def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 395))>; def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>; def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 395))>; -- cgit v1.1 From ef0ccc93203e99077632cec7a0a15b8e1b704aee Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:58:04 +0000 Subject: [NVPTX] Clean up comparison/select/convert patterns and factor out PTX instructions from their patterns Test case is no breakage git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185175 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTX.h | 47 ++ lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 133 +++- lib/Target/NVPTX/NVPTXAsmPrinter.h | 4 + lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 23 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 6 - lib/Target/NVPTX/NVPTXISelLowering.h | 3 - lib/Target/NVPTX/NVPTXInstrInfo.td | 1332 ++++++++++++++++---------------- lib/Target/NVPTX/NVPTXIntrinsics.td | 530 ++++++------- 8 files changed, 1129 insertions(+), 949 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 85cdb8b..7be3f9a 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -131,6 +131,53 @@ enum VecType { V4 = 4 }; } + +/// PTXCvtMode - Conversion code enumeration +namespace PTXCvtMode { +enum CvtMode { + NONE = 0, + RNI, + RZI, + RMI, + RPI, + RN, + RZ, + RM, + RP, + + BASE_MASK = 0x0F, + FTZ_FLAG = 0x10, + SAT_FLAG = 0x20 +}; +} + +/// PTXCmpMode - Comparison mode enumeration +namespace PTXCmpMode { +enum CmpMode { + EQ = 0, + NE, + LT, + LE, + GT, + GE, + LO, + LS, + HI, + HS, + EQU, + NEU, + LTU, + LEU, + GTU, + GEU, + NUM, + // NAN is a MACRO + NotANumber, + + BASE_MASK = 0xFF, + FTZ_FLAG = 0x100 +}; +} } } // end namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 9188262..d7eeced 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -693,6 +693,130 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, llvm_unreachable("Empty Modifier"); } +void NVPTXAsmPrinter::printCvtMode(const MachineInstr *MI, int OpNum, + raw_ostream &O, const char *Modifier) { + const MachineOperand &MO = MI->getOperand(OpNum); + int64_t Imm = MO.getImm(); + + if (strcmp(Modifier, "ftz") == 0) { + // FTZ flag + if (Imm & NVPTX::PTXCvtMode::FTZ_FLAG) + O << ".ftz"; + } else if (strcmp(Modifier, "sat") == 0) { + // SAT flag + if (Imm & NVPTX::PTXCvtMode::SAT_FLAG) + O << ".sat"; + } else if (strcmp(Modifier, "base") == 0) { + // Default operand + switch (Imm & NVPTX::PTXCvtMode::BASE_MASK) { + default: + return; + case NVPTX::PTXCvtMode::NONE: + break; + case NVPTX::PTXCvtMode::RNI: + O << ".rni"; + break; + case NVPTX::PTXCvtMode::RZI: + O << ".rzi"; + break; + case NVPTX::PTXCvtMode::RMI: + O << ".rmi"; + break; + case NVPTX::PTXCvtMode::RPI: + O << ".rpi"; + break; + case NVPTX::PTXCvtMode::RN: + O << ".rn"; + break; + case NVPTX::PTXCvtMode::RZ: + O << ".rz"; + break; + case NVPTX::PTXCvtMode::RM: + O << ".rm"; + break; + case NVPTX::PTXCvtMode::RP: + O << ".rp"; + break; + } + } else { + llvm_unreachable("Invalid conversion modifier"); + } +} + +void NVPTXAsmPrinter::printCmpMode(const MachineInstr *MI, int OpNum, + raw_ostream &O, const char *Modifier) { + const MachineOperand &MO = MI->getOperand(OpNum); + int64_t Imm = MO.getImm(); + + if (strcmp(Modifier, "ftz") == 0) { + // FTZ flag + if (Imm & NVPTX::PTXCmpMode::FTZ_FLAG) + O << ".ftz"; + } else if (strcmp(Modifier, "base") == 0) { + switch (Imm & NVPTX::PTXCmpMode::BASE_MASK) { + default: + return; + case NVPTX::PTXCmpMode::EQ: + O << ".eq"; + break; + case NVPTX::PTXCmpMode::NE: + O << ".ne"; + break; + case NVPTX::PTXCmpMode::LT: + O << ".lt"; + break; + case NVPTX::PTXCmpMode::LE: + O << ".le"; + break; + case NVPTX::PTXCmpMode::GT: + O << ".gt"; + break; + case NVPTX::PTXCmpMode::GE: + O << ".ge"; + break; + case NVPTX::PTXCmpMode::LO: + O << ".lo"; + break; + case NVPTX::PTXCmpMode::LS: + O << ".ls"; + break; + case NVPTX::PTXCmpMode::HI: + O << ".hi"; + break; + case NVPTX::PTXCmpMode::HS: + O << ".hs"; + break; + case NVPTX::PTXCmpMode::EQU: + O << ".equ"; + break; + case NVPTX::PTXCmpMode::NEU: + O << ".neu"; + break; + case NVPTX::PTXCmpMode::LTU: + O << ".ltu"; + break; + case NVPTX::PTXCmpMode::LEU: + O << ".leu"; + break; + case NVPTX::PTXCmpMode::GTU: + O << ".gtu"; + break; + case NVPTX::PTXCmpMode::GEU: + O << ".geu"; + break; + case NVPTX::PTXCmpMode::NUM: + O << ".num"; + break; + case NVPTX::PTXCmpMode::NotANumber: + O << ".nan"; + break; + } + } else { + llvm_unreachable("Empty Modifier"); + } +} + + void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { emitLinkageDirective(F, O); @@ -2033,10 +2157,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { case NVPTX::StoreParamI32: case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: - case NVPTX::StoreParamS32I8: - case NVPTX::StoreParamU32I8: - case NVPTX::StoreParamS32I16: - case NVPTX::StoreParamU32I16: case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: case NVPTX::StoreRetvalI16: @@ -2056,11 +2176,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { case NVPTX::LoadParamMemI32: case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: - case NVPTX::LoadParamRegF32: - case NVPTX::LoadParamRegF64: - case NVPTX::LoadParamRegI16: - case NVPTX::LoadParamRegI32: - case NVPTX::LoadParamRegI64: case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: return true; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 55f2943..c7b7fb0 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -198,6 +198,10 @@ private: const char *Modifier = 0); void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); + void printCvtMode(const MachineInstr *MI, int OpNum, raw_ostream &O, + const char *Modifier = 0); + void printCmpMode(const MachineInstr *MI, int OpNum, raw_ostream &O, + const char *Modifier = 0); void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 7a0a59f..4457ec3 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1965,13 +1965,28 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { break; } break; - case NVPTXISD::StoreParamU32: - Opcode = NVPTX::StoreParamU32I16; + // Special case: if we have a sign-extend/zero-extend node, insert the + // conversion instruction first, and use that as the value operand to + // the selected StoreParam node. + case NVPTXISD::StoreParamU32: { + Opcode = NVPTX::StoreParamI32; + SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, + MVT::i32); + SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, + MVT::i32, Ops[0], CvtNone); + Ops[0] = SDValue(Cvt, 0); break; - case NVPTXISD::StoreParamS32: - Opcode = NVPTX::StoreParamS32I16; + } + case NVPTXISD::StoreParamS32: { + Opcode = NVPTX::StoreParamI32; + SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, + MVT::i32); + SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, + MVT::i32, Ops[0], CvtNone); + Ops[0] = SDValue(Cvt, 0); break; } + } SDNode *Ret = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 0396a64..338fe7c 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -259,8 +259,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::StoreParamS32"; case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32"; - case NVPTXISD::MoveToParam: - return "NVPTXISD::MoveToParam"; case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin"; case NVPTXISD::CallArg: @@ -279,10 +277,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::Prototype"; case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam"; - case NVPTXISD::MoveRetval: - return "NVPTXISD::MoveRetval"; - case NVPTXISD::MoveToRetval: - return "NVPTXISD::MoveToRetval"; case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; case NVPTXISD::StoreRetvalV2: diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 43c63ae..5e26b1c 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -35,7 +35,6 @@ enum NodeType { DeclareRetParam, DeclareRet, DeclareScalarRet, - MoveToParam, PrintCall, PrintCallUni, CallArgBegin, @@ -47,8 +46,6 @@ enum NodeType { CallSymbol, Prototype, MoveParam, - MoveRetval, - MoveToRetval, PseudoUseParam, RETURN, CallSeqBegin, diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 965af51..3219364 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -32,6 +32,86 @@ def isVecOther : VecInstTypeEnum<15>; def brtarget : Operand; +// CVT conversion modes +// These must match the enum in NVPTX.h +def CvtNONE : PatLeaf<(i32 0x0)>; +def CvtRNI : PatLeaf<(i32 0x1)>; +def CvtRZI : PatLeaf<(i32 0x2)>; +def CvtRMI : PatLeaf<(i32 0x3)>; +def CvtRPI : PatLeaf<(i32 0x4)>; +def CvtRN : PatLeaf<(i32 0x5)>; +def CvtRZ : PatLeaf<(i32 0x6)>; +def CvtRM : PatLeaf<(i32 0x7)>; +def CvtRP : PatLeaf<(i32 0x8)>; + +def CvtNONE_FTZ : PatLeaf<(i32 0x10)>; +def CvtRNI_FTZ : PatLeaf<(i32 0x11)>; +def CvtRZI_FTZ : PatLeaf<(i32 0x12)>; +def CvtRMI_FTZ : PatLeaf<(i32 0x13)>; +def CvtRPI_FTZ : PatLeaf<(i32 0x14)>; +def CvtRN_FTZ : PatLeaf<(i32 0x15)>; +def CvtRZ_FTZ : PatLeaf<(i32 0x16)>; +def CvtRM_FTZ : PatLeaf<(i32 0x17)>; +def CvtRP_FTZ : PatLeaf<(i32 0x18)>; + +def CvtSAT : PatLeaf<(i32 0x20)>; +def CvtSAT_FTZ : PatLeaf<(i32 0x30)>; + +def CvtMode : Operand { + let PrintMethod = "printCvtMode"; +} + +// Compare modes +// These must match the enum in NVPTX.h +def CmpEQ : PatLeaf<(i32 0)>; +def CmpNE : PatLeaf<(i32 1)>; +def CmpLT : PatLeaf<(i32 2)>; +def CmpLE : PatLeaf<(i32 3)>; +def CmpGT : PatLeaf<(i32 4)>; +def CmpGE : PatLeaf<(i32 5)>; +def CmpLO : PatLeaf<(i32 6)>; +def CmpLS : PatLeaf<(i32 7)>; +def CmpHI : PatLeaf<(i32 8)>; +def CmpHS : PatLeaf<(i32 9)>; +def CmpEQU : PatLeaf<(i32 10)>; +def CmpNEU : PatLeaf<(i32 11)>; +def CmpLTU : PatLeaf<(i32 12)>; +def CmpLEU : PatLeaf<(i32 13)>; +def CmpGTU : PatLeaf<(i32 14)>; +def CmpGEU : PatLeaf<(i32 15)>; +def CmpNUM : PatLeaf<(i32 16)>; +def CmpNAN : PatLeaf<(i32 17)>; + +def CmpEQ_FTZ : PatLeaf<(i32 0x100)>; +def CmpNE_FTZ : PatLeaf<(i32 0x101)>; +def CmpLT_FTZ : PatLeaf<(i32 0x102)>; +def CmpLE_FTZ : PatLeaf<(i32 0x103)>; +def CmpGT_FTZ : PatLeaf<(i32 0x104)>; +def CmpGE_FTZ : PatLeaf<(i32 0x105)>; +def CmpLO_FTZ : PatLeaf<(i32 0x106)>; +def CmpLS_FTZ : PatLeaf<(i32 0x107)>; +def CmpHI_FTZ : PatLeaf<(i32 0x108)>; +def CmpHS_FTZ : PatLeaf<(i32 0x109)>; +def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>; +def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>; +def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>; +def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>; +def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>; +def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>; +def CmpNUM_FTZ : PatLeaf<(i32 0x110)>; +def CmpNAN_FTZ : PatLeaf<(i32 0x111)>; + +def CmpMode : Operand { + let PrintMethod = "printCmpMode"; +} + +def F32ConstZero : Operand, PatLeaf<(f32 fpimm)>, SDNodeXFormgetTargetConstantFP(0.0, MVT::f32); + }]>; +def F32ConstOne : Operand, PatLeaf<(f32 fpimm)>, SDNodeXFormgetTargetConstantFP(1.0, MVT::f32); + }]>; + //===----------------------------------------------------------------------===// // NVPTX Instruction Predicate Definitions //===----------------------------------------------------------------------===// @@ -214,6 +294,72 @@ multiclass F2 { //===----------------------------------------------------------------------===// //----------------------------------- +// General Type Conversion +//----------------------------------- + +// Generate a cvt to the given type from all possible types. +// Each instance takes a CvtMode immediate that defines the conversion mode to +// use. It can be CvtNONE to omit a conversion mode. +multiclass CVT_FROM_ALL { + def _s16 : NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s16\t$dst, $src;"), + []>; + def _u16 : NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u16\t$dst, $src;"), + []>; + def _f16 : NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f16\t$dst, $src;"), + []>; + def _s32 : NVPTXInst<(outs RC:$dst), + (ins Int32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s32\t$dst, $src;"), + []>; + def _u32 : NVPTXInst<(outs RC:$dst), + (ins Int32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u32\t$dst, $src;"), + []>; + def _s64 : NVPTXInst<(outs RC:$dst), + (ins Int64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s64\t$dst, $src;"), + []>; + def _u64 : NVPTXInst<(outs RC:$dst), + (ins Int64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u64\t$dst, $src;"), + []>; + def _f32 : NVPTXInst<(outs RC:$dst), + (ins Float32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f32\t$dst, $src;"), + []>; + def _f64 : NVPTXInst<(outs RC:$dst), + (ins Float64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f64\t$dst, $src;"), + []>; +} + +// Generate a cvt to all possible types. +defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>; +defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>; +defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; +defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>; +defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; +defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; +defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; +defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; +defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; + +//----------------------------------- // Integer Arithmetic //----------------------------------- @@ -740,6 +886,41 @@ def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "cos.approx.f32 \t$dst, $src;", [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>; +// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)) +// e.g. "poor man's fmod()" + +// frem - f32 FTZ +def : Pat<(frem Float32Regs:$x, Float32Regs:$y), + (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32 + (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRMI_FTZ), + Float32Regs:$y))>, + Requires<[doF32FTZ]>; +def : Pat<(frem Float32Regs:$x, fpimm:$y), + (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32 + (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRMI_FTZ), + fpimm:$y))>, + Requires<[doF32FTZ]>; + +// frem - f32 +def : Pat<(frem Float32Regs:$x, Float32Regs:$y), + (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32 + (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRMI), + Float32Regs:$y))>; +def : Pat<(frem Float32Regs:$x, fpimm:$y), + (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32 + (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRMI), + fpimm:$y))>; + +// frem - f64 +def : Pat<(frem Float64Regs:$x, Float64Regs:$y), + (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64 + (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRMI), + Float64Regs:$y))>; +def : Pat<(frem Float64Regs:$x, fpimm:$y), + (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64 + (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI), + fpimm:$y))>; + //----------------------------------- // Logical Arithmetic //----------------------------------- @@ -830,7 +1011,7 @@ defm SHL : LSHIFT_FORMAT<"shl.b", shl>; // For shifts, the second src operand must be 32-bit value // Need to add cvt for the 8-bits. -multiclass RSHIFT_FORMAT { +multiclass RSHIFT_FORMAT { def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b), !strconcat(OpcStr, "64 \t$dst, $a, $b;"), @@ -864,8 +1045,8 @@ multiclass RSHIFT_FORMAT { (i32 imm:$b)))]>; } -defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">; -defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">; +defm SRA : RSHIFT_FORMAT<"shr.s", sra>; +defm SRL : RSHIFT_FORMAT<"shr.u", srl>; // 32bit def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), @@ -963,6 +1144,120 @@ def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, //----------------------------------- +// General Comparison +//----------------------------------- + +// General setp instructions +multiclass SETP { + def rr : NVPTXInst<(outs Int1Regs:$dst), + (ins RC:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), + []>; + def ri : NVPTXInst<(outs Int1Regs:$dst), + (ins RC:$a, ImmCls:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), + []>; + def ir : NVPTXInst<(outs Int1Regs:$dst), + (ins ImmCls:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), + []>; +} + +defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>; +defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>; +defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>; +defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>; +defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>; +defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>; +defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>; +defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>; +defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>; +defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>; +defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; + +// General set instructions +multiclass SET { + def rr : NVPTXInst<(outs Int32Regs:$dst), + (ins RC:$a, RC:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; + def ri : NVPTXInst<(outs Int32Regs:$dst), + (ins RC:$a, ImmCls:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; + def ir : NVPTXInst<(outs Int32Regs:$dst), + (ins ImmCls:$a, RC:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; +} + +defm SET_b16 : SET<"b16", Int16Regs, i16imm>; +defm SET_s16 : SET<"s16", Int16Regs, i16imm>; +defm SET_u16 : SET<"u16", Int16Regs, i16imm>; +defm SET_b32 : SET<"b32", Int32Regs, i32imm>; +defm SET_s32 : SET<"s32", Int32Regs, i32imm>; +defm SET_u32 : SET<"u32", Int32Regs, i32imm>; +defm SET_b64 : SET<"b64", Int64Regs, i64imm>; +defm SET_s64 : SET<"s64", Int64Regs, i64imm>; +defm SET_u64 : SET<"u64", Int64Regs, i64imm>; +defm SET_f32 : SET<"f32", Float32Regs, f32imm>; +defm SET_f64 : SET<"f64", Float64Regs, f64imm>; + +//----------------------------------- +// General Selection +//----------------------------------- + +// General selp instructions +multiclass SELP { + def rr : NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; + def ri : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; + def ir : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; + def ii : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; +} + +multiclass SELP_PATTERN { + def rr : NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>; + def ri : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; + def ir : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; + def ii : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; +} + +defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>; +defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>; +defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>; +defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>; +defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>; +defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>; +defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>; +defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>; +defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; +defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>; +defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>; + +// Special select for predicate operands +def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), + (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), + (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; + +//----------------------------------- // Data Movement (Load / Store, Move) //----------------------------------- @@ -1053,367 +1348,194 @@ def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), // Comparison and Selection //----------------------------------- -// Generate string block like -// { -// .reg .pred p; -// setp.gt.s16 p, %a, %b; -// selp.s16 %dst, -1, 0, p; -// } -// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b -class Set_Str { - string t1 = "{{\n\t.reg .pred p;\n\t"; - string t2 = !strconcat(t1 , OpcStr); - string t3 = !strconcat(t2 , sz1); - string t4 = !strconcat(t3 , " \tp, "); - string t5 = !strconcat(t4 , a); - string t6 = !strconcat(t5 , ", "); - string t7 = !strconcat(t6 , b); - string t8 = !strconcat(t7 , ";\n\tselp.s"); - string t9 = !strconcat(t8 , sz2); - string t10 = !strconcat(t9, " \t"); - string t11 = !strconcat(t10, d); - string s = !strconcat(t11, ", -1, 0, p;\n\t}}"); +multiclass ISET_FORMAT { + // i16 -> pred + def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)), + (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), + (setp_16ri Int16Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)), + (setp_16ir imm:$a, Int16Regs:$b, Mode)>; + // i32 -> pred + def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)), + (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)), + (setp_32ri Int32Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)), + (setp_32ir imm:$a, Int32Regs:$b, Mode)>; + // i64 -> pred + def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)), + (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)), + (setp_64ri Int64Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)), + (setp_64ir imm:$a, Int64Regs:$b, Mode)>; + + // i16 -> i32 + def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)), + (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), + (set_16ri Int16Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)), + (set_16ir imm:$a, Int16Regs:$b, Mode)>; + // i32 -> i32 + def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)), + (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)), + (set_32ri Int32Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)), + (set_32ir imm:$a, Int32Regs:$b, Mode)>; + // i64 -> i32 + def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)), + (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)), + (set_64ri Int64Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)), + (set_64ir imm:$a, Int64Regs:$b, Mode)>; } -multiclass ISET_FORMAT { - def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, - Int16Regs:$b), - Set_Str.s, - []>; - def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, - Int32Regs:$b), - Set_Str.s, - []>; - def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, - Int64Regs:$b), - Set_Str.s, - []>; - - def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; - def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; - def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; - def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; - def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; - def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; - def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; - - def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, - Int16Regs:$b), - !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; - def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; - def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b), - !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; - def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b), - !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; - def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, - Int64Regs:$b), - !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; - def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; - def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b), - !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; +multiclass ISET_FORMAT_SIGNED + : ISET_FORMAT { + // TableGen doesn't like empty multiclasses + def : PatLeaf<(i32 0)>; } -multiclass FSET_FORMAT { - def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, - Float32Regs:$b), - Set_Str.s, - []>, Requires<[doF32FTZ]>; - def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, - Float32Regs:$b), - Set_Str.s, - []>; - def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a, - Float64Regs:$b), - Set_Str.s, - []>; - def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a, - Float64Regs:$b), - Set_Str.s, - []>; - - def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a - , Float32Regs:$b), - !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]> - , Requires<[doF32FTZ]>; - def f32rr_p: NVPTXInst<(outs Int1Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; - def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; - def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; - def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, - Requires<[doF32FTZ]>; - def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b), - !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; - def f64rr_p: NVPTXInst<(outs Int1Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; - def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; - def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b), - !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; - - def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; - def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; - def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; - def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; - def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; - def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; - def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; - def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; - def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst), - (ins f64imm:$a, Float64Regs:$b), - !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; +multiclass ISET_FORMAT_UNSIGNED + : ISET_FORMAT { + // TableGen doesn't like empty multiclasses + def : PatLeaf<(i32 0)>; } -defm ISetSGT -: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">; -defm ISetUGT -: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">; -defm ISetSLT -: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">; -defm ISetULT -: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">; -defm ISetSGE -: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">; -defm ISetUGE -: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">; -defm ISetSLE -: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">; -defm ISetULE -: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">; -defm ISetSEQ -: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">; -defm ISetUEQ -: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">; -defm ISetSNE -: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">; -defm ISetUNE -: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">; - -def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; -def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>; -def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - !strconcat("{{\n\t", - !strconcat(".reg .pred temp;\n\t", - !strconcat("xor.pred \ttemp, $a, $b;\n\t", - !strconcat("not.pred \t$dst, temp;\n\t}}","")))), - [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; -def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - !strconcat("{{\n\t", - !strconcat(".reg .pred temp;\n\t", - !strconcat("xor.pred \ttemp, $a, $b;\n\t", - !strconcat("not.pred \t$dst, temp;\n\t}}","")))), - [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>; - -// Compare 2 i1's and produce a u32 -def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - !strconcat("{{\n\t", - !strconcat(".reg .pred temp;\n\t", - !strconcat("xor.pred \ttemp, $a, $b;\n\t", - !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))), - [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; -def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), - (ins Int1Regs:$a, Int1Regs:$b), - !strconcat("{{\n\t", - !strconcat(".reg .pred temp;\n\t", - !strconcat("xor.pred \ttemp, $a, $b;\n\t", - !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))), - [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; - -defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>; -defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>; -defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>; -defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>; -defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>; -defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>; - -defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>; -defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>; -defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>; -defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>; -defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>; -defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>; - -defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>; -defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>; - -def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), - (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), - (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; - -def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>; -def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>; -def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst), - (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>; -def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst), - (ins i16imm:$a, i16imm:$b, Int1Regs:$p), - "selp.b16 \t$dst, $a, $b, $p;", - [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; - -def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>; -def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>; -def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst), - (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>; -def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst), - (ins i32imm:$a, i32imm:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; - -def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p), - "selp.b64 \t$dst, $a, $b, $p;", - [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>; -def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p), - "selp.b64 \t$dst, $a, $b, $p;", - [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>; -def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst), - (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p), - "selp.b64 \t$dst, $a, $b, $p;", - [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>; -def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst), - (ins i64imm:$a, i64imm:$b, Int1Regs:$p), - "selp.b64 \t$dst, $a, $b, $p;", - [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; - -def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p), - "selp.f32 \t$dst, $a, $b, $p;", - [(set Float32Regs:$dst, - (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>; -def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p), - "selp.f32 \t$dst, $a, $b, $p;", - [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>; -def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p), - "selp.f32 \t$dst, $a, $b, $p;", - [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>; -def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, f32imm:$b, Int1Regs:$p), - "selp.f32 \t$dst, $a, $b, $p;", - [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; - -def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p), - "selp.f64 \t$dst, $a, $b, $p;", - [(set Float64Regs:$dst, - (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>; -def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p), - "selp.f64 \t$dst, $a, $b, $p;", - [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>; -def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst), - (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p), - "selp.f64 \t$dst, $a, $b, $p;", - [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>; -def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst), - (ins f64imm:$a, f64imm:$b, Int1Regs:$p), - "selp.f64 \t $dst, $a, $b, $p;", - [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; + +// i1 compares +def : Pat<(setne Int1Regs:$a, Int1Regs:$b), + (XORb1rr Int1Regs:$a, Int1Regs:$b)>; +def : Pat<(setune Int1Regs:$a, Int1Regs:$b), + (XORb1rr Int1Regs:$a, Int1Regs:$b)>; + +def : Pat<(seteq Int1Regs:$a, Int1Regs:$b), + (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; +def : Pat<(setueq Int1Regs:$a, Int1Regs:$b), + (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; + +// i1 compare -> i32 +def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), + (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; +def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), + (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; + + + +multiclass FSET_FORMAT { + // f32 -> pred + def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), + (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), + (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), + (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), + (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>; + + // f64 -> pred + def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)), + (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)), + (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)), + (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; + + // f32 -> i32 + def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), + (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), + (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), + (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), + (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>; + + // f64 -> i32 + def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)), + (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)), + (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)), + (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>; +} + +defm FSetGT : FSET_FORMAT; +defm FSetLT : FSET_FORMAT; +defm FSetGE : FSET_FORMAT; +defm FSetLE : FSET_FORMAT; +defm FSetEQ : FSET_FORMAT; +defm FSetNE : FSET_FORMAT; + +defm FSetUGT : FSET_FORMAT; +defm FSetULT : FSET_FORMAT; +defm FSetUGE : FSET_FORMAT; +defm FSetULE : FSET_FORMAT; +defm FSetUEQ : FSET_FORMAT; +defm FSetUNE : FSET_FORMAT; + +defm FSetNUM : FSET_FORMAT; +defm FSetNAN : FSET_FORMAT; //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; @@ -1436,7 +1558,6 @@ def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; def SDTCallValProfile : SDTypeProfile<1, 0, []>; def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; -def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>; def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; @@ -1472,8 +1593,6 @@ def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, @@ -1490,16 +1609,12 @@ def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>; -def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile, - [SDNPHasChain, SDNPSideEffect]>; def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, [SDNPHasChain, SDNPSideEffect]>; def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, [SDNPHasChain, SDNPSideEffect]>; def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, [SDNPHasChain, SDNPSideEffect]>; -def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile, - [SDNPHasChain, SDNPSideEffect]>; def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; @@ -1550,12 +1665,6 @@ class StoreParamV4Inst : "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"), []>; -class MoveToParamInst : - NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), - !strconcat(!strconcat("mov", opstr), - "\tparam$a, $val;"), - [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; - class StoreRetvalInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), !strconcat(!strconcat("st.param", opstr), @@ -1576,18 +1685,6 @@ class StoreRetvalV4Inst : "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), []>; -class MoveToRetvalInst : - NVPTXInst<(outs), (ins i32imm:$num, regclass:$val), - !strconcat(!strconcat("mov", opstr), - "\tfunc_retval$num, $val;"), - [(MoveToRetval (i32 imm:$num), regclass:$val)]>; - -class MoveRetvalInst : - NVPTXInst<(outs), (ins regclass:$val), - !strconcat(!strconcat("mov", opstr), - "\tfunc_retval0, $val;"), - [(MoveRetval regclass:$val)]>; - def PrintCallRetInst1 : NVPTXInst<(outs), (ins), "call (retval0), ", [(PrintCall (i32 1))]>; @@ -1663,16 +1760,6 @@ def LoadParamMemV2F32 : LoadParamV2MemInst; def LoadParamMemV2F64 : LoadParamV2MemInst; def LoadParamMemV4F32 : LoadParamV4MemInst; -def LoadParamRegI64 : LoadParamRegInst; -def LoadParamRegI32 : LoadParamRegInst; -def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), - "cvt.u16.u32\t$dst, retval$b;", - [(set Int16Regs:$dst, - (LoadParam (i32 0), (i32 imm:$b)))]>; - -def LoadParamRegF32 : LoadParamRegInst; -def LoadParamRegF64 : LoadParamRegInst; - def StoreParamI64 : StoreParamInst; def StoreParamI32 : StoreParamInst; @@ -1703,28 +1790,6 @@ def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};", []>; -def StoreParamS32I16 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t", - "st.param.b32\t[param$a+$b], temp_param_reg;"), - []>; -def StoreParamU32I16 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", - "st.param.b32\t[param$a+$b], temp_param_reg;"), - []>; - -def StoreParamU32I8 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t", - "st.param.b32\t[param$a+$b], temp_param_reg;"), - []>; -def StoreParamS32I8 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t", - "st.param.b32\t[param$a+$b], temp_param_reg;"), - []>; - def StoreParamF32 : StoreParamInst; def StoreParamF64 : StoreParamInst; def StoreParamV2F32 : StoreParamV2Inst; @@ -1738,15 +1803,6 @@ def StoreParamV4F32 : NVPTXInst<(outs), "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", []>; -def MoveToParamI64 : MoveToParamInst; -def MoveToParamI32 : MoveToParamInst; -def MoveToParamF64 : MoveToParamInst; -def MoveToParamF32 : MoveToParamInst; -def MoveToParamI16 : NVPTXInst<(outs), - (ins Int16Regs:$val, i32imm:$a, i32imm:$b), - !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", - "mov.b32\tparam$a, temp_param_reg;"), - [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; def StoreRetvalI64 : StoreRetvalInst; def StoreRetvalI32 : StoreRetvalInst; @@ -1766,21 +1822,6 @@ def StoreRetvalV2F64 : StoreRetvalV2Inst; def StoreRetvalV2F32 : StoreRetvalV2Inst; def StoreRetvalV4F32 : StoreRetvalV4Inst; -def MoveRetvalI64 : MoveRetvalInst; -def MoveRetvalI32 : MoveRetvalInst; -def MoveRetvalI16 : MoveRetvalInst; -def MoveRetvalI8 : MoveRetvalInst; -def MoveRetvalF64 : MoveRetvalInst; -def MoveRetvalF32 : MoveRetvalInst; - -def MoveToRetvalI64 : MoveToRetvalInst; -def MoveToRetvalI32 : MoveToRetvalInst; -def MoveToRetvalF64 : MoveToRetvalInst; -def MoveToRetvalF32 : MoveToRetvalInst; -def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val), - "cvt.u32.u16\tfunc_retval$num, $val;", - [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>; - def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; @@ -2135,221 +2176,186 @@ defm STV_f64 : ST_VEC; //---- Conversion ---- -multiclass CVT_INT_TO_FP { -// FIXME: need to add f16 support -// def CVTf16i16 : -// NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a), -// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"), -// [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>; -// def CVTf16i32 : -// NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a), -// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"), -// [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>; -// def CVTf16i64: -// NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a), -// !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), -// [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; - - def CVTf32i1 : - NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a), - "selp.f32 \t$d, 1.0, 0.0, $a;", - [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>; - def CVTf32i16 : - NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a), - !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"), - [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>; - def CVTf32i32 : - NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a), - !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"), - [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>; - def CVTf32i64: - NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a), - !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), - [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; - - def CVTf64i1 : - NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a), - "selp.f64 \t$d, 1.0, 0.0, $a;", - [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>; - def CVTf64i16 : - NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a), - !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"), - [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>; - def CVTf64i32 : - NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a), - !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"), - [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>; - def CVTf64i64: - NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a), - !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"), - [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>; -} - -defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>; -defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>; - -multiclass CVT_FP_TO_INT { -// FIXME: need to add f16 support -// def CVTi16f16: -// NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a), -// !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"), -// [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>; - def CVTi16f32_ftz: - NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), - [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; - def CVTi16f32: - NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), - [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>; - def CVTi16f64: - NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), - [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>; - -// FIXME: need to add f16 support -// def CVTi32f16: def CVTi32f16: -// NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a), -// !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"), -// [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>; - def CVTi32f32_ftz: - NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"), - [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; - def CVTi32f32: - NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"), - [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>; - def CVTi32f64: - NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"), - [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>; - -// FIXME: need to add f16 support -// def CVTi64f16: -// NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a), -// !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"), -// [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>; - def CVTi64f32_ftz: - NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"), - [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; - def CVTi64f32: - NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"), - [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>; - def CVTi64f64: - NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a), - !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"), - [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>; -} - -defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>; -defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>; - -multiclass INT_EXTEND_UNSIGNED_1 { - def ext1to16: - NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), - "selp.u16 \t$d, 1, 0, $a;", - [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; - def ext1to32: - NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), - "selp.u32 \t$d, 1, 0, $a;", - [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; - def ext1to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), - "selp.u64 \t$d, 1, 0, $a;", - [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; -} - -multiclass INT_EXTEND_SIGNED_1 { - def ext1to16: - NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), - "selp.s16 \t$d, -1, 0, $a;", - [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; - def ext1to32: - NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), - "selp.s32 \t$d, -1, 0, $a;", - [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; - def ext1to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), - "selp.s64 \t$d, -1, 0, $a;", - [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; -} - -multiclass INT_EXTEND { - def ext16to32: - NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", - !strconcat(OpStr, "16 \t$d, $a;")))), - [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>; - def ext16to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", - !strconcat(OpStr, "16 \t$d, $a;")))), - [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>; - def ext32to64: - NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a), - !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", - !strconcat(OpStr, "32 \t$d, $a;")))), - [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>; -} - -defm Sint_extend_1 : INT_EXTEND_SIGNED_1; -defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1; -defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1; - -defm Sint_extend : INT_EXTEND <"s", sext>; -defm Zint_extend : INT_EXTEND <"u", zext>; -defm Aint_extend : INT_EXTEND <"u", anyext>; - -class TRUNC_to1_asm { - string s = !strconcat("{{\n\t", - !strconcat(".reg ", - !strconcat(sz, - !strconcat(" temp;\n\t", - !strconcat("and", - !strconcat(sz, - !strconcat("\t temp, $a, 1;\n\t", - !strconcat("setp", - !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}"))))))))); -} - -def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "cvt.u32.u64 \t$d, $a;", - [(set Int32Regs:$d, (trunc Int64Regs:$a))]>; -def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a), - "cvt.u16.u64 \t$d, $a;", - [(set Int16Regs:$d, (trunc Int64Regs:$a))]>; -def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a), - "cvt.u16.u32 \t$d, $a;", - [(set Int16Regs:$d, (trunc Int32Regs:$a))]>; -def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), - TRUNC_to1_asm<".b64">.s, - [(set Int1Regs:$d, (trunc Int64Regs:$a))]>; -def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), - TRUNC_to1_asm<".b32">.s, - [(set Int1Regs:$d, (trunc Int32Regs:$a))]>; -def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a), - TRUNC_to1_asm<".b16">.s, - [(set Int1Regs:$d, (trunc Int16Regs:$a))]>; - -// Select instructions +// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where +// we cannot specify floating-point literals in isel patterns. Therefore, we +// use an integer selp to select either 1 or 0 and then cvt to floating-point. + +// sint -> f32 +def : Pat<(f32 (sint_to_fp Int1Regs:$a)), + (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f32 (sint_to_fp Int16Regs:$a)), + (CVT_f32_s16 Int16Regs:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp Int32Regs:$a)), + (CVT_f32_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp Int64Regs:$a)), + (CVT_f32_s64 Int64Regs:$a, CvtRN)>; + +// uint -> f32 +def : Pat<(f32 (uint_to_fp Int1Regs:$a)), + (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f32 (uint_to_fp Int16Regs:$a)), + (CVT_f32_u16 Int16Regs:$a, CvtRN)>; +def : Pat<(f32 (uint_to_fp Int32Regs:$a)), + (CVT_f32_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(f32 (uint_to_fp Int64Regs:$a)), + (CVT_f32_u64 Int64Regs:$a, CvtRN)>; + +// sint -> f64 +def : Pat<(f64 (sint_to_fp Int1Regs:$a)), + (CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f64 (sint_to_fp Int16Regs:$a)), + (CVT_f64_s16 Int16Regs:$a, CvtRN)>; +def : Pat<(f64 (sint_to_fp Int32Regs:$a)), + (CVT_f64_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(f64 (sint_to_fp Int64Regs:$a)), + (CVT_f64_s64 Int64Regs:$a, CvtRN)>; + +// uint -> f64 +def : Pat<(f64 (uint_to_fp Int1Regs:$a)), + (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f64 (uint_to_fp Int16Regs:$a)), + (CVT_f64_u16 Int16Regs:$a, CvtRN)>; +def : Pat<(f64 (uint_to_fp Int32Regs:$a)), + (CVT_f64_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(f64 (uint_to_fp Int64Regs:$a)), + (CVT_f64_u64 Int64Regs:$a, CvtRN)>; + + +// f32 -> sint +def : Pat<(i16 (fp_to_sint Float32Regs:$a)), + (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (fp_to_sint Float32Regs:$a)), + (CVT_s16_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint Float32Regs:$a)), + (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i32 (fp_to_sint Float32Regs:$a)), + (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint Float32Regs:$a)), + (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i64 (fp_to_sint Float32Regs:$a)), + (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; + +// f32 -> uint +def : Pat<(i16 (fp_to_uint Float32Regs:$a)), + (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (fp_to_uint Float32Regs:$a)), + (CVT_u16_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint Float32Regs:$a)), + (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i32 (fp_to_uint Float32Regs:$a)), + (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint Float32Regs:$a)), + (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i64 (fp_to_uint Float32Regs:$a)), + (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; + +// f64 -> sint +def : Pat<(i16 (fp_to_sint Float64Regs:$a)), + (CVT_s16_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint Float64Regs:$a)), + (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint Float64Regs:$a)), + (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; + +// f64 -> uint +def : Pat<(i16 (fp_to_uint Float64Regs:$a)), + (CVT_u16_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint Float64Regs:$a)), + (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint Float64Regs:$a)), + (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; + +// sext i1 +def : Pat<(i16 (sext Int1Regs:$a)), + (SELP_s16ii -1, 0, Int1Regs:$a)>; +def : Pat<(i32 (sext Int1Regs:$a)), + (SELP_s32ii -1, 0, Int1Regs:$a)>; +def : Pat<(i64 (sext Int1Regs:$a)), + (SELP_s64ii -1, 0, Int1Regs:$a)>; + +// zext i1 +def : Pat<(i16 (zext Int1Regs:$a)), + (SELP_u16ii 1, 0, Int1Regs:$a)>; +def : Pat<(i32 (zext Int1Regs:$a)), + (SELP_u32ii 1, 0, Int1Regs:$a)>; +def : Pat<(i64 (zext Int1Regs:$a)), + (SELP_u64ii 1, 0, Int1Regs:$a)>; + +// anyext i1 +def : Pat<(i16 (anyext Int1Regs:$a)), + (SELP_u16ii 1, 0, Int1Regs:$a)>; +def : Pat<(i32 (anyext Int1Regs:$a)), + (SELP_u32ii 1, 0, Int1Regs:$a)>; +def : Pat<(i64 (anyext Int1Regs:$a)), + (SELP_u64ii 1, 0, Int1Regs:$a)>; + +// sext i16 +def : Pat<(i32 (sext Int16Regs:$a)), + (CVT_s32_s16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (sext Int16Regs:$a)), + (CVT_s64_s16 Int16Regs:$a, CvtNONE)>; + +// zext i16 +def : Pat<(i32 (zext Int16Regs:$a)), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (zext Int16Regs:$a)), + (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; + +// anyext i16 +def : Pat<(i32 (anyext Int16Regs:$a)), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (anyext Int16Regs:$a)), + (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; + +// sext i32 +def : Pat<(i64 (sext Int32Regs:$a)), + (CVT_s64_s32 Int32Regs:$a, CvtNONE)>; + +// zext i32 +def : Pat<(i64 (zext Int32Regs:$a)), + (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; + +// anyext i32 +def : Pat<(i64 (anyext Int32Regs:$a)), + (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; + + +// truncate i64 +def : Pat<(i32 (trunc Int64Regs:$a)), + (CVT_u32_u64 Int64Regs:$a, CvtNONE)>; +def : Pat<(i16 (trunc Int64Regs:$a)), + (CVT_u16_u64 Int64Regs:$a, CvtNONE)>; +def : Pat<(i1 (trunc Int64Regs:$a)), + (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>; + +// truncate i32 +def : Pat<(i16 (trunc Int32Regs:$a)), + (CVT_u16_u32 Int32Regs:$a, CvtNONE)>; +def : Pat<(i1 (trunc Int32Regs:$a)), + (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>; + +// truncate i16 +def : Pat<(i1 (trunc Int16Regs:$a)), + (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>; + + +// Select instructions with 32-bit predicates def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), - (SELECTi16rr Int16Regs:$a, Int16Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_b16rr Int16Regs:$a, Int16Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), - (SELECTi32rr Int32Regs:$a, Int32Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_b32rr Int32Regs:$a, Int32Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), - (SELECTi64rr Int64Regs:$a, Int64Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_b64rr Int64Regs:$a, Int64Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), - (SELECTf32rr Float32Regs:$a, Float32Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_f32rr Float32Regs:$a, Float32Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), - (SELECTf64rr Float64Regs:$a, Float64Regs:$b, - (TRUNC_32to1 Int32Regs:$pred))>; + (SELP_f64rr Float64Regs:$a, Float64Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; + class F_BITCONVERT : @@ -2400,21 +2406,17 @@ def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), "mov.b64\t{{$d1, $d2}}, $s;", []>; -def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), - "cvt.rn.ftz.f32.f64 \t$d, $a;", - [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>; - -def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), - "cvt.rn.f32.f64 \t$d, $a;", - [(set Float32Regs:$d, (fround Float64Regs:$a))]>; - -def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), - "cvt.ftz.f64.f32 \t$d, $a;", - [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>; +// fround f64 -> f32 +def : Pat<(f32 (fround Float64Regs:$a)), + (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f32 (fround Float64Regs:$a)), + (CVT_f32_f64 Float64Regs:$a, CvtRN)>; -def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), - "cvt.f64.f32 \t$d, $a;", - [(set Float64Regs:$d, (fextend Float32Regs:$a))]>; +// fextend f32 -> f64 +def : Pat<(f64 (fextend Float32Regs:$a)), + (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f64 (fextend Float32Regs:$a)), + (CVT_f64_f32 Float32Regs:$a, CvtNONE)>; def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; @@ -2442,8 +2444,8 @@ let isTerminator=1 in { [(br bb:$target)]>; } -def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch - (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>; +def : Pat<(brcond Int32Regs:$a, bb:$target), + (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>; // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a // conditional branch if diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index caa7775..93cdfef 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -82,49 +82,36 @@ def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>; //----------------------------------- // Map min(1.0, max(0.0, x)) to sat(x) -multiclass SAT { - - // fmin(1.0, fmax(0.0, x)) => sat(x) - def SAT11 : NVPTXInst<(outs regclass:$dst), - (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), - OpStr, - [(set regclass:$dst, (IntMinOp f1:$srcf0 , - (IntMaxOp f0:$srcf1, regclass:$src)))]>; - - // fmin(1.0, fmax(x, 0.0)) => sat(x) - def SAT12 : NVPTXInst<(outs regclass:$dst), - (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), - OpStr, - [(set regclass:$dst, (IntMinOp f1:$srcf0 , - (IntMaxOp regclass:$src, f0:$srcf1)))]>; - - // fmin(fmax(0.0, x), 1.0) => sat(x) - def SAT13 : NVPTXInst<(outs regclass:$dst), - (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), - OpStr, - [(set regclass:$dst, (IntMinOp - (IntMaxOp f0:$srcf0, regclass:$src), f1:$srcf1))]>; - - // fmin(fmax(x, 0.0), 1.0) => sat(x) - def SAT14 : NVPTXInst<(outs regclass:$dst), - (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), - OpStr, - [(set regclass:$dst, (IntMinOp - (IntMaxOp regclass:$src, f0:$srcf0), f1:$srcf1))]>; - -} -// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x -// is NaN +// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is +// NaN // max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0. // Same story for fmax, fmin. -defm SAT_fmin_fmax_f : SAT; -defm SAT_fmin_fmax_d : SAT; +def : Pat<(int_nvvm_fmin_f immFloat1, + (int_nvvm_fmax_f immFloat0, Float32Regs:$a)), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_f immFloat1, + (int_nvvm_fmax_f Float32Regs:$a, immFloat0)), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_f + (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_f + (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; + +def : Pat<(int_nvvm_fmin_d immDouble1, + (int_nvvm_fmax_d immDouble0, Float64Regs:$a)), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_d immDouble1, + (int_nvvm_fmax_d Float64Regs:$a, immDouble0)), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_d + (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_fmin_d + (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; // We need a full string for OpcStr here because we need to deal with case like @@ -312,19 +299,19 @@ def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;", // Floor Ceil // -def INT_NVVM_FLOOR_FTZ_F : F_MATH_1<"cvt.rmi.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_floor_ftz_f>; -def INT_NVVM_FLOOR_F : F_MATH_1<"cvt.rmi.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_floor_f>; -def INT_NVVM_FLOOR_D : F_MATH_1<"cvt.rmi.f64.f64 \t$dst, $src0;", - Float64Regs, Float64Regs, int_nvvm_floor_d>; +def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_floor_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_floor_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; -def INT_NVVM_CEIL_FTZ_F : F_MATH_1<"cvt.rpi.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_ceil_ftz_f>; -def INT_NVVM_CEIL_F : F_MATH_1<"cvt.rpi.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_ceil_f>; -def INT_NVVM_CEIL_D : F_MATH_1<"cvt.rpi.f64.f64 \t$dst, $src0;", - Float64Regs, Float64Regs, int_nvvm_ceil_d>; +def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_ceil_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRPI)>; +def : Pat<(int_nvvm_ceil_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; // // Abs @@ -347,37 +334,34 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, // Round // -def INT_NVVM_ROUND_FTZ_F : F_MATH_1<"cvt.rni.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_round_ftz_f>; -def INT_NVVM_ROUND_F : F_MATH_1<"cvt.rni.f32.f32 \t$dst, $src0;", Float32Regs, - Float32Regs, int_nvvm_round_f>; - -def INT_NVVM_ROUND_D : F_MATH_1<"cvt.rni.f64.f64 \t$dst, $src0;", Float64Regs, - Float64Regs, int_nvvm_round_d>; +def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_round_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_round_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; // // Trunc // -def INT_NVVM_TRUNC_FTZ_F : F_MATH_1<"cvt.rzi.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_trunc_ftz_f>; -def INT_NVVM_TRUNC_F : F_MATH_1<"cvt.rzi.f32.f32 \t$dst, $src0;", Float32Regs, - Float32Regs, int_nvvm_trunc_f>; - -def INT_NVVM_TRUNC_D : F_MATH_1<"cvt.rzi.f64.f64 \t$dst, $src0;", Float64Regs, - Float64Regs, int_nvvm_trunc_d>; +def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_trunc_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_trunc_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; // // Saturate // -def INT_NVVM_SATURATE_FTZ_F : F_MATH_1<"cvt.sat.ftz.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_saturate_ftz_f>; -def INT_NVVM_SATURATE_F : F_MATH_1<"cvt.sat.f32.f32 \t$dst, $src0;", - Float32Regs, Float32Regs, int_nvvm_saturate_f>; - -def INT_NVVM_SATURATE_D : F_MATH_1<"cvt.sat.f64.f64 \t$dst, $src0;", - Float64Regs, Float64Regs, int_nvvm_saturate_d>; +def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; +def : Pat<(int_nvvm_saturate_f Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; +def : Pat<(int_nvvm_saturate_d Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; // // Exp2 Log2 @@ -568,110 +552,110 @@ def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", // Convert // -def INT_NVVM_D2F_RN_FTZ : F_MATH_1<"cvt.rn.ftz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rn_ftz>; -def INT_NVVM_D2F_RN : F_MATH_1<"cvt.rn.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rn>; -def INT_NVVM_D2F_RZ_FTZ : F_MATH_1<"cvt.rz.ftz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rz_ftz>; -def INT_NVVM_D2F_RZ : F_MATH_1<"cvt.rz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rz>; -def INT_NVVM_D2F_RM_FTZ : F_MATH_1<"cvt.rm.ftz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rm_ftz>; -def INT_NVVM_D2F_RM : F_MATH_1<"cvt.rm.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rm>; -def INT_NVVM_D2F_RP_FTZ : F_MATH_1<"cvt.rp.ftz.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rp_ftz>; -def INT_NVVM_D2F_RP : F_MATH_1<"cvt.rp.f32.f64 \t$dst, $src0;", - Float32Regs, Float64Regs, int_nvvm_d2f_rp>; - -def INT_NVVM_D2I_RN : F_MATH_1<"cvt.rni.s32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2i_rn>; -def INT_NVVM_D2I_RZ : F_MATH_1<"cvt.rzi.s32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2i_rz>; -def INT_NVVM_D2I_RM : F_MATH_1<"cvt.rmi.s32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2i_rm>; -def INT_NVVM_D2I_RP : F_MATH_1<"cvt.rpi.s32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2i_rp>; - -def INT_NVVM_D2UI_RN : F_MATH_1<"cvt.rni.u32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2ui_rn>; -def INT_NVVM_D2UI_RZ : F_MATH_1<"cvt.rzi.u32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2ui_rz>; -def INT_NVVM_D2UI_RM : F_MATH_1<"cvt.rmi.u32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2ui_rm>; -def INT_NVVM_D2UI_RP : F_MATH_1<"cvt.rpi.u32.f64 \t$dst, $src0;", - Int32Regs, Float64Regs, int_nvvm_d2ui_rp>; - -def INT_NVVM_I2D_RN : F_MATH_1<"cvt.rn.f64.s32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_i2d_rn>; -def INT_NVVM_I2D_RZ : F_MATH_1<"cvt.rz.f64.s32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_i2d_rz>; -def INT_NVVM_I2D_RM : F_MATH_1<"cvt.rm.f64.s32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_i2d_rm>; -def INT_NVVM_I2D_RP : F_MATH_1<"cvt.rp.f64.s32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_i2d_rp>; - -def INT_NVVM_UI2D_RN : F_MATH_1<"cvt.rn.f64.u32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_ui2d_rn>; -def INT_NVVM_UI2D_RZ : F_MATH_1<"cvt.rz.f64.u32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_ui2d_rz>; -def INT_NVVM_UI2D_RM : F_MATH_1<"cvt.rm.f64.u32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_ui2d_rm>; -def INT_NVVM_UI2D_RP : F_MATH_1<"cvt.rp.f64.u32 \t$dst, $src0;", - Float64Regs, Int32Regs, int_nvvm_ui2d_rp>; - -def INT_NVVM_F2I_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2i_rn_ftz>; -def INT_NVVM_F2I_RN : F_MATH_1<"cvt.rni.s32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2i_rn>; -def INT_NVVM_F2I_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2i_rz_ftz>; -def INT_NVVM_F2I_RZ : F_MATH_1<"cvt.rzi.s32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2i_rz>; -def INT_NVVM_F2I_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2i_rm_ftz>; -def INT_NVVM_F2I_RM : F_MATH_1<"cvt.rmi.s32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2i_rm>; -def INT_NVVM_F2I_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2i_rp_ftz>; -def INT_NVVM_F2I_RP : F_MATH_1<"cvt.rpi.s32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2i_rp>; - -def INT_NVVM_F2UI_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2ui_rn_ftz>; -def INT_NVVM_F2UI_RN : F_MATH_1<"cvt.rni.u32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2ui_rn>; -def INT_NVVM_F2UI_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2ui_rz_ftz>; -def INT_NVVM_F2UI_RZ : F_MATH_1<"cvt.rzi.u32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2ui_rz>; -def INT_NVVM_F2UI_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2ui_rm_ftz>; -def INT_NVVM_F2UI_RM : F_MATH_1<"cvt.rmi.u32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2ui_rm>; -def INT_NVVM_F2UI_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u32.f32 \t$dst, $src0;", - Int32Regs, Float32Regs, int_nvvm_f2ui_rp_ftz>; -def INT_NVVM_F2UI_RP : F_MATH_1<"cvt.rpi.u32.f32 \t$dst, $src0;", Int32Regs, - Float32Regs, int_nvvm_f2ui_rp>; - -def INT_NVVM_I2F_RN : F_MATH_1<"cvt.rn.f32.s32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_i2f_rn>; -def INT_NVVM_I2F_RZ : F_MATH_1<"cvt.rz.f32.s32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_i2f_rz>; -def INT_NVVM_I2F_RM : F_MATH_1<"cvt.rm.f32.s32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_i2f_rm>; -def INT_NVVM_I2F_RP : F_MATH_1<"cvt.rp.f32.s32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_i2f_rp>; - -def INT_NVVM_UI2F_RN : F_MATH_1<"cvt.rn.f32.u32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_ui2f_rn>; -def INT_NVVM_UI2F_RZ : F_MATH_1<"cvt.rz.f32.u32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_ui2f_rz>; -def INT_NVVM_UI2F_RM : F_MATH_1<"cvt.rm.f32.u32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_ui2f_rm>; -def INT_NVVM_UI2F_RP : F_MATH_1<"cvt.rp.f32.u32 \t$dst, $src0;", Float32Regs, - Int32Regs, int_nvvm_ui2f_rp>; +def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>; +def : Pat<(int_nvvm_d2f_rn Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>; +def : Pat<(int_nvvm_d2f_rz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>; +def : Pat<(int_nvvm_d2f_rm Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>; +def : Pat<(int_nvvm_d2f_rp Float64Regs:$a), + (CVT_f32_f64 Float64Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_d2i_rn Float64Regs:$a), + (CVT_s32_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_d2i_rz Float64Regs:$a), + (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_d2i_rm Float64Regs:$a), + (CVT_s32_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_d2i_rp Float64Regs:$a), + (CVT_s32_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a), + (CVT_u32_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a), + (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a), + (CVT_u32_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a), + (CVT_u32_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_i2d_rn Int32Regs:$a), + (CVT_f64_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_i2d_rz Int32Regs:$a), + (CVT_f64_s32 Int32Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_i2d_rm Int32Regs:$a), + (CVT_f64_s32 Int32Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_i2d_rp Int32Regs:$a), + (CVT_f64_s32 Int32Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a), + (CVT_f64_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a), + (CVT_f64_u32 Int32Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a), + (CVT_f64_u32 Int32Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a), + (CVT_f64_u32 Int32Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2i_rn Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2i_rz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2i_rm Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2i_rp Float32Regs:$a), + (CVT_s32_f32 Float32Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a), + (CVT_u32_f32 Float32Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), + (CVT_f32_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), + (CVT_f32_s32 Int32Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_i2f_rm Int32Regs:$a), + (CVT_f32_s32 Int32Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_i2f_rp Int32Regs:$a), + (CVT_f32_s32 Int32Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), + (CVT_f32_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), + (CVT_f32_u32 Int32Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), + (CVT_f32_u32 Int32Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), + (CVT_f32_u32 Int32Regs:$a, CvtRP)>; def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>; @@ -687,91 +671,106 @@ def INT_NVVM_D2I_HI : F_MATH_1; -def INT_NVVM_F2LL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ll_rn_ftz>; -def INT_NVVM_F2LL_RN : F_MATH_1<"cvt.rni.s64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ll_rn>; -def INT_NVVM_F2LL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ll_rz_ftz>; -def INT_NVVM_F2LL_RZ : F_MATH_1<"cvt.rzi.s64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ll_rz>; -def INT_NVVM_F2LL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ll_rm_ftz>; -def INT_NVVM_F2LL_RM : F_MATH_1<"cvt.rmi.s64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ll_rm>; -def INT_NVVM_F2LL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ll_rp_ftz>; -def INT_NVVM_F2LL_RP : F_MATH_1<"cvt.rpi.s64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ll_rp>; - -def INT_NVVM_F2ULL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ull_rn_ftz>; -def INT_NVVM_F2ULL_RN : F_MATH_1<"cvt.rni.u64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ull_rn>; -def INT_NVVM_F2ULL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ull_rz_ftz>; -def INT_NVVM_F2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ull_rz>; -def INT_NVVM_F2ULL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ull_rm_ftz>; -def INT_NVVM_F2ULL_RM : F_MATH_1<"cvt.rmi.u64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ull_rm>; -def INT_NVVM_F2ULL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u64.f32 \t$dst, $src0;", - Int64Regs, Float32Regs, int_nvvm_f2ull_rp_ftz>; -def INT_NVVM_F2ULL_RP : F_MATH_1<"cvt.rpi.u64.f32 \t$dst, $src0;", Int64Regs, - Float32Regs, int_nvvm_f2ull_rp>; - -def INT_NVVM_D2LL_RN : F_MATH_1<"cvt.rni.s64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ll_rn>; -def INT_NVVM_D2LL_RZ : F_MATH_1<"cvt.rzi.s64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ll_rz>; -def INT_NVVM_D2LL_RM : F_MATH_1<"cvt.rmi.s64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ll_rm>; -def INT_NVVM_D2LL_RP : F_MATH_1<"cvt.rpi.s64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ll_rp>; - -def INT_NVVM_D2ULL_RN : F_MATH_1<"cvt.rni.u64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ull_rn>; -def INT_NVVM_D2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ull_rz>; -def INT_NVVM_D2ULL_RM : F_MATH_1<"cvt.rmi.u64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ull_rm>; -def INT_NVVM_D2ULL_RP : F_MATH_1<"cvt.rpi.u64.f64 \t$dst, $src0;", Int64Regs, - Float64Regs, int_nvvm_d2ull_rp>; - -def INT_NVVM_LL2F_RN : F_MATH_1<"cvt.rn.f32.s64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ll2f_rn>; -def INT_NVVM_LL2F_RZ : F_MATH_1<"cvt.rz.f32.s64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ll2f_rz>; -def INT_NVVM_LL2F_RM : F_MATH_1<"cvt.rm.f32.s64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ll2f_rm>; -def INT_NVVM_LL2F_RP : F_MATH_1<"cvt.rp.f32.s64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ll2f_rp>; -def INT_NVVM_ULL2F_RN : F_MATH_1<"cvt.rn.f32.u64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ull2f_rn>; -def INT_NVVM_ULL2F_RZ : F_MATH_1<"cvt.rz.f32.u64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ull2f_rz>; -def INT_NVVM_ULL2F_RM : F_MATH_1<"cvt.rm.f32.u64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ull2f_rm>; -def INT_NVVM_ULL2F_RP : F_MATH_1<"cvt.rp.f32.u64 \t$dst, $src0;", Float32Regs, - Int64Regs, int_nvvm_ull2f_rp>; - -def INT_NVVM_LL2D_RN : F_MATH_1<"cvt.rn.f64.s64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ll2d_rn>; -def INT_NVVM_LL2D_RZ : F_MATH_1<"cvt.rz.f64.s64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ll2d_rz>; -def INT_NVVM_LL2D_RM : F_MATH_1<"cvt.rm.f64.s64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ll2d_rm>; -def INT_NVVM_LL2D_RP : F_MATH_1<"cvt.rp.f64.s64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ll2d_rp>; -def INT_NVVM_ULL2D_RN : F_MATH_1<"cvt.rn.f64.u64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ull2d_rn>; -def INT_NVVM_ULL2D_RZ : F_MATH_1<"cvt.rz.f64.u64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ull2d_rz>; -def INT_NVVM_ULL2D_RM : F_MATH_1<"cvt.rm.f64.u64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ull2d_rm>; -def INT_NVVM_ULL2D_RP : F_MATH_1<"cvt.rp.f64.u64 \t$dst, $src0;", Float64Regs, - Int64Regs, int_nvvm_ull2d_rp>; +def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), + (CVT_s64_f32 Float32Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>; +def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), + (CVT_u64_f32 Float32Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), + (CVT_s64_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), + (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), + (CVT_s64_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), + (CVT_s64_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), + (CVT_u64_f64 Float64Regs:$a, CvtRNI)>; +def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), + (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), + (CVT_u64_f64 Float64Regs:$a, CvtRMI)>; +def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), + (CVT_u64_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), + (CVT_f32_s64 Int64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), + (CVT_f32_s64 Int64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), + (CVT_f32_s64 Int64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), + (CVT_f32_s64 Int64Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), + (CVT_f32_u64 Int64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), + (CVT_f32_u64 Int64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), + (CVT_f32_u64 Int64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), + (CVT_f32_u64 Int64Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), + (CVT_f64_s64 Int64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), + (CVT_f64_s64 Int64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), + (CVT_f64_s64 Int64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), + (CVT_f64_s64 Int64Regs:$a, CvtRP)>; + +def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), + (CVT_f64_u64 Int64Regs:$a, CvtRN)>; +def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), + (CVT_f64_u64 Int64Regs:$a, CvtRZ)>; +def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), + (CVT_f64_u64 Int64Regs:$a, CvtRM)>; +def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), + (CVT_f64_u64 Int64Regs:$a, CvtRP)>; + + +// FIXME: Ideally, we could use these patterns instead of the scope-creating +// patterns, but ptxas does not like these since .s16 is not compatible with +// .f16. The solution is to use .bXX for all integer register types, but we +// are not there yet. +//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), +// (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>; +//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), +// (CVT_f16_f32 Float32Regs:$a, CvtRN)>; +// +//def : Pat<(int_nvvm_h2f Int16Regs:$a), +// (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; def INT_NVVM_F2H_RN_FTZ : F_MATH_1; +def : Pat<(f32 (f16_to_f32 Int16Regs:$a)), + (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i16 (f32_to_f16 Float32Regs:$a)), + (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (f32_to_f16 Float32Regs:$a)), + (CVT_f16_f32 Float32Regs:$a, CvtRN)>; + // // Bitcast // -- cgit v1.1 From 331ba2739d484b670000bd59b170fe1e993786d2 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:58:07 +0000 Subject: [NVPTX] Add support for cttz/ctlz/ctpop git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185176 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 16 ++++++++++ lib/Target/NVPTX/NVPTXInstrInfo.td | 58 ++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 338fe7c..8877d13 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -216,6 +216,22 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) // Custom handling for i8 intrinsics setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + setOperationAction(ISD::CTLZ, MVT::i16, Legal); + setOperationAction(ISD::CTLZ, MVT::i32, Legal); + setOperationAction(ISD::CTLZ, MVT::i64, Legal); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal); + setOperationAction(ISD::CTTZ, MVT::i16, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i64, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTPOP, MVT::i16, Legal); + setOperationAction(ISD::CTPOP, MVT::i32, Legal); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + // Now deduce the information based on the above mentioned // actions computeRegisterProperties(); diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 3219364..553a6ba 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -2406,6 +2406,64 @@ def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), "mov.b64\t{{$d1, $d2}}, $s;", []>; +// Count leading zeros +def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), + "clz.b32\t$d, $a;", + []>; +def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "clz.b64\t$d, $a;", + []>; + +// 32-bit has a direct PTX instruction +def : Pat<(ctlz Int32Regs:$a), + (CLZr32 Int32Regs:$a)>; +def : Pat<(ctlz_zero_undef Int32Regs:$a), + (CLZr32 Int32Regs:$a)>; + +// For 64-bit, the result in PTX is actually 32-bit so we zero-extend +// to 64-bit to match the LLVM semantics +def : Pat<(ctlz Int64Regs:$a), + (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(ctlz_zero_undef Int64Regs:$a), + (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; + +// For 16-bit, we zero-extend to 32-bit, then trunc the result back +// to 16-bits (ctlz of a 16-bit value is guaranteed to require less +// than 16 bits to store). We also need to subtract 16 because the +// high-order 16 zeros were counted. +def : Pat<(ctlz Int16Regs:$a), + (SUBi16ri (CVT_u16_u32 (CLZr32 + (CVT_u32_u16 Int16Regs:$a, CvtNONE)), + CvtNONE), 16)>; +def : Pat<(ctlz_zero_undef Int16Regs:$a), + (SUBi16ri (CVT_u16_u32 (CLZr32 + (CVT_u32_u16 Int16Regs:$a, CvtNONE)), + CvtNONE), 16)>; + +// Population count +def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), + "popc.b32\t$d, $a;", + []>; +def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "popc.b64\t$d, $a;", + []>; + +// 32-bit has a direct PTX instruction +def : Pat<(ctpop Int32Regs:$a), + (POPCr32 Int32Regs:$a)>; + +// For 64-bit, the result in PTX is actually 32-bit so we zero-extend +// to 64-bit to match the LLVM semantics +def : Pat<(ctpop Int64Regs:$a), + (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; + +// For 16-bit, we zero-extend to 32-bit, then trunc the result back +// to 16-bits (ctpop of a 16-bit value is guaranteed to require less +// than 16 bits to store) +def : Pat<(ctpop Int16Regs:$a), + (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), + CvtNONE)>; + // fround f64 -> f32 def : Pat<(f32 (fround Float64Regs:$a)), (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; -- cgit v1.1 From ac78a0645ddd2046fb66237ba4cfadffa2d367d7 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:58:10 +0000 Subject: [NVPTX] Calling conventions fix Fix ABI handling for function returning bool -- use st.param.b32 to return the value and use ld.param.b32 in caller to load the return value. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185177 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 64 +++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 24 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8877d13..0ff1a98 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1207,7 +1207,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, sz = 8; SmallVector LoadRetVTs; - if (sz < 16) { + EVT TheLoadType = VTs[i]; + if (retTy->isIntegerTy() && + TD->getTypeAllocSizeInBits(retTy) < 32) { + // This is for integer types only, and specifically not for + // aggregates. + LoadRetVTs.push_back(MVT::i32); + TheLoadType = MVT::i32; + } else if (sz < 16) { // If loading i1/i8 result, generate // load i8 (-> i16) // trunc i16 to i1/i8 @@ -1225,7 +1232,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], - LoadRetOps.size(), VTs[i], MachinePointerInfo()); + LoadRetOps.size(), TheLoadType, MachinePointerInfo()); Chain = retval.getValue(1); InFlag = retval.getValue(2); SDValue Ret0 = retval.getValue(0); @@ -1798,7 +1805,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDLoc dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); - const Type *RetTy = F->getReturnType(); + Type *RetTy = F->getReturnType(); const DataLayout *TD = getDataLayout(); bool isABI = (nvptxSubtarget.getSmVersion() >= 20); @@ -1806,14 +1813,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (!isABI) return Chain; - if (const VectorType *VTy = dyn_cast(RetTy)) { + if (VectorType *VTy = dyn_cast(RetTy)) { // If we have a vector type, the OutVals array will be the scalarized // components and we have combine them into 1 or more vector stores. unsigned NumElts = VTy->getNumElements(); assert(NumElts == Outs.size() && "Bad scalarization of return value"); // const_cast can be removed in later LLVM versions - EVT EltVT = getValueType(const_cast(RetTy)).getVectorElementType(); + EVT EltVT = getValueType(RetTy).getVectorElementType(); bool NeedExtend = false; if (EltVT.getSizeInBits() < 16) NeedExtend = true; @@ -1923,34 +1930,43 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector ValVTs; // const_cast is necessary since we are still using an LLVM version from // before the type system re-write. - ComputePTXValueVTs(*this, const_cast(RetTy), ValVTs); + ComputePTXValueVTs(*this, RetTy, ValVTs); assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); - unsigned sizesofar = 0; + unsigned SizeSoFar = 0; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { SDValue theVal = OutVals[i]; - EVT theValType = theVal.getValueType(); + EVT TheValType = theVal.getValueType(); unsigned numElems = 1; - if (theValType.isVector()) - numElems = theValType.getVectorNumElements(); + if (TheValType.isVector()) + numElems = TheValType.getVectorNumElements(); for (unsigned j = 0, je = numElems; j != je; ++j) { - SDValue tmpval = theVal; - if (theValType.isVector()) - tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - theValType.getVectorElementType(), tmpval, + SDValue TmpVal = theVal; + if (TheValType.isVector()) + TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + TheValType.getVectorElementType(), TmpVal, DAG.getIntPtrConstant(j)); - EVT theStoreType = tmpval.getValueType(); - if (theStoreType.getSizeInBits() < 8) - tmpval = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, tmpval); - SDValue Ops[] = { Chain, DAG.getConstant(sizesofar, MVT::i32), tmpval }; + EVT TheStoreType = ValVTs[i]; + if (RetTy->isIntegerTy() && + TD->getTypeAllocSizeInBits(RetTy) < 32) { + // The following zero-extension is for integer types only, and + // specifically not for aggregates. + TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); + TheStoreType = MVT::i32; + } + else if (TmpVal.getValueType().getSizeInBits() < 16) + TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); + + SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, - DAG.getVTList(MVT::Other), &Ops[0], 3, - ValVTs[i], MachinePointerInfo()); - if (theValType.isVector()) - sizesofar += - ValVTs[i].getVectorElementType().getStoreSizeInBits() / 8; + DAG.getVTList(MVT::Other), &Ops[0], + 3, TheStoreType, + MachinePointerInfo()); + if(TheValType.isVector()) + SizeSoFar += + TheStoreType.getVectorElementType().getStoreSizeInBits() / 8; else - sizesofar += ValVTs[i].getStoreSizeInBits() / 8; + SizeSoFar += TheStoreType.getStoreSizeInBits()/8; } } } -- cgit v1.1 From f51a22e815a7494cbe1aba25d275015a70b5c31d Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:58:13 +0000 Subject: [NVPTX] Add (1.0 / sqrt(x)) => rsqrt(x) generation when allowable by FP flags git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185178 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXInstrInfo.td | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 553a6ba..b6f7526 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -136,7 +136,8 @@ def hasLDG : Predicate<"Subtarget.hasLDG()">; def hasLDU : Predicate<"Subtarget.hasLDU()">; def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; -def doF32FTZ : Predicate<"UseF32FTZ">; +def doF32FTZ : Predicate<"UseF32FTZ==1">; +def doNoF32FTZ : Predicate<"UseF32FTZ==0">; def doFMAF32 : Predicate<"doFMAF32">; def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">; @@ -765,6 +766,16 @@ def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst), (fdiv Float32Regs:$a, fpimm:$b))]>, Requires<[reqPTX20]>; +// +// F32 rsqrt +// + +def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b), + "rsqrt.approx.f32 \t$dst, $b;", []>; + +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)), + (RSQRTF32approx1r Float32Regs:$b)>, + Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>; multiclass FPCONTRACT32 { def rrr : NVPTXInst<(outs Float32Regs:$dst), -- cgit v1.1 From b206498b95e09358d62a19fbff4a98258b179c7a Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 28 Jun 2013 17:58:15 +0000 Subject: [NVPTX] Select -1 instead of 1 when anyextend'ing i1 types This makes it more consistent with the ZeroOrNegativeOneBooleanContent flag git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185179 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXInstrInfo.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index b6f7526..013e24c 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -2294,11 +2294,11 @@ def : Pat<(i64 (zext Int1Regs:$a)), // anyext i1 def : Pat<(i16 (anyext Int1Regs:$a)), - (SELP_u16ii 1, 0, Int1Regs:$a)>; + (SELP_u16ii -1, 0, Int1Regs:$a)>; def : Pat<(i32 (anyext Int1Regs:$a)), - (SELP_u32ii 1, 0, Int1Regs:$a)>; + (SELP_u32ii -1, 0, Int1Regs:$a)>; def : Pat<(i64 (anyext Int1Regs:$a)), - (SELP_u64ii 1, 0, Int1Regs:$a)>; + (SELP_u64ii -1, 0, Int1Regs:$a)>; // sext i16 def : Pat<(i32 (sext Int16Regs:$a)), -- cgit v1.1 From f52578c08c71dc356428c25b0ba8759fd7ee2c66 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 28 Jun 2013 18:03:54 +0000 Subject: Remove unused variables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185180 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 03a7e5d..c85dcb4 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3464,10 +3464,6 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ bool Changed = false; unsigned NumOps = N->getNumOperands(); - ExternalSymbolSDNode *S = dyn_cast( - N->getOperand(InlineAsm::Op_AsmString)); - StringRef AsmString = StringRef(S->getSymbol()); - // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs -- cgit v1.1 From a5545bc2b9b1295f8443f6350487ec9b775b2d73 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 28 Jun 2013 18:57:01 +0000 Subject: Fix an off-by-one error. Also make the code a little more explicit in what it is trying to do. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185191 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FloatingPoint.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 8522c8c..a4ea1a9 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -115,9 +115,10 @@ namespace { unsigned Mask = 0; for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) { - unsigned Reg = *I - X86::FP0; - if (Reg < 8) - Mask |= 1 << Reg; + unsigned Reg = *I; + if (Reg < X86::FP0 || Reg > X86::FP6) + continue; + Mask |= 1 << (Reg - X86::FP0); } return Mask; } -- cgit v1.1 From 2c77a625b79908f6e1238890caae630d28c48bee Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 28 Jun 2013 20:00:07 +0000 Subject: Fix a PPC rlwimi instruction-selection bug Under certain (evidently rare) circumstances, this code used to convert OR(a, AND(x, y)) into OR(a, x). This was incorrect. While there, I've added a comment to the code immediately above. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185201 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 35f4b7c..f8d990c 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -447,10 +447,10 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { unsigned SHOpc = Op1.getOperand(0).getOpcode(); if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { + // Note that Value must be in range here (less than 32) because + // otherwise there would not be any bits set in InsertMask. Op1 = Op1.getOperand(0).getOperand(0); SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; - } else { - Op1 = Op1.getOperand(0); } } -- cgit v1.1 From 59eaa3874663f80ce111a4781b8f1db82995210c Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 28 Jun 2013 20:05:11 +0000 Subject: DebugInfo: PR14728: TLS support Based on GCC's output for TLS variables (OP_constNu, x@dtpoff, OP_lo_user), this implements debug info support for TLS in ELF. Verified that this output is correct/sufficient on Linux (using gold - if you're using binutils-ld, you'll need something with the fix for http://sourceware.org/bugzilla/show_bug.cgi?id=15685 in it). Support on non-ELF is sort of "arbitrary" at the moment - if Apple folks want to discuss (or just go ahead & implement) how this should work in MachO, etc, I'm open. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185203 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetLoweringObjectFile.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index f5121e3..fc50aa5 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -317,3 +317,9 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, } } } + +const MCSymbolRefExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const { + // FIXME: It's not clear what, if any, default this should have - perhaps a + // null return could mean 'no location' & we should just do that here. + return MCSymbolRefExpr::Create(Sym, *Ctx); +} -- cgit v1.1 From 74cf767093d3dd46dc3c7cf5666060e8c1ee0be0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 28 Jun 2013 20:23:29 +0000 Subject: R600/SI: Add processor types for each CIK variant Patch By: Alex Deucher Reviewed-by: Tom Stellard Signed-off-by: Alex Deucher git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185209 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/Processors.td | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index 81f407e..a0735d4 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -48,3 +48,6 @@ def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>; def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>; def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>; def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>; +def : Proc<"bonaire", SI_Itin, [FeatureSouthernIslands]>; +def : Proc<"kabini", SI_Itin, [FeatureSouthernIslands]>; +def : Proc<"kaveri", SI_Itin, [FeatureSouthernIslands]>; \ No newline at end of file -- cgit v1.1 From 75f29256f3587b19740398adb9678b6ba376912f Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 28 Jun 2013 21:28:01 +0000 Subject: Remove unused member git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185219 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMSubtarget.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index def6fbb..63ba6c5 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -195,10 +195,6 @@ protected: public: enum { - isELF, isDarwin - } TargetType; - - enum { ARM_ABI_APCS, ARM_ABI_AAPCS // ARM EABI } TargetABI; -- cgit v1.1 From c09324483cf8c8904b7250f072d8f64e9b00656e Mon Sep 17 00:00:00 2001 From: Richard Trieu Date: Fri, 28 Jun 2013 21:54:25 +0000 Subject: Fix broken asserts that never fire. Change assert("text") to assert(0 && "text"). The first case is a const char * to bool conversion, which always evaluates to true, never triggering the assert. The second case will always trigger the assert. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185227 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp index 36da6df..3105665 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -179,7 +179,7 @@ void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { // Branches can take an immediate operand. This is used by the branch // selection pass to print $+8, an eight byte displacement from the PC. - assert("Unknown branch operand."); + assert(0 && "Unknown branch operand."); } void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo, @@ -203,7 +203,7 @@ void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, O << '#'; printOperand(MI, OpNo, O); } else { - assert("Unknown symbol operand"); + assert(0 && "Unknown symbol operand"); printOperand(MI, OpNo, O); } O << ')'; -- cgit v1.1 From 6a636a813f33b46b3271ec8517ee1936a0c92c9f Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 28 Jun 2013 22:29:56 +0000 Subject: PPC: Ignore spill/restore requests for VRSAVE (except on Darwin) This fixes PR16418, which reports that a function calling __builtin_unwind_init() asserts. The cause is that this generates a spill/restore for VRSAVE, and we support that only on Darwin (because VRSAVE is only really used on Darwin). The test case checks only that we don't crash. We can add correctness checks once someone verifies what behavior the function is supposed to have. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185235 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index dc734d1..4c57cf6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1145,6 +1145,12 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + // Only Darwin actually uses the VRSAVE register, but it can still appear + // here if, for example, @llvm.eh.unwind.init() is used. If we're not on + // Darwin, ignore it. + if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) + continue; + // CR2 through CR4 are the nonvolatile CR fields. bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; @@ -1294,6 +1300,12 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + // Only Darwin actually uses the VRSAVE register, but it can still appear + // here if, for example, @llvm.eh.unwind.init() is used. If we're not on + // Darwin, ignore it. + if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) + continue; + if (Reg == PPC::CR2) { CR2Spilled = true; // The spill slot is associated only with CR2, which is the -- cgit v1.1 From d1fe8d5212fbcb12dd592239f76f95ff2db7870d Mon Sep 17 00:00:00 2001 From: Richard Trieu Date: Fri, 28 Jun 2013 23:46:19 +0000 Subject: Change assert(0 && "text") to llvm_unreachable(0 && "text") git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185243 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp index 3105665..065971e 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -179,7 +179,7 @@ void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { // Branches can take an immediate operand. This is used by the branch // selection pass to print $+8, an eight byte displacement from the PC. - assert(0 && "Unknown branch operand."); + llvm_unreachable("Unknown branch operand."); } void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo, @@ -203,7 +203,7 @@ void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, O << '#'; printOperand(MI, OpNo, O); } else { - assert(0 && "Unknown symbol operand"); + llvm_unreachable("Unknown symbol operand"); printOperand(MI, OpNo, O); } O << ')'; -- cgit v1.1 From 7d1a0d4e3ebf058a8b1d0dea9b6119444ed041c8 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sat, 29 Jun 2013 19:32:29 +0000 Subject: R600: Bank Swizzle now display SCL equivalent git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185267 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 6 +++--- lib/Target/R600/R600InstrInfo.cpp | 8 ++++---- lib/Target/R600/R600InstrInfo.h | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 8c814e0..fac3c39 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -178,13 +178,13 @@ void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, int BankSwizzle = MI->getOperand(OpNo).getImm(); switch (BankSwizzle) { case 1: - O << "BS:VEC_021"; + O << "BS:VEC_021/SCL_122"; break; case 2: - O << "BS:VEC_120"; + O << "BS:VEC_120/SCL_212"; break; case 3: - O << "BS:VEC_102"; + O << "BS:VEC_102/SCL_221"; break; case 4: O << "BS:VEC_201"; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index f05390e..354f039 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -282,15 +282,15 @@ static std::vector > Swizzle(std::vector > Src, R600InstrInfo::BankSwizzle Swz) { switch (Swz) { - case R600InstrInfo::ALU_VEC_012: + case R600InstrInfo::ALU_VEC_012_SCL_210: break; - case R600InstrInfo::ALU_VEC_021: + case R600InstrInfo::ALU_VEC_021_SCL_122: std::swap(Src[1], Src[2]); break; - case R600InstrInfo::ALU_VEC_102: + case R600InstrInfo::ALU_VEC_102_SCL_221: std::swap(Src[0], Src[1]); break; - case R600InstrInfo::ALU_VEC_120: + case R600InstrInfo::ALU_VEC_120_SCL_212: std::swap(Src[0], Src[1]); std::swap(Src[0], Src[2]); break; diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index a375288..a6add83 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -36,14 +36,14 @@ namespace llvm { int getBranchInstr(const MachineOperand &op) const; std::vector > - ExtractSrcs(MachineInstr *MI, const DenseMap &PV) const; + ExtractSrcs(MachineInstr *MI, const DenseMap &PV, unsigned &ConstCount) const; public: enum BankSwizzle { - ALU_VEC_012 = 0, - ALU_VEC_021, - ALU_VEC_120, - ALU_VEC_102, + ALU_VEC_012_SCL_210 = 0, + ALU_VEC_021_SCL_122, + ALU_VEC_120_SCL_212, + ALU_VEC_102_SCL_221, ALU_VEC_201, ALU_VEC_210 }; -- cgit v1.1 From 8f9fbd67c3f803f7397843fdf4b2a7b7ca10189e Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sat, 29 Jun 2013 19:32:43 +0000 Subject: R600: Support schedule and packetization of trans-only inst git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185268 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrInfo.cpp | 185 ++++++++++++++++++++++++------- lib/Target/R600/R600InstrInfo.h | 32 ++++-- lib/Target/R600/R600Instructions.td | 2 + lib/Target/R600/R600MachineScheduler.cpp | 25 +++-- lib/Target/R600/R600MachineScheduler.h | 1 + lib/Target/R600/R600Packetizer.cpp | 90 +++++++++------ lib/Target/R600/R600RegisterInfo.td | 1 + 7 files changed, 246 insertions(+), 90 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 354f039..974a579 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -250,8 +250,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { std::vector > R600InstrInfo::ExtractSrcs(MachineInstr *MI, - const DenseMap &PV) - const { + const DenseMap &PV, + unsigned &ConstCount) const { + ConstCount = 0; const SmallVector, 3> Srcs = getSrcs(MI); const std::pair DummyPair(-1, 0); std::vector > Result; @@ -259,18 +260,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI, for (unsigned n = Srcs.size(); i < n; ++i) { unsigned Reg = Srcs[i].first->getReg(); unsigned Index = RI.getEncodingValue(Reg) & 0xff; - unsigned Chan = RI.getHWRegChan(Reg); if (Reg == AMDGPU::OQAP) { Result.push_back(std::pair(Index, 0)); } - if (Index > 127) { - Result.push_back(DummyPair); + if (PV.find(Reg) != PV.end()) { + // 255 is used to tells its a PS/PV reg + Result.push_back(std::pair(255, 0)); continue; } - if (PV.find(Reg) != PV.end()) { + if (Index > 127) { + ConstCount++; Result.push_back(DummyPair); continue; } + unsigned Chan = RI.getHWRegChan(Reg); Result.push_back(std::pair(Index, Chan)); } for (; i < 3; ++i) @@ -305,23 +308,51 @@ Swizzle(std::vector > Src, return Src; } -bool -R600InstrInfo::isLegal( - const std::vector > > &IGSrcs, - const std::vector &Swz, - unsigned CheckedSize) const { +static unsigned +getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { + switch (Swz) { + case R600InstrInfo::ALU_VEC_012_SCL_210: { + unsigned Cycles[3] = { 2, 1, 0}; + return Cycles[Op]; + } + case R600InstrInfo::ALU_VEC_021_SCL_122: { + unsigned Cycles[3] = { 1, 2, 2}; + return Cycles[Op]; + } + case R600InstrInfo::ALU_VEC_120_SCL_212: { + unsigned Cycles[3] = { 2, 1, 2}; + return Cycles[Op]; + } + case R600InstrInfo::ALU_VEC_102_SCL_221: { + unsigned Cycles[3] = { 2, 2, 1}; + return Cycles[Op]; + } + default: + llvm_unreachable("Wrong Swizzle for Trans Slot"); + return 0; + } +} + +/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed +/// in the same Instruction Group while meeting read port limitations given a +/// Swz swizzle sequence. +unsigned R600InstrInfo::isLegalUpTo( + const std::vector > > &IGSrcs, + const std::vector &Swz, + const std::vector > &TransSrcs, + R600InstrInfo::BankSwizzle TransSwz) const { int Vector[4][3]; memset(Vector, -1, sizeof(Vector)); - for (unsigned i = 0; i < CheckedSize; i++) { + for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) { const std::vector > &Srcs = Swizzle(IGSrcs[i], Swz[i]); for (unsigned j = 0; j < 3; j++) { const std::pair &Src = Srcs[j]; - if (Src.first < 0) + if (Src.first < 0 || Src.first == 255) continue; if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { - if (Swz[i] != R600InstrInfo::ALU_VEC_012 && - Swz[i] != R600InstrInfo::ALU_VEC_021) { + if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && + Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) { // The value from output queue A (denoted by register OQAP) can // only be fetched during the first cycle. return false; @@ -332,51 +363,126 @@ R600InstrInfo::isLegal( if (Vector[Src.second][j] < 0) Vector[Src.second][j] = Src.first; if (Vector[Src.second][j] != Src.first) - return false; + return i; } } - return true; + // Now check Trans Alu + for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) { + const std::pair &Src = TransSrcs[i]; + unsigned Cycle = getTransSwizzle(TransSwz, i); + if (Src.first < 0) + continue; + if (Src.first == 255) + continue; + if (Vector[Src.second][Cycle] < 0) + Vector[Src.second][Cycle] = Src.first; + if (Vector[Src.second][Cycle] != Src.first) + return IGSrcs.size() - 1; + } + return IGSrcs.size(); } -bool -R600InstrInfo::recursiveFitsFPLimitation( - const std::vector > > &IGSrcs, - std::vector &SwzCandidate, - unsigned Depth) const { - if (!isLegal(IGSrcs, SwzCandidate, Depth)) +/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next +/// (in lexicographic term) swizzle sequence assuming that all swizzles after +/// Idx can be skipped +static bool +NextPossibleSolution( + std::vector &SwzCandidate, + unsigned Idx) { + assert(Idx < SwzCandidate.size()); + int ResetIdx = Idx; + while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210) + ResetIdx --; + for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) { + SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210; + } + if (ResetIdx == -1) return false; - if (IGSrcs.size() == Depth) - return true; - unsigned i = SwzCandidate[Depth]; - for (; i < 6; i++) { - SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i; - if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1)) + SwzCandidate[ResetIdx]++; + return true; +} + +/// Enumerate all possible Swizzle sequence to find one that can meet all +/// read port requirements. +bool R600InstrInfo::FindSwizzleForVectorSlot( + const std::vector > > &IGSrcs, + std::vector &SwzCandidate, + const std::vector > &TransSrcs, + R600InstrInfo::BankSwizzle TransSwz) const { + unsigned ValidUpTo = 0; + do { + ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz); + if (ValidUpTo == IGSrcs.size()) return true; - } - SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012; + } while (NextPossibleSolution(SwzCandidate, ValidUpTo)); return false; } +/// Instructions in Trans slot can't read gpr at cycle 0 if they also read +/// a const, and can't read a gpr at cycle 1 if they read 2 const. +static bool +isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, + const std::vector > &TransOps, + unsigned ConstCount) { + for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { + const std::pair &Src = TransOps[i]; + unsigned Cycle = getTransSwizzle(TransSwz, i); + if (Src.first < 0) + continue; + if (ConstCount > 0 && Cycle == 0) + return false; + if (ConstCount > 1 && Cycle == 1) + return false; + } + return true; +} + bool R600InstrInfo::fitsReadPortLimitations(const std::vector &IG, - const DenseMap &PV, - std::vector &ValidSwizzle) + const DenseMap &PV, + std::vector &ValidSwizzle, + bool isLastAluTrans) const { //Todo : support shared src0 - src1 operand std::vector > > IGSrcs; ValidSwizzle.clear(); + unsigned ConstCount; + BankSwizzle TransBS; for (unsigned i = 0, e = IG.size(); i < e; ++i) { - IGSrcs.push_back(ExtractSrcs(IG[i], PV)); + IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount)); unsigned Op = getOperandIdx(IG[i]->getOpcode(), AMDGPU::OpName::bank_swizzle); ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) IG[i]->getOperand(Op).getImm()); } - bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle); - if (!Result) - return false; - return true; + std::vector > TransOps; + if (!isLastAluTrans) + return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS); + + TransOps = IGSrcs.back(); + IGSrcs.pop_back(); + ValidSwizzle.pop_back(); + + static const R600InstrInfo::BankSwizzle TransSwz[] = { + ALU_VEC_012_SCL_210, + ALU_VEC_021_SCL_122, + ALU_VEC_120_SCL_212, + ALU_VEC_102_SCL_221 + }; + for (unsigned i = 0; i < 4; i++) { + TransBS = TransSwz[i]; + if (!isConstCompatible(TransBS, TransOps, ConstCount)) + continue; + bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, + TransBS); + if (Result) { + ValidSwizzle.push_back(TransBS); + return true; + } + } + + return false; } @@ -406,7 +512,8 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector &Consts) } bool -R600InstrInfo::canBundle(const std::vector &MIs) const { +R600InstrInfo::fitsConstReadLimitations(const std::vector &MIs) + const { std::vector Consts; for (unsigned i = 0, n = MIs.size(); i < n; i++) { MachineInstr *MI = MIs[i]; diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index a6add83..1ba4160 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -84,26 +84,38 @@ namespace llvm { SmallVector, 3> getSrcs(MachineInstr *MI) const; - bool isLegal( - const std::vector > > &IGSrcs, - const std::vector &Swz, - unsigned CheckedSize) const; - bool recursiveFitsFPLimitation( - const std::vector > > &IGSrcs, - std::vector &SwzCandidate, - unsigned Depth = 0) const; + unsigned isLegalUpTo( + const std::vector > > &IGSrcs, + const std::vector &Swz, + const std::vector > &TransSrcs, + R600InstrInfo::BankSwizzle TransSwz) const; + + bool FindSwizzleForVectorSlot( + const std::vector > > &IGSrcs, + std::vector &SwzCandidate, + const std::vector > &TransSrcs, + R600InstrInfo::BankSwizzle TransSwz) const; /// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210 /// returns true and the first (in lexical order) BankSwizzle affectation /// starting from the one already provided in the Instruction Group MIs that /// fits Read Port limitations in BS if available. Otherwise returns false /// and undefined content in BS. + /// isLastAluTrans should be set if the last Alu of MIs will be executed on + /// Trans ALU. In this case, ValidTSwizzle returns the BankSwizzle value to + /// apply to the last instruction. /// PV holds GPR to PV registers in the Instruction Group MIs. bool fitsReadPortLimitations(const std::vector &MIs, const DenseMap &PV, - std::vector &BS) const; + std::vector &BS, + bool isLastAluTrans) const; + + /// An instruction group can only access 2 channel pair (either [XY] or [ZW]) + /// from KCache bank on R700+. This function check if MI set in input meet + /// this limitations + bool fitsConstReadLimitations(const std::vector &) const; + /// Same but using const index set instead of MI set. bool fitsConstReadLimitations(const std::vector&) const; - bool canBundle(const std::vector &) const; /// \breif Vector instructions are instructions that must fill all /// instruction slots within an instruction group. diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index fd585f8..15dcf14 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1489,6 +1489,8 @@ let hasSideEffects = 1 in { def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { let Pattern = []; + let TransOnly = 0; + let Itinerary = AnyALU; } def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 7e28f9d..0dc0365 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { MRI = &DAG->MRI; CurInstKind = IDOther; CurEmitted = 0; - OccupedSlotsMask = 15; + OccupedSlotsMask = 31; InstKindLimit[IDAlu] = TII->getMaxAlusPerClause(); InstKindLimit[IDOther] = 32; @@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { if (NextInstKind != CurInstKind) { DEBUG(dbgs() << "Instruction Type Switch\n"); if (NextInstKind != IDAlu) - OccupedSlotsMask = 15; + OccupedSlotsMask |= 31; CurEmitted = 0; CurInstKind = NextInstKind; } @@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg, R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { MachineInstr *MI = SU->getInstr(); + if (TII->isTransOnly(MI)) + return AluTrans; + switch (MI->getOpcode()) { case AMDGPU::PRED_X: return AluPredX; @@ -346,7 +349,7 @@ SUnit *R600SchedStrategy::PopInst(std::vector &Q) { It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); - if (TII->canBundle(InstructionsGroupCandidate)) { + if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) { InstructionsGroupCandidate.pop_back(); Q.erase((It + 1).base()); return SU; @@ -421,7 +424,8 @@ unsigned R600SchedStrategy::AvailablesAluCount() const { return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() + AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() + AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() + - AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size(); + AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() + + AvailableAlus[AluPredX].size(); } SUnit* R600SchedStrategy::pickAlu() { @@ -429,20 +433,27 @@ SUnit* R600SchedStrategy::pickAlu() { if (!OccupedSlotsMask) { // Bottom up scheduling : predX must comes first if (!AvailableAlus[AluPredX].empty()) { - OccupedSlotsMask = 15; + OccupedSlotsMask |= 31; return PopInst(AvailableAlus[AluPredX]); } // Flush physical reg copies (RA will discard them) if (!AvailableAlus[AluDiscarded].empty()) { - OccupedSlotsMask = 15; + OccupedSlotsMask |= 31; return PopInst(AvailableAlus[AluDiscarded]); } // If there is a T_XYZW alu available, use it if (!AvailableAlus[AluT_XYZW].empty()) { - OccupedSlotsMask = 15; + OccupedSlotsMask |= 15; return PopInst(AvailableAlus[AluT_XYZW]); } } + bool TransSlotOccuped = OccupedSlotsMask & 16; + if (!TransSlotOccuped) { + if (!AvailableAlus[AluTrans].empty()) { + OccupedSlotsMask |= 16; + return PopInst(AvailableAlus[AluTrans]); + } + } for (int Chan = 3; Chan > -1; --Chan) { bool isOccupied = OccupedSlotsMask & (1 << Chan); if (!isOccupied) { diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index aae8b3f..f8965d8 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -46,6 +46,7 @@ class R600SchedStrategy : public MachineSchedStrategy { AluT_W, AluT_XYZW, AluPredX, + AluTrans, AluDiscarded, // LLVM Instructions that are going to be eliminated AluLast }; diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 6fc15de..5ee51fa 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -77,8 +77,6 @@ private: do { if (TII->isPredicated(BI)) continue; - if (TII->isTransOnly(BI)) - continue; int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) continue; @@ -87,6 +85,10 @@ private: continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); + if (TII->isTransOnly(BI)) { + Result[Dst] = AMDGPU::PS; + continue; + } if (BI->getOpcode() == AMDGPU::DOT4_r600 || BI->getOpcode() == AMDGPU::DOT4_eg) { Result[Dst] = AMDGPU::PV_X; @@ -157,10 +159,6 @@ public: return true; if (!TII->isALUInstr(MI->getOpcode())) return true; - if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY) - return true; - if (TII->isTransOnly(MI)) - return true; if (MI->getOpcode() == AMDGPU::GROUP_BARRIER) return true; return false; @@ -170,7 +168,7 @@ public: // together. bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); - if (getSlot(MII) <= getSlot(MIJ)) + if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII)) return false; // Does MII and MIJ share the same pred_sel ? int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), @@ -204,11 +202,16 @@ public: MI->getOperand(LastOp).setImm(Bit); } - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + bool isBundlableWithCurrentPMI(MachineInstr *MI, + const DenseMap &PV, + std::vector &BS, + bool &isTransSlot) { + isTransSlot = TII->isTransOnly(MI); + + // Are the Constants limitations met ? CurrentPacketMIs.push_back(MI); - bool FitsConstLimits = TII->canBundle(CurrentPacketMIs); - DEBUG( - if (!FitsConstLimits) { + if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { + DEBUG( dbgs() << "Couldn't pack :\n"; MI->dump(); dbgs() << "with the following packets :\n"; @@ -217,14 +220,15 @@ public: dbgs() << "\n"; } dbgs() << "because of Consts read limitations\n"; - }); - const DenseMap &PV = - getPreviousVector(CurrentPacketMIs.front()); - std::vector BS; - bool FitsReadPortLimits = - TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS); - DEBUG( - if (!FitsReadPortLimits) { + ); + CurrentPacketMIs.pop_back(); + return false; + } + + // Is there a BankSwizzle set that meet Read Port limitations ? + if (!TII->fitsReadPortLimitations(CurrentPacketMIs, + PV, BS, isTransSlot)) { + DEBUG( dbgs() << "Couldn't pack :\n"; MI->dump(); dbgs() << "with the following packets :\n"; @@ -233,25 +237,43 @@ public: dbgs() << "\n"; } dbgs() << "because of Read port limitations\n"; - }); - bool isBundlable = FitsConstLimits && FitsReadPortLimits; - if (isBundlable) { + ); + CurrentPacketMIs.pop_back(); + return false; + } + + CurrentPacketMIs.pop_back(); + return true; + } + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + MachineBasicBlock::iterator FirstInBundle = + CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front(); + const DenseMap &PV = + getPreviousVector(FirstInBundle); + std::vector BS; + bool isTransSlot; + + if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { MachineInstr *MI = CurrentPacketMIs[i]; - unsigned Op = TII->getOperandIdx(MI->getOpcode(), - AMDGPU::OpName::bank_swizzle); - MI->getOperand(Op).setImm(BS[i]); + unsigned Op = TII->getOperandIdx(MI->getOpcode(), + AMDGPU::OpName::bank_swizzle); + MI->getOperand(Op).setImm(BS[i]); } + unsigned Op = TII->getOperandIdx(MI->getOpcode(), + AMDGPU::OpName::bank_swizzle); + MI->getOperand(Op).setImm(BS.back()); + if (!CurrentPacketMIs.empty()) + setIsLastBit(CurrentPacketMIs.back(), 0); + substitutePV(MI, PV); + MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); + if (isTransSlot) { + endPacket(llvm::next(It)->getParent(), llvm::next(It)); + } + return It; } - CurrentPacketMIs.pop_back(); - if (!isBundlable) { - endPacket(MI->getParent(), MI); - substitutePV(MI, getPreviousVector(MI)); - return VLIWPacketizerList::addToPacket(MI); - } - if (!CurrentPacketMIs.empty()) - setIsLastBit(CurrentPacketMIs.back(), 0); - substitutePV(MI, PV); + endPacket(MI->getParent(), MI); return VLIWPacketizerList::addToPacket(MI); } }; diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 60a93e3..66aa916 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254, "X">; def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">; def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">; def PV_W : R600RegWithChan<"PV.W", 254, "W">; +def PS: R600Reg<"PS", 255>; def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; -- cgit v1.1 From 97daabf318ff4751aca49bc1c334d2553b125671 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 29 Jun 2013 20:04:19 +0000 Subject: R600: Unbreak GCC build. operator++ on an enum is not legal. clang happens to accept it anyways, I think that's a known bug. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185269 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 974a579..302e402 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -398,7 +398,8 @@ NextPossibleSolution( } if (ResetIdx == -1) return false; - SwzCandidate[ResetIdx]++; + int NextSwizzle = SwzCandidate[ResetIdx] + 1; + SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle; return true; } -- cgit v1.1 From bebf6222f6b7cc871cfbc51f78cdb33d17fedb6c Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 29 Jun 2013 22:51:12 +0000 Subject: NVPTX: Fold otherwise unused variable into assert. Avoids unused variable warnings in release builds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185271 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 0ff1a98..871bc3c 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -657,7 +657,6 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const DataLayout *TD = getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); - const TargetLowering *TLI = nvTM->getTargetLowering(); SDValue tempChain = Chain; Chain = @@ -1076,7 +1075,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT ObjectVT = getValueType(retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); - assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && + assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(), + ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); bool needTruncate = sz < 16 ? true : false; -- cgit v1.1 From 27bf6e96a73bf35d31ce8f287fa43d5906656747 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Sun, 30 Jun 2013 20:44:50 +0000 Subject: X86: POP*rmm: move address operand to (ins) from (outs). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185292 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d67203e..f33ae2a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -803,11 +803,11 @@ def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], IIC_POP_REG>; def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG>, OpSize; -def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", [], +def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", [], IIC_POP_MEM>, OpSize; def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], IIC_POP_REG>; -def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [], +def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", [], IIC_POP_MEM>; def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize; @@ -851,7 +851,7 @@ def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; -def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [], +def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", [], IIC_POP_MEM>; } // mayLoad, SchedRW let mayStore = 1, SchedRW = [WriteStore] in { -- cgit v1.1 From c36a8d2b3d6e543de8d9f210ecd39a9a0641d826 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sun, 30 Jun 2013 21:44:06 +0000 Subject: R600: Fix an unitialized variable in R600InstrInfo.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185294 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600InstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 302e402..969a7ce 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -449,7 +449,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector &IG, std::vector > > IGSrcs; ValidSwizzle.clear(); unsigned ConstCount; - BankSwizzle TransBS; + BankSwizzle TransBS = ALU_VEC_012_SCL_210; for (unsigned i = 0, e = IG.size(); i < e; ++i) { IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount)); unsigned Op = getOperandIdx(IG[i]->getOpcode(), -- cgit v1.1 From 30b13ebd0b3d29d5f2d3dcbccee31d3a55917277 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:58:48 +0000 Subject: [NVPTX] Make sure we zero out high-order 24 bits for 8-bit load into 32-bit value git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185328 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 871bc3c..b9d8d8f 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2373,7 +2373,8 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], Ops.size(), MVT::i8, MemSD->getMemOperand()); - Results.push_back(NewLD.getValue(0)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, + NewLD.getValue(0))); Results.push_back(NewLD.getValue(1)); } } -- cgit v1.1 From 9bc8feeb4fd15883949900194c93fd1704c404b4 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:58:52 +0000 Subject: [NVPTX] Add isel patterns for [reg+offset] form of ldg/ldu. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185329 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 476 ++++++++++++++++++++++++++------- lib/Target/NVPTX/NVPTXIntrinsics.td | 66 +++-- 2 files changed, 430 insertions(+), 112 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 4457ec3..03a3aa4 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -787,194 +787,476 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { unsigned Opcode; SDLoc DL(N); SDNode *LD; - MemSDNode *Mem = cast(N); + SDValue Base, Offset, Addr; - EVT RetVT = Mem->getMemoryVT().getVectorElementType(); + EVT EltVT = Mem->getMemoryVT().getVectorElementType(); - // Select opcode - if (Subtarget.is64Bit()) { + if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: return NULL; case NVPTXISD::LDGV2: - switch (RetVT.getSimpleVT().SimpleTy) { + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar; break; case MVT::i64: - Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar; break; case MVT::f64: - Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar; break; } break; - case NVPTXISD::LDGV4: - switch (RetVT.getSimpleVT().SimpleTy) { + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar; break; } break; - case NVPTXISD::LDUV2: - switch (RetVT.getSimpleVT().SimpleTy) { + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; - break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; - break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar; break; } break; case NVPTXISD::LDUV4: - switch (RetVT.getSimpleVT().SimpleTy) { + switch (EltVT.getSimpleVT().SimpleTy) { default: return NULL; case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar; break; case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar; break; case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar; break; case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar; break; } break; } - } else { - switch (N->getOpcode()) { - default: - return NULL; - case NVPTXISD::LDGV2: - switch (RetVT.getSimpleVT().SimpleTy) { + + SDValue Ops[] = { Addr, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef(Ops, 2)); + } else if (Subtarget.is64Bit() + ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; - break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; - break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64; + break; + } break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64; + break; + } break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64; + break; + } break; } - break; - case NVPTXISD::LDGV4: - switch (RetVT.getSimpleVT().SimpleTy) { + } else { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32; + break; + } break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32; + break; + } break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32; + break; + } break; } - break; - case NVPTXISD::LDUV2: - switch (RetVT.getSimpleVT().SimpleTy) { + } + + SDValue Ops[] = { Base, Offset, Chain }; + + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef(Ops, 3)); + } else { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; - break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; - break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64; + break; + } break; - case MVT::i64: - Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64; + break; + } break; - case MVT::f64: - Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64; + break; + } break; } - break; - case NVPTXISD::LDUV4: - switch (RetVT.getSimpleVT().SimpleTy) { + } else { + switch (N->getOpcode()) { default: return NULL; - case MVT::i8: - Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + case NVPTXISD::LDGV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32; + break; + } break; - case MVT::i16: - Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + case NVPTXISD::LDUV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32; + break; + } break; - case MVT::i32: - Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + case NVPTXISD::LDGV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32; + break; + } break; - case MVT::f32: - Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + case NVPTXISD::LDUV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32; + break; + } break; } - break; } - } - SDValue Ops[] = { Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); + SDValue Ops[] = { Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), + ArrayRef(Ops, 2)); + } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 93cdfef..14049b1 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1342,20 +1342,38 @@ int_nvvm_ldu_global_p>; // Elementized vector ldu multiclass VLDU_G_ELE_V2 { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins Int32Regs:$src), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri:$src), !strconcat("ldu.global.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), - (ins Int64Regs:$src), + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins imemAny:$src), !strconcat("ldu.global.", TyStr), []>; } -multiclass VLDU_G_ELE_V4 { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int32Regs:$src), +multiclass VLDU_G_ELE_V4 { + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri:$src), !strconcat("ldu.global.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int64Regs:$src), + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins imemAny:$src), !strconcat("ldu.global.", TyStr), []>; } @@ -1452,20 +1470,38 @@ defm INT_PTX_LDG_GLOBAL_p64 // Elementized vector ldg multiclass VLDG_G_ELE_V2 { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src), !strconcat("ld.global.nc.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src), !strconcat("ld.global.nc.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins imemAny:$src), + !strconcat("ld.global.nc.", TyStr), []>; } multiclass VLDG_G_ELE_V4 { - def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src), + def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins MEMri64:$src), !strconcat("ld.global.nc.", TyStr), []>; - def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src), + def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins imemAny:$src), !strconcat("ld.global.nc.", TyStr), []>; } -- cgit v1.1 From 8834184a463514530728a8032df165558393cd9c Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:58:56 +0000 Subject: [NVPTX] Add support for native SIGN_EXTEND_INREG where available git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185330 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 10 ++++++---- lib/Target/NVPTX/NVPTXInstrInfo.td | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index b9d8d8f..725bc9e 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -138,10 +138,12 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setOperationAction(ISD::BR_CC, MVT::i16, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + // Some SIGN_EXTEND_INREG can be done using cvt instruction. + // For others we will expand to a SHL/SRA pair. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (nvptxSubtarget.hasROT64()) { diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 013e24c..e6335a0 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -298,6 +298,7 @@ multiclass F2 { // General Type Conversion //----------------------------------- +let neverHasSideEffects = 1 in { // Generate a cvt to the given type from all possible types. // Each instance takes a CvtMode immediate that defines the conversion mode to // use. It can be CvtNONE to omit a conversion mode. @@ -360,6 +361,23 @@ defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; +// This set of cvt is different from the above. The type of the source +// and target are the same. +// +def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "cvt.s16.s8 \t$dst, $src;", []>; +def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "cvt.s32.s8 \t$dst, $src;", []>; +def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "cvt.s32.s16 \t$dst, $src;", []>; +def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s8 \t$dst, $src;", []>; +def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s16 \t$dst, $src;", []>; +def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s32 \t$dst, $src;", []>; +} + //----------------------------------- // Integer Arithmetic //----------------------------------- @@ -2349,6 +2367,14 @@ def : Pat<(i1 (trunc Int32Regs:$a)), def : Pat<(i1 (trunc Int16Regs:$a)), (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>; +// sext_inreg +def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>; +def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>; +def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>; +def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>; +def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>; +def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>; + // Select instructions with 32-bit predicates def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), -- cgit v1.1 From 03e5bb2c87c05ca42f9940e14f22275b136a9883 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:58:58 +0000 Subject: [NVPTX] Handle signext/zeroext attributes properly Fix a case where we were incorrectly sign-extending a value when we should have been zero-extending the value. Also change some SIGN_EXTEND to ANY_EXTEND because we really dont care and may have more opportunity to fold subexpressions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185331 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 44 +++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 725bc9e..4590916 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -713,7 +713,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, sz = 8; SDValue StVal = OutVals[OIdx]; if (elemtype.getSizeInBits() < 16) { - StVal = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, StVal); + StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); } SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, @@ -947,7 +947,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachinePointerInfo(), false, false, false, 0); if (elemtype.getSizeInBits() < 16) { - theVal = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, theVal); + theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); } SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), @@ -1382,9 +1382,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { // Since StoreV2 is a target node, we cannot rely on DAG type legalization. // Therefore, we must ensure the type is legal. For i1 and i8, we set the // stored type to i16 and propogate the "real" type as the memory type. - bool NeedSExt = false; + bool NeedExt = false; if (EltVT.getSizeInBits() < 16) - NeedSExt = true; + NeedExt = true; switch (NumElts) { default: @@ -1407,8 +1407,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { for (unsigned i = 0; i < NumElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, DAG.getIntPtrConstant(i)); - if (NeedSExt) - ExtVal = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i16, ExtVal); + if (NeedExt) + ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); Ops.push_back(ExtVal); } @@ -1614,15 +1614,18 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( aggregateIsPacked ? 1 : TD->getABITypeAlignment( partVT.getTypeForEVT(F->getContext())); - SDValue p; - if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) - p = DAG.getExtLoad(ISD::SEXTLOAD, dl, Ins[InsIdx].VT, Root, srcAddr, + SDValue p; + if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { + ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? + ISD::SEXTLOAD : ISD::ZEXTLOAD; + p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, MachinePointerInfo(srcValue), partVT, false, false, partAlign); - else + } else { p = DAG.getLoad(partVT, dl, Root, srcAddr, MachinePointerInfo(srcValue), false, false, false, partAlign); + } if (p.getNode()) p.getNode()->setIROrder(idx + 1); InVals.push_back(p); @@ -1657,7 +1660,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( P.getNode()->setIROrder(idx + 1); if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) - P = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, P); + P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); InVals.push_back(P); Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); ++InsIdx; @@ -1682,8 +1685,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( DAG.getIntPtrConstant(1)); if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { - Elt0 = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt0); - Elt1 = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt1); + Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); + Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); } InVals.push_back(Elt0); @@ -1726,7 +1729,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, DAG.getIntPtrConstant(j)); if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) - Elt = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt); + Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); InVals.push_back(Elt); } Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); @@ -1745,14 +1748,17 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *srcValue = Constant::getNullValue(PointerType::get( ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue p; - if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) - p = DAG.getExtLoad(ISD::SEXTLOAD, dl, Ins[InsIdx].VT, Root, Arg, + if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { + ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? + ISD::SEXTLOAD : ISD::ZEXTLOAD; + p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), ObjectVT, false, false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); - else + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + } else { p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + } if (p.getNode()) p.getNode()->setIROrder(idx + 1); InVals.push_back(p); -- cgit v1.1 From be08c60533d27f2714a4a88ba91125bcede32a1e Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:59:01 +0000 Subject: [NVPTX] Fix vector loads from parameters that span multiple loads, and fix some typos git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185332 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 5 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 160 ++------------------------------- 2 files changed, 9 insertions(+), 156 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 03a3aa4..b613587 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1997,7 +1997,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { Ops.push_back(Flag); SDNode *Ret = - CurDAG->getMachineNode(Opc, DL, Node->getVTList(), Ops); + CurDAG->getMachineNode(Opc, DL, VTs, Ops); return Ret; } @@ -2270,8 +2270,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { } } + SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); SDNode *Ret = - CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); + CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 4590916..f257858 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -340,158 +340,6 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); } -/* -std::string NVPTXTargetLowering::getPrototype( - Type *retTy, const ArgListTy &Args, - const SmallVectorImpl &Outs, unsigned retAlignment) const { - - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); - - std::stringstream O; - O << "prototype_" << uniqueCallSite << " : .callprototype "; - - if (retTy->getTypeID() == Type::VoidTyID) - O << "()"; - else { - O << "("; - if (isABI) { - if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { - unsigned size = 0; - if (const IntegerType *ITy = dyn_cast(retTy)) { - size = ITy->getBitWidth(); - if (size < 32) - size = 32; - } else { - assert(retTy->isFloatingPointTy() && - "Floating point type expected here"); - size = retTy->getPrimitiveSizeInBits(); - } - - O << ".param .b" << size << " _"; - } else if (isa(retTy)) - O << ".param .b" << getPointerTy().getSizeInBits() << " _"; - else { - if ((retTy->getTypeID() == Type::StructTyID) || - isa(retTy)) { - SmallVector vtparts; - ComputeValueVTs(*this, retTy, vtparts); - unsigned totalsz = 0; - for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = vtparts[i]; - if (vtparts[i].isVector()) { - elems = vtparts[i].getVectorNumElements(); - elemtype = vtparts[i].getVectorElementType(); - } - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) - sz = 8; - totalsz += sz / 8; - } - } - O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; - } else { - assert(false && "Unknown return type"); - } - } - } else { - SmallVector vtparts; - ComputeValueVTs(*this, retTy, vtparts); - unsigned idx = 0; - for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = vtparts[i]; - if (vtparts[i].isVector()) { - elems = vtparts[i].getVectorNumElements(); - elemtype = vtparts[i].getVectorElementType(); - } - - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; - O << ".reg .b" << sz << " _"; - if (j < je - 1) - O << ", "; - ++idx; - } - if (i < e - 1) - O << ", "; - } - } - O << ") "; - } - O << "_ ("; - - bool first = true; - MVT thePointerTy = getPointerTy(); - - for (unsigned i = 0, e = Args.size(); i != e; ++i) { - const Type *Ty = Args[i].Ty; - if (!first) { - O << ", "; - } - first = false; - - if (Outs[i].Flags.isByVal() == false) { - unsigned sz = 0; - if (isa(Ty)) { - sz = cast(Ty)->getBitWidth(); - if (sz < 32) - sz = 32; - } else if (isa(Ty)) - sz = thePointerTy.getSizeInBits(); - else - sz = Ty->getPrimitiveSizeInBits(); - if (isABI) - O << ".param .b" << sz << " "; - else - O << ".reg .b" << sz << " "; - O << "_"; - continue; - } - const PointerType *PTy = dyn_cast(Ty); - assert(PTy && "Param with byval attribute should be a pointer type"); - Type *ETy = PTy->getElementType(); - - if (isABI) { - unsigned align = Outs[i].Flags.getByValAlign(); - unsigned sz = getDataLayout()->getTypeAllocSize(ETy); - O << ".param .align " << align << " .b8 "; - O << "_"; - O << "[" << sz << "]"; - continue; - } else { - SmallVector vtparts; - ComputeValueVTs(*this, ETy, vtparts); - for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = vtparts[i]; - if (vtparts[i].isVector()) { - elems = vtparts[i].getVectorNumElements(); - elemtype = vtparts[i].getVectorElementType(); - } - - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; - O << ".reg .b" << sz << " "; - O << "_"; - if (j < je - 1) - O << ", "; - } - if (i < e - 1) - O << ", "; - } - continue; - } - } - O << ");"; - return O.str(); -}*/ - std::string NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, const SmallVectorImpl &Outs, @@ -584,7 +432,9 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, OIdx += len - 1; continue; } - assert(getValueType(Ty) == Outs[OIdx].VT && + // i8 types in IR will be i16 types in SDAG + assert((getValueType(Ty) == Outs[OIdx].VT || + (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments"); // scalar type unsigned sz = 0; @@ -854,6 +704,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(StoreVal); } + Ops.push_back(InFlag); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0], Ops.size(), MemVT, @@ -1733,8 +1585,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( InVals.push_back(Elt); } Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); - InsIdx += VecSize; } + InsIdx += VecSize; } if (NumElts > 0) -- cgit v1.1 From fc32eb472ae74e96435ce70c67d6c1edeb6f3e9f Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:59:04 +0000 Subject: [NVPTX] 64-bit ADDC/ADDE are not legal git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185333 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index f257858..04fb784 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -204,6 +204,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) // TRAP can be lowered to PTX trap setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + // Register custom handling for vector loads/stores for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { -- cgit v1.1 From c676590614a0547242620fc2eefb16945e66c68a Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:59:06 +0000 Subject: [NVPTX] Cut down on physical register defs We are using virtual registers throughout now, but we still need to keep a few physical registers per class around to keep the infrastructure happy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185334 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 1 - lib/Target/NVPTX/NVPTXNumRegisters.h | 16 ---------------- lib/Target/NVPTX/NVPTXRegisterInfo.td | 24 +++++++++++++----------- 3 files changed, 13 insertions(+), 28 deletions(-) delete mode 100644 lib/Target/NVPTX/NVPTXNumRegisters.h (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index d7eeced..9662f4c 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -16,7 +16,6 @@ #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" #include "NVPTXInstrInfo.h" -#include "NVPTXNumRegisters.h" #include "NVPTXRegisterInfo.h" #include "NVPTXTargetMachine.h" #include "NVPTXUtilities.h" diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h deleted file mode 100644 index a95c16b..0000000 --- a/lib/Target/NVPTX/NVPTXNumRegisters.h +++ /dev/null @@ -1,16 +0,0 @@ - -//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef NVPTX_NUM_REGISTERS_H -#define NVPTX_NUM_REGISTERS_H - -namespace llvm { const unsigned NVPTXNumRegisters = 396; } - -#endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td index bc705b8..7a38a66 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -29,7 +29,9 @@ def VRFrameLocal : NVPTXReg<"%SPL">; // Special Registers used as the stack def VRDepot : NVPTXReg<"%Depot">; -foreach i = 0-395 in { +// We use virtual registers, but define a few physical registers here to keep +// SDAG and the MachineInstr layers happy. +foreach i = 0-4 in { def P#i : NVPTXReg<"%p"#i>; // Predicate def RS#i : NVPTXReg<"%rs"#i>; // 16-bit def R#i : NVPTXReg<"%r"#i>; // 32-bit @@ -47,16 +49,16 @@ foreach i = 0-395 in { //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// -def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 395))>; -def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 395))>; -def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>; -def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 395))>; -def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 395))>; -def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 395))>; -def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 395))>; -def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 395))>; -def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 395))>; -def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>; +def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; +def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>; +def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4))>; +def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4))>; +def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>; +def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>; +def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>; +def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 4))>; +def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>; +def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>; // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>; -- cgit v1.1 From 1fd09172bba8539e198db0360ac66f4694b4a3e0 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 12:59:08 +0000 Subject: [NVPTX] We dont use NVBuiltin anymore git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185335 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 -- lib/Target/NVPTX/NVPTXISelLowering.h | 1 - 2 files changed, 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 04fb784..d4cc31b 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -252,8 +252,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::RET_FLAG"; case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper"; - case NVPTXISD::NVBuiltin: - return "NVPTXISD::NVBuiltin"; case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam"; case NVPTXISD::DeclareScalarParam: diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 5e26b1c..3418437 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -29,7 +29,6 @@ enum NodeType { CALL, RET_FLAG, LOAD_PARAM, - NVBuiltin, DeclareParam, DeclareScalarParam, DeclareRetParam, -- cgit v1.1 From 728af3d574895dd9e4bb5c418c7398297c4f39fe Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 1 Jul 2013 13:00:14 +0000 Subject: [NVPTX] Add support for module-scope inline asm Since we were explicitly not calling AsmPrinter::doInitialization, any module-scope inline asm was not being printed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185336 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 9662f4c..93ba8d5 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1043,6 +1043,16 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { // Already commented out //bool Result = AsmPrinter::doInitialization(M); + // Emit module-level inline asm if it exists. + if (!M.getModuleInlineAsm().empty()) { + OutStreamer.AddComment("Start of file scope inline assembly"); + OutStreamer.AddBlankLine(); + OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm())); + OutStreamer.AddBlankLine(); + OutStreamer.AddComment("End of file scope inline assembly"); + OutStreamer.AddBlankLine(); + } + if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) recordAndEmitFilenames(M); -- cgit v1.1 From d59fc0af0a3ebd13c7004511e64e3233dfe87b17 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 1 Jul 2013 14:48:48 +0000 Subject: ARM: relax the atomic release barrier to "dmb ishst" I believe the full "dmb ish" barrier is not required to guarantee release semantics for atomic operations. The weaker "dmb ishst" prevents previous operations being reordered with a store executed afterwards, which is enough. A key point to note (fortunately already correct) is that this barrier alone is *insufficient* for sequential consistency, no matter how liberally placed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185339 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ff8571b..370962d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2557,8 +2557,12 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } + ConstantSDNode *OrdN = cast(Op.getOperand(1)); + AtomicOrdering Ord = static_cast(OrdN->getZExtValue()); + unsigned Domain = Ord == Release ? ARM_MB::ISHST : ARM_MB::ISH; + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(ARM_MB::ISH, MVT::i32)); + DAG.getConstant(Domain, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, -- cgit v1.1 From af679a22923d2b61e3bfb6721bd562b99546bfad Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 1 Jul 2013 16:37:52 +0000 Subject: [PowerPC] Add variants of "sync" instruction This adds support for the "sync $L" instruction with operand, and provides aliases for "lwsync" and "ptesync". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185344 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrFormats.td | 5 ++++- lib/Target/PowerPC/PPCInstrInfo.td | 12 ++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index b316fa6..9f5435e 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -473,8 +473,11 @@ class XForm_24 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, class XForm_24_sync opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : I { + bits<2> L; + let Pattern = pattern; - let Inst{6-10} = 0; + let Inst{6-8} = 0; + let Inst{9-10} = L; let Inst{11-15} = 0; let Inst{16-20} = 0; let Inst{21-30} = xo; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 28396fd..d05bd0d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1508,9 +1508,9 @@ def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STFDUX $rS, $ptrreg, $ptroff)>; -def SYNC : XForm_24_sync<31, 598, (outs), (ins), - "sync", LdStSync, - [(int_ppc_sync)]>; +def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), + "sync $L", LdStSync, []>; +def : Pat<(int_ppc_sync), (SYNC 0)>; //===----------------------------------------------------------------------===// // PPC32 Arithmetic Instructions. @@ -2231,7 +2231,7 @@ def : Pat<(f64 (extloadf32 xaddr:$src)), def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; -def : Pat<(atomic_fence (imm), (imm)), (SYNC)>; +def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>; // Additional FNMSUB patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), @@ -2279,6 +2279,10 @@ class PPCAsmPseudo def : InstAlias<"sc", (SC 0)>; +def : InstAlias<"sync", (SYNC 0)>; +def : InstAlias<"lwsync", (SYNC 1)>; +def : InstAlias<"ptesync", (SYNC 2)>; + def : InstAlias<"xnop", (XORI R0, R0, 0)>; def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; -- cgit v1.1 From c0a6b981de8efd2c68125edb94bf9ffb933df727 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 1 Jul 2013 17:06:26 +0000 Subject: [PowerPC] Support "eieio" instruction This adds support for the "eieio" instruction to the asm parser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185349 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrFormats.td | 6 ++++++ lib/Target/PowerPC/PPCInstrInfo.td | 3 +++ 2 files changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 9f5435e..42adc02 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -484,6 +484,12 @@ class XForm_24_sync opcode, bits<10> xo, dag OOL, dag IOL, let Inst{31} = 0; } +class XForm_24_eieio opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list pattern> + : XForm_24_sync { + let L = 0; +} + class XForm_25 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : XForm_base_r3xo { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index d05bd0d..fc925e1 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2257,6 +2257,9 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), "icbi $src", LdStICBI, []>; +def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), + "eieio", LdStLoad, []>; + //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases // -- cgit v1.1 From 62c1baf8b58a40d37f56a5431214e6514e42970f Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 1 Jul 2013 17:21:23 +0000 Subject: [PowerPC] Add "wait" instruction This adds the "wait" instruction and its extended mnemonics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185350 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index fc925e1..5c2c59f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2260,6 +2260,9 @@ def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), "eieio", LdStLoad, []>; +def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), + "wait $L", LdStLoad, []>; + //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases // @@ -2286,6 +2289,10 @@ def : InstAlias<"sync", (SYNC 0)>; def : InstAlias<"lwsync", (SYNC 1)>; def : InstAlias<"ptesync", (SYNC 2)>; +def : InstAlias<"wait", (WAIT 0)>; +def : InstAlias<"waitrsv", (WAIT 1)>; +def : InstAlias<"waitimpl", (WAIT 2)>; + def : InstAlias<"xnop", (XORI R0, R0, 0)>; def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; -- cgit v1.1 From 222e781d92541017c3a9c5dd40cb52e334cdb86f Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 1 Jul 2013 18:19:56 +0000 Subject: [PowerPC] Fix @got references to local symbols A @got reference must always result in a relocation, so that the linker has a chance to set up the GOT entry, even if the symbol happens to be local. Add a PPCELFObjectWriter::ExplicitRelSym routine that enforces a relocation to be emitted for GOT references. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185353 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index c2bf251..c26b545 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -30,6 +30,11 @@ namespace { virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const; virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; @@ -328,6 +333,35 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, return getRelocTypeInner(Target, Fixup, IsPCRel); } +const MCSymbol *PPCELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const { + assert(Target.getSymA() && "SymA cannot be 0"); + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + + bool EmitThisSym; + switch (Modifier) { + // GOT references always need a relocation, even if the + // target symbol is local. + case MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_PPC_GOT_LO: + case MCSymbolRefExpr::VK_PPC_GOT_HI: + case MCSymbolRefExpr::VK_PPC_GOT_HA: + EmitThisSym = true; + break; + default: + EmitThisSym = false; + break; + } + + if (EmitThisSym) + return &Target.getSymA()->getSymbol().AliasedSymbol(); + return NULL; +} + const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { -- cgit v1.1 From 40d0492cdea1023463a9902ee81b3c5251204039 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 1 Jul 2013 18:37:33 +0000 Subject: Revert r185339 (ARM: relax the atomic release barrier to "dmb ishst") Turns out I'd misread the architecture reference manual and thought that was a load/store-store barrier, when it's not. Thanks for pointing it out Eli! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185356 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 370962d..ff8571b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2557,12 +2557,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } - ConstantSDNode *OrdN = cast(Op.getOperand(1)); - AtomicOrdering Ord = static_cast(OrdN->getZExtValue()); - unsigned Domain = Ord == Release ? ARM_MB::ISHST : ARM_MB::ISH; - return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(Domain, MVT::i32)); + DAG.getConstant(ARM_MB::ISH, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, -- cgit v1.1 From 6711fc28a41c05e1c8398393c7794c41b2ee0202 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 1 Jul 2013 19:23:10 +0000 Subject: AArch64: correct CodeGen of MOVZ/MOVK combinations. According to the AArch64 ELF specification (4.6.8), it's the assembler's responsibility to make sure the shift amount is correct in relocated MOVZ/MOVK instructions. This wasn't being obeyed by either the MCJIT CodeGen or RuntimeDyldELF (which happened to work out well for JIT tests). This commit should make us compliant in this area. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185360 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 5 +++-- lib/Target/AArch64/AArch64InstrInfo.td | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 2e37cb4..f258a96 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -70,10 +70,11 @@ public: /// Used for pre-lowered address-reference nodes, so we already know /// the fields match. This operand's job is simply to add an - /// appropriate shift operand (i.e. 0) to the MOVZ/MOVK instruction. + /// appropriate shift operand to the MOVZ/MOVK instruction. + template bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) { Imm = N; - Shift = CurDAG->getTargetConstant(0, MVT::i32); + Shift = CurDAG->getTargetConstant(LogShift, MVT::i32); return true; } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index d2cfc7d..725a121 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -3974,14 +3974,17 @@ def : movalias; def : movalias; def : movalias; -def movw_addressref : ComplexPattern; - -def : Pat<(A64WrapperLarge movw_addressref:$G3, movw_addressref:$G2, - movw_addressref:$G1, movw_addressref:$G0), - (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref:$G3), - movw_addressref:$G2), - movw_addressref:$G1), - movw_addressref:$G0)>; +def movw_addressref_g0 : ComplexPattern">; +def movw_addressref_g1 : ComplexPattern">; +def movw_addressref_g2 : ComplexPattern">; +def movw_addressref_g3 : ComplexPattern">; + +def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2, + movw_addressref_g1:$G1, movw_addressref_g0:$G0), + (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3), + movw_addressref_g2:$G2), + movw_addressref_g1:$G1), + movw_addressref_g0:$G0)>; //===----------------------------------------------------------------------===// // PC-relative addressing instructions -- cgit v1.1 From b5f7b0f9780cd1bc6f948b194adfc57176d41711 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 1 Jul 2013 19:34:59 +0000 Subject: Don't form PPC CTR loops for over-sized exit counts Although you can't generate this from C on PPC64, if you have a loop using a 64-bit counter on PPC32 then you can't form a CTR-based loop for it. This had been cauing the PPCCTRLoops pass to assert. Thanks to Joerg Sonnenberger for providing a test case! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185361 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCTRLoops.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 08247c2..bfc9495 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -415,6 +415,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { } else if (!SE->isLoopInvariant(EC, L)) continue; + if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32)) + continue; + // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // exisiting block. -- cgit v1.1 From 27253f5edd04791bfbd0b5dd6e228be1d8071fce Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Mon, 1 Jul 2013 19:44:44 +0000 Subject: Add jump tables handling for MSP430. Patch by Job Noorman! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185364 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MSP430/MSP430ISelLowering.cpp | 10 ++++++++++ lib/Target/MSP430/MSP430ISelLowering.h | 1 + 2 files changed, 11 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 3c19213..168e3f1 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -169,6 +169,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::JumpTable, MVT::i16, Custom); // Libcalls names. if (HWMultMode == HWMultIntr) { @@ -199,6 +200,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); default: llvm_unreachable("unimplemented operand"); } @@ -981,6 +983,14 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op, false, false, 0); } +SDValue MSP430TargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { + JumpTableSDNode *JT = cast(Op); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); + Result.getNode()->setDebugLoc(JT->getDebugLoc()); + return Result; +} + /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 878b207..85a861e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -93,6 +93,7 @@ namespace llvm { SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; TargetLowering::ConstraintType -- cgit v1.1 From db8e0bbedb46c9f781f8a32728b1019f34089ed8 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 1 Jul 2013 20:31:44 +0000 Subject: [mips] Increase the number of floating point control registers available to 32. Create a dedicated register class for floating point condition code registers and move FCC0 from register class CCR to the new register class. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185373 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsRegisterInfo.td | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 3687084..a5320bb 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -248,8 +248,9 @@ let Namespace = "Mips" in { def LO64 : RegisterWithSubRegs<"lo", [LO]>; } - // Status flags register - def FCR31 : Register<"31">; + // FP control registers. + foreach I = 0-31 in + def FCR#I : MipsReg<#I, ""#I>; // fcc0 register def FCC0 : MipsReg<0, "fcc0">; @@ -357,8 +358,12 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; -// Condition Register for floating point operations -def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable; +// FP control registers. +def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>, + Unallocatable; + +// FP condition code registers. +def FCC : RegisterClass<"Mips", [i32], 32, (add FCC0)>, Unallocatable; // Hi/Lo Registers def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>; -- cgit v1.1 From 3bd2b92267df204c5633329611cc7ae3e1c11834 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 1 Jul 2013 20:39:50 +0000 Subject: [PowerPC] Also add "msync" alias This adds an alias for "msync" (which is used on Book E systems instead of "sync"). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185375 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 5c2c59f..d535deb 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2286,6 +2286,7 @@ class PPCAsmPseudo def : InstAlias<"sc", (SC 0)>; def : InstAlias<"sync", (SYNC 0)>; +def : InstAlias<"msync", (SYNC 0)>; def : InstAlias<"lwsync", (SYNC 1)>; def : InstAlias<"ptesync", (SYNC 2)>; -- cgit v1.1 From 5112243aec9486a669d44b72e6648e8a920c9931 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 1 Jul 2013 20:39:53 +0000 Subject: [mips] Reverse the order of source operands of shift and rotate instructions that have three register operands. No intended functionality changes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185376 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 14 +++++++------- lib/Target/Mips/MipsInstrInfo.td | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index a58f177..bb24b8a 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1084,9 +1084,9 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper) .addReg(Mips::ZERO).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), Mask) - .addReg(ShiftAmt).addReg(MaskUpper); + .addReg(MaskUpper).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr); + BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(Incr).addReg(ShiftAmt); // atomic.load.binop // loopMBB: @@ -1147,7 +1147,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) .addReg(OldVal).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) - .addReg(ShiftAmt).addReg(MaskedOldVal1); + .addReg(MaskedOldVal1).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::SLL), SllRes) .addReg(SrlRes).addImm(ShiftImm); BuildMI(BB, DL, TII->get(Mips::SRA), Dest) @@ -1334,16 +1334,16 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper) .addReg(Mips::ZERO).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), Mask) - .addReg(ShiftAmt).addReg(MaskUpper); + .addReg(MaskUpper).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal) .addReg(CmpVal).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal) - .addReg(ShiftAmt).addReg(MaskedCmpVal); + .addReg(MaskedCmpVal).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal) .addReg(NewVal).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) - .addReg(ShiftAmt).addReg(MaskedNewVal); + .addReg(MaskedNewVal).addReg(ShiftAmt); // loop1MBB: // ll oldval,0(alginedaddr) @@ -1379,7 +1379,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, int64_t ShiftImm = (Size == 1) ? 24 : 16; BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) - .addReg(ShiftAmt).addReg(MaskedOldVal0); + .addReg(MaskedOldVal0).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::SLL), SllRes) .addReg(SrlRes).addImm(ShiftImm); BuildMI(BB, DL, TII->get(Mips::SRA), Dest) diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index dc3e4be..9279bd4 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -421,7 +421,7 @@ class shift_rotate_imm: - InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt), + InstSE<(outs RC:$rd), (ins RC:$rt, CPURegsOpnd:$rs), !strconcat(opstr, "\t$rd, $rt, $rs"), [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR, opstr>; -- cgit v1.1 From e29e2afc738348c74966ed81b3568779247c9fbd Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 1 Jul 2013 20:49:23 +0000 Subject: [ARMAsmParser] Sort the ARM register lists based on the encoding value, not the tablegen enum values. This should be the last fix due to fallout from r185094. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185379 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 38 +++++++++++++++++++------------ 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ba2bf8e..8595ce3 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -2281,21 +2281,24 @@ public: } static ARMOperand * - CreateRegList(const SmallVectorImpl > &Regs, + CreateRegList(SmallVectorImpl > &Regs, SMLoc StartLoc, SMLoc EndLoc) { + assert (Regs.size() > 0 && "RegList contains no registers?"); KindTy Kind = k_RegisterList; - if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first)) + if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second)) Kind = k_DPRRegisterList; else if (ARMMCRegisterClasses[ARM::SPRRegClassID]. - contains(Regs.front().first)) + contains(Regs.front().second)) Kind = k_SPRRegisterList; + // Sort based on the register encoding values. + array_pod_sort(Regs.begin(), Regs.end()); + ARMOperand *Op = new ARMOperand(Kind); - for (SmallVectorImpl >::const_iterator + for (SmallVectorImpl >::const_iterator I = Regs.begin(), E = Regs.end(); I != E; ++I) - Op->Registers.push_back(I->first); - array_pod_sort(Op->Registers.begin(), Op->Registers.end()); + Op->Registers.push_back(I->second); Op->StartLoc = StartLoc; Op->EndLoc = EndLoc; return Op; @@ -2975,12 +2978,14 @@ parseRegisterList(SmallVectorImpl &Operands) { // The reglist instructions have at most 16 registers, so reserve // space for that many. - SmallVector, 16> Registers; + int EReg = 0; + SmallVector, 16> Registers; // Allow Q regs and just interpret them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); - Registers.push_back(std::pair(Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair(EReg, Reg)); ++Reg; } const MCRegisterClass *RC; @@ -2994,7 +2999,8 @@ parseRegisterList(SmallVectorImpl &Operands) { return Error(RegLoc, "invalid register in register list"); // Store the register. - Registers.push_back(std::pair(Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair(EReg, Reg)); // This starts immediately after the first register token in the list, // so we can see either a comma or a minus (range separator) as a legal @@ -3024,7 +3030,8 @@ parseRegisterList(SmallVectorImpl &Operands) { // Add all the registers in the range to the register list. while (Reg != EndReg) { Reg = getNextRegister(Reg); - Registers.push_back(std::pair(Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair(EReg, Reg)); } continue; } @@ -3057,14 +3064,15 @@ parseRegisterList(SmallVectorImpl &Operands) { continue; } // VFP register lists must also be contiguous. - // It's OK to use the enumeration values directly here rather, as the - // VFP register classes have the enum sorted properly. if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] && Reg != OldReg + 1) return Error(RegLoc, "non-contiguous register range"); - Registers.push_back(std::pair(Reg, RegLoc)); - if (isQReg) - Registers.push_back(std::pair(++Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair(EReg, Reg)); + if (isQReg) { + EReg = MRI->getEncodingValue(++Reg); + Registers.push_back(std::pair(EReg, Reg)); + } } if (Parser.getTok().isNot(AsmToken::RCurly)) -- cgit v1.1 From c38c1d135cb9d617254c396c22949baca024dd35 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 1 Jul 2013 20:52:27 +0000 Subject: Index: test/CodeGen/PowerPC/reloc-align.ll =================================================================== --- test/CodeGen/PowerPC/reloc-align.ll (revision 0) +++ test/CodeGen/PowerPC/reloc-align.ll (revision 0) @@ -0,0 +1,34 @@ +; RUN: llc -mcpu=pwr7 -O1 < %s | FileCheck %s + +; This test verifies that the peephole optimization of address accesses +; does not produce a load or store with a relocation that can't be +; satisfied for a given instruction encoding. Reduced from a test supplied +; by Hal Finkel. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.S1 = type { [8 x i8] } + +@main.l_1554 = internal global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 -1, i8 -6, i8 57, i8 62, i8 -48, i8 0, i8 58, i8 80 }, align 1 + +; Function Attrs: nounwind readonly +define signext i32 @main() #0 { +entry: + %call = tail call fastcc signext i32 @func_90(%struct.S1* byval bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @main.l_1554 to %struct.S1*)) +; CHECK-NOT: ld {{[0-9]+}}, main.l_1554@toc@l + ret i32 %call +} + +; Function Attrs: nounwind readonly +define internal fastcc signext i32 @func_90(%struct.S1* byval nocapture %p_91) #0 { +entry: + %0 = bitcast %struct.S1* %p_91 to i64* + %bf.load = load i64* %0, align 1 + %bf.shl = shl i64 %bf.load, 26 + %bf.ashr = ashr i64 %bf.shl, 54 + %bf.cast = trunc i64 %bf.ashr to i32 + ret i32 %bf.cast +} + +attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp (revision 185327) +++ lib/Target/PowerPC/PPCAsmPrinter.cpp (working copy) @@ -679,7 +679,26 @@ void PPCAsmPrinter::EmitInstruction(const MachineI OutStreamer.EmitRawText(StringRef("\tmsync")); return; } + break; + case PPC::LD: + case PPC::STD: + case PPC::LWA: { + // Verify alignment is legal, so we don't create relocations + // that can't be supported. + // FIXME: This test is currently disabled for Darwin. The test + // suite shows a handful of test cases that fail this check for + // Darwin. Those need to be investigated before this sanity test + // can be enabled for those subtargets. + if (!Subtarget.isDarwin()) { + unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1; + const MachineOperand &MO = MI->getOperand(OpNum); + if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4) + llvm_unreachable("Global must be word-aligned for LD, STD, LWA!"); + } + // Now process the instruction normally. + break; } + } LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); OutStreamer.EmitInstruction(TmpInst); Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp (revision 185327) +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp (working copy) @@ -1530,6 +1530,14 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); + // We can't perform this optimization for data whose alignment + // is insufficient for the instruction encoding. + if (GV->getAlignment() < 4 && + (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || + StorageOpcode == PPC::LWA)) { + DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); + continue; + } ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); } else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185380 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 19 +++++++++++++++++++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 8 ++++++++ 2 files changed, 27 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 6af3072..8d350a4 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -679,6 +679,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(StringRef("\tmsync")); return; } + break; + case PPC::LD: + case PPC::STD: + case PPC::LWA: { + // Verify alignment is legal, so we don't create relocations + // that can't be supported. + // FIXME: This test is currently disabled for Darwin. The test + // suite shows a handful of test cases that fail this check for + // Darwin. Those need to be investigated before this sanity test + // can be enabled for those subtargets. + if (!Subtarget.isDarwin()) { + unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1; + const MachineOperand &MO = MI->getOperand(OpNum); + if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4) + llvm_unreachable("Global must be word-aligned for LD, STD, LWA!"); + } + // Now process the instruction normally. + break; + } } LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index f8d990c..cc4478b 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1530,6 +1530,14 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); + // We can't perform this optimization for data whose alignment + // is insufficient for the instruction encoding. + if (GV->getAlignment() < 4 && + (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || + StorageOpcode == PPC::LWA)) { + DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); + continue; + } ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); } else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { -- cgit v1.1 From bde84a96ea67737e275d2adee2da86a0fa875785 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 1 Jul 2013 21:31:10 +0000 Subject: Add a newline. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185385 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/Processors.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index a0735d4..4631c04 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -50,4 +50,4 @@ def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>; def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>; def : Proc<"bonaire", SI_Itin, [FeatureSouthernIslands]>; def : Proc<"kabini", SI_Itin, [FeatureSouthernIslands]>; -def : Proc<"kaveri", SI_Itin, [FeatureSouthernIslands]>; \ No newline at end of file +def : Proc<"kaveri", SI_Itin, [FeatureSouthernIslands]>; -- cgit v1.1 From 1307d8300f6fe97059998480c42b44faefbc9b99 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 1 Jul 2013 21:40:54 +0000 Subject: [PowerPC] Support all condition register logical instructions This adds support for all missing condition register logical instructions and extended mnemonics to the asm parser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185387 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index d535deb..096fd65 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1776,15 +1776,37 @@ def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA), "mcrf $BF, $BFA", BrMCR>, PPC970_DGroup_First, PPC970_Unit_CRU; +def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crand $CRD, $CRA, $CRB", BrCR, []>; + +def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crnand $CRD, $CRA, $CRB", BrCR, []>; + +def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "cror $CRD, $CRA, $CRB", BrCR, []>; + +def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crxor $CRD, $CRA, $CRB", BrCR, []>; + +def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crnor $CRD, $CRA, $CRB", BrCR, []>; + def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "creqv $CRD, $CRA, $CRB", BrCR, - []>; + "creqv $CRD, $CRA, $CRB", BrCR, []>; -def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), +def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "cror $CRD, $CRA, $CRB", BrCR, - []>; + "crandc $CRD, $CRA, $CRB", BrCR, []>; + +def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crorc $CRD, $CRA, $CRB", BrCR, []>; let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), @@ -2294,6 +2316,11 @@ def : InstAlias<"wait", (WAIT 0)>; def : InstAlias<"waitrsv", (WAIT 1)>; def : InstAlias<"waitimpl", (WAIT 2)>; +def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; +def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; +def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; +def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; + def : InstAlias<"xnop", (XORI R0, R0, 0)>; def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; -- cgit v1.1 From a35ae962918258207f9092ccbdf4fffa1f2c70f1 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 1 Jul 2013 21:45:25 +0000 Subject: PR16493: DebugInfo with TLS on PPC crashing due to invalid relocation Restrict the current TLS support to X86 ELF for now. Test that we don't produce it on PPC & we can flesh that test case out with the right thing once someone implements it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185389 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetObjectFile.cpp | 6 ++++++ lib/Target/X86/X86TargetObjectFile.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 871dacd..a00e8d4 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -47,3 +47,9 @@ X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); } + +const MCSymbolRefExpr * +X86LinuxTargetObjectFile::getDebugThreadLocalSymbol( + const MCSymbol *Sym) const { + return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); +} diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 9d26d38..7baedd2 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -36,6 +36,9 @@ namespace llvm { /// and x86-64. class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF { virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + /// \brief Describe a TLS variable address within debug info. + virtual const MCSymbolRefExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; }; } // end namespace llvm -- cgit v1.1 From a785a7bf51726d3fbfb907beb3112eed6b850a3c Mon Sep 17 00:00:00 2001 From: Richard Trieu Date: Mon, 1 Jul 2013 23:06:23 +0000 Subject: Change if (cond) ... else llvm_unreachable("text") to assert(cond && "text") ... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185392 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp index 065971e..bcfd9bb 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -199,12 +199,10 @@ void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, const MCOperand& MO = MI->getOperand(OpNo); O << '#' << (hi? "HI": "LO") << '('; - if (MO.isImm()) { - O << '#'; - printOperand(MI, OpNo, O); - } else { - llvm_unreachable("Unknown symbol operand"); - printOperand(MI, OpNo, O); - } + + assert(MO.isImm() && "Unknown symbol operand"); + + O << '#'; + printOperand(MI, OpNo, O); O << ')'; } -- cgit v1.1 From 228e0afcfd0d5f167a95c6ddbec2c6a4a90b6d2b Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 1 Jul 2013 23:33:29 +0000 Subject: [PowerPC] Add support for TLS data relocations This adds support for TLS data relocations and modifiers: .quad target@dtpmod .quad target@tprel .quad target@dtprel Currently exploited by the asm parser only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185394 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index c26b545..13cd099 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -312,6 +312,15 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC64_ADDR64; break; + case MCSymbolRefExpr::VK_PPC_DTPMOD: + Type = ELF::R_PPC64_DTPMOD64; + break; + case MCSymbolRefExpr::VK_PPC_TPREL: + Type = ELF::R_PPC64_TPREL64; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL: + Type = ELF::R_PPC64_DTPREL64; + break; } break; case FK_Data_4: -- cgit v1.1 From a66aacf6d7e57b21dcd9e866d28749567cfba74b Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 2 Jul 2013 00:00:02 +0000 Subject: [mips] Add new InstrItinClasses for move from/to coprocessor instructions and floating point loads and stores. No changes in functionality. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185399 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 22 +++++++------- lib/Target/Mips/MipsInstrFPU.td | 62 ++++++++++++++++++++------------------ lib/Target/Mips/MipsInstrInfo.td | 34 ++++++++++++--------- lib/Target/Mips/MipsSchedule.td | 8 ++++- 4 files changed, 70 insertions(+), 56 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index df717fe..b55679e 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -131,17 +131,17 @@ let Predicates = [HasMips64r2, HasStdEnc], let DecoderNamespace = "Mips64" in { /// Load and Store Instructions /// aligned -defm LB64 : LoadM<"lb", CPU64Regs, sextloadi8>, LW_FM<0x20>; -defm LBu64 : LoadM<"lbu", CPU64Regs, zextloadi8>, LW_FM<0x24>; -defm LH64 : LoadM<"lh", CPU64Regs, sextloadi16>, LW_FM<0x21>; -defm LHu64 : LoadM<"lhu", CPU64Regs, zextloadi16>, LW_FM<0x25>; -defm LW64 : LoadM<"lw", CPU64Regs, sextloadi32>, LW_FM<0x23>; -defm LWu64 : LoadM<"lwu", CPU64Regs, zextloadi32>, LW_FM<0x27>; -defm SB64 : StoreM<"sb", CPU64Regs, truncstorei8>, LW_FM<0x28>; -defm SH64 : StoreM<"sh", CPU64Regs, truncstorei16>, LW_FM<0x29>; -defm SW64 : StoreM<"sw", CPU64Regs, truncstorei32>, LW_FM<0x2b>; -defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>; -defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>; +defm LB64 : LoadM<"lb", CPU64Regs, sextloadi8, IILoad>, LW_FM<0x20>; +defm LBu64 : LoadM<"lbu", CPU64Regs, zextloadi8, IILoad>, LW_FM<0x24>; +defm LH64 : LoadM<"lh", CPU64Regs, sextloadi16, IILoad>, LW_FM<0x21>; +defm LHu64 : LoadM<"lhu", CPU64Regs, zextloadi16, IILoad>, LW_FM<0x25>; +defm LW64 : LoadM<"lw", CPU64Regs, sextloadi32, IILoad>, LW_FM<0x23>; +defm LWu64 : LoadM<"lwu", CPU64Regs, zextloadi32, IILoad>, LW_FM<0x27>; +defm SB64 : StoreM<"sb", CPU64Regs, truncstorei8, IIStore>, LW_FM<0x28>; +defm SH64 : StoreM<"sh", CPU64Regs, truncstorei16, IIStore>, LW_FM<0x29>; +defm SW64 : StoreM<"sw", CPU64Regs, truncstorei32, IIStore>, LW_FM<0x2b>; +defm LD : LoadM<"ld", CPU64Regs, load, IILoad>, LW_FM<0x37>; +defm SD : StoreM<"sd", CPU64Regs, store, IIStore>, LW_FM<0x3f>; /// load/store left/right defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 6b2b859..c2acec1 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -289,10 +289,12 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; /// Move Control Registers From/To CPU Registers def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>; def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>; -def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>; -def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>; -def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>; -def DMTC1 : MTC1_FT<"dmtc1", FGR64, CPU64Regs, IIFmove, bitconvert>, MFC1_FM<5>; +def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmoveC1, bitconvert>, MFC1_FM<0>; +def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmoveC1, bitconvert>, MFC1_FM<4>; +def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmoveC1, bitconvert>, + MFC1_FM<1>; +def DMTC1 : MTC1_FT<"dmtc1", FGR64, CPU64Regs, IIFmoveC1, bitconvert>, + MFC1_FM<5>; def FMOV_S : ABSS_FT<"mov.s", FGR32, FGR32, IIFmove>, ABSS_FM<0x6, 16>; def FMOV_D32 : ABSS_FT<"mov.d", AFGR64, AFGR64, IIFmove>, ABSS_FM<0x6, 17>, @@ -304,86 +306,86 @@ def FMOV_D64 : ABSS_FT<"mov.d", FGR64, FGR64, IIFmove>, ABSS_FM<0x6, 17>, /// Floating Point Memory Instructions let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LWC1_P8 : LW_FT<"lwc1", FGR32RegsOpnd, IILoad, mem64, load>, LW_FM<0x31>; - def SWC1_P8 : SW_FT<"swc1", FGR32RegsOpnd, IIStore, mem64, store>, + def LWC1_P8 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem64, load>, LW_FM<0x31>; + def SWC1_P8 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem64, store>, LW_FM<0x39>; - def LDC164_P8 : LW_FT<"ldc1", FGR64RegsOpnd, IILoad, mem64, load>, + def LDC164_P8 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem64, load>, LW_FM<0x35> { let isCodeGenOnly =1; } - def SDC164_P8 : SW_FT<"sdc1", FGR64RegsOpnd, IIStore, mem64, store>, + def SDC164_P8 : SW_FT<"sdc1", FGR64RegsOpnd, IIFStore, mem64, store>, LW_FM<0x3d> { let isCodeGenOnly =1; } } let Predicates = [NotN64, HasStdEnc] in { - def LWC1 : LW_FT<"lwc1", FGR32RegsOpnd, IILoad, mem, load>, LW_FM<0x31>; - def SWC1 : SW_FT<"swc1", FGR32RegsOpnd, IIStore, mem, store>, LW_FM<0x39>; + def LWC1 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem, load>, LW_FM<0x31>; + def SWC1 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem, store>, LW_FM<0x39>; } let Predicates = [NotN64, HasMips64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LDC164 : LW_FT<"ldc1", FGR64RegsOpnd, IILoad, mem, load>, LW_FM<0x35>; - def SDC164 : SW_FT<"sdc1", FGR64RegsOpnd, IIStore, mem, store>, LW_FM<0x3d>; + def LDC164 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem, load>, LW_FM<0x35>; + def SDC164 : SW_FT<"sdc1", FGR64RegsOpnd, IIFStore, mem, store>, LW_FM<0x3d>; } let Predicates = [NotN64, NotMips64, HasStdEnc] in { let isPseudo = 1, isCodeGenOnly = 1 in { - def PseudoLDC1 : LW_FT<"", AFGR64RegsOpnd, IILoad, mem, load>; - def PseudoSDC1 : SW_FT<"", AFGR64RegsOpnd, IIStore, mem, store>; + def PseudoLDC1 : LW_FT<"", AFGR64RegsOpnd, IIFLoad, mem, load>; + def PseudoSDC1 : SW_FT<"", AFGR64RegsOpnd, IIFStore, mem, store>; } - def LDC1 : LW_FT<"ldc1", AFGR64RegsOpnd, IILoad, mem>, LW_FM<0x35>; - def SDC1 : SW_FT<"sdc1", AFGR64RegsOpnd, IIStore, mem>, LW_FM<0x3d>; + def LDC1 : LW_FT<"ldc1", AFGR64RegsOpnd, IIFLoad, mem>, LW_FM<0x35>; + def SDC1 : SW_FT<"sdc1", AFGR64RegsOpnd, IIFStore, mem>, LW_FM<0x3d>; } // Indexed loads and stores. let Predicates = [HasFPIdx, HasStdEnc] in { - def LWXC1 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, CPURegsOpnd, IILoad, load>, + def LWXC1 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, CPURegsOpnd, IIFLoad, load>, LWXC1_FM<0>; - def SWXC1 : SWXC1_FT<"swxc1", FGR32RegsOpnd, CPURegsOpnd, IIStore, store>, + def SWXC1 : SWXC1_FT<"swxc1", FGR32RegsOpnd, CPURegsOpnd, IIFStore, store>, SWXC1_FM<8>; } let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in { - def LDXC1 : LWXC1_FT<"ldxc1", AFGR64RegsOpnd, CPURegsOpnd, IILoad, load>, + def LDXC1 : LWXC1_FT<"ldxc1", AFGR64RegsOpnd, CPURegsOpnd, IIFLoad, load>, LWXC1_FM<1>; - def SDXC1 : SWXC1_FT<"sdxc1", AFGR64RegsOpnd, CPURegsOpnd, IIStore, store>, + def SDXC1 : SWXC1_FT<"sdxc1", AFGR64RegsOpnd, CPURegsOpnd, IIFStore, store>, SWXC1_FM<9>; } let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in { - def LDXC164 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, CPURegsOpnd, IILoad, load>, + def LDXC164 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, CPURegsOpnd, IIFLoad, load>, LWXC1_FM<1>; - def SDXC164 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, CPURegsOpnd, IIStore, store>, + def SDXC164 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, CPURegsOpnd, IIFStore, store>, SWXC1_FM<9>; } // n64 let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in { - def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, CPU64RegsOpnd, IILoad, load>, + def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, CPU64RegsOpnd, IIFLoad, load>, LWXC1_FM<0>; - def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, CPU64RegsOpnd, IILoad, + def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, CPU64RegsOpnd, IIFLoad, load>, LWXC1_FM<1>; - def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32RegsOpnd, CPU64RegsOpnd, IIStore, + def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32RegsOpnd, CPU64RegsOpnd, IIFStore, store>, SWXC1_FM<8>; - def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, CPU64RegsOpnd, IIStore, + def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, CPU64RegsOpnd, IIFStore, store>, SWXC1_FM<9>; } // Load/store doubleword indexed unaligned. let Predicates = [NotMips64, HasStdEnc] in { - def LUXC1 : LWXC1_FT<"luxc1", AFGR64RegsOpnd, CPURegsOpnd, IILoad>, + def LUXC1 : LWXC1_FT<"luxc1", AFGR64RegsOpnd, CPURegsOpnd, IIFLoad>, LWXC1_FM<0x5>; - def SUXC1 : SWXC1_FT<"suxc1", AFGR64RegsOpnd, CPURegsOpnd, IIStore>, + def SUXC1 : SWXC1_FT<"suxc1", AFGR64RegsOpnd, CPURegsOpnd, IIFStore>, SWXC1_FM<0xd>; } let Predicates = [HasMips64, HasStdEnc], DecoderNamespace="Mips64" in { - def LUXC164 : LWXC1_FT<"luxc1", FGR64RegsOpnd, CPURegsOpnd, IILoad>, + def LUXC164 : LWXC1_FT<"luxc1", FGR64RegsOpnd, CPURegsOpnd, IIFLoad>, LWXC1_FM<0x5>; - def SUXC164 : SWXC1_FT<"suxc1", FGR64RegsOpnd, CPURegsOpnd, IIStore>, + def SUXC164 : SWXC1_FT<"suxc1", FGR64RegsOpnd, CPURegsOpnd, IIFStore>, SWXC1_FM<0xd>; } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 9279bd4..d2164f7 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -443,7 +443,8 @@ class FMem op, dag outs, dag ins, string asmstr, list pattern, // Memory Load/Store class Load : + InstrItinClass Itin, Operand MemOpnd, ComplexPattern Addr, + string ofsuffix> : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI, !strconcat(opstr, ofsuffix)> { @@ -453,7 +454,8 @@ class Load : + InstrItinClass Itin, Operand MemOpnd, ComplexPattern Addr, + string ofsuffix> : InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI, !strconcat(opstr, ofsuffix)> { @@ -463,10 +465,11 @@ class Store { - def NAME : Load, + def NAME : Load, Requires<[NotN64, HasStdEnc]>; - def _P8 : Load, + def _P8 : Load, Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; @@ -475,10 +478,11 @@ multiclass LoadM { - def NAME : Store, + def NAME : Store, Requires<[NotN64, HasStdEnc]>; - def _P8 : Store, + def _P8 : Store, Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; @@ -917,14 +921,16 @@ let Predicates = [HasMips32r2, HasStdEnc] in { /// Load and Store Instructions /// aligned -defm LB : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM<0x20>; -defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, MMRel, LW_FM<0x24>; -defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, MMRel, LW_FM<0x21>; -defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM<0x25>; -defm LW : LoadM<"lw", CPURegs, load, addrDefault>, MMRel, LW_FM<0x23>; -defm SB : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM<0x28>; -defm SH : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM<0x29>; -defm SW : StoreM<"sw", CPURegs, store>, MMRel, LW_FM<0x2b>; +defm LB : LoadM<"lb", CPURegs, sextloadi8, IILoad>, MMRel, LW_FM<0x20>; +defm LBu : LoadM<"lbu", CPURegs, zextloadi8, IILoad, addrDefault>, MMRel, + LW_FM<0x24>; +defm LH : LoadM<"lh", CPURegs, sextloadi16, IILoad, addrDefault>, MMRel, + LW_FM<0x21>; +defm LHu : LoadM<"lhu", CPURegs, zextloadi16, IILoad>, MMRel, LW_FM<0x25>; +defm LW : LoadM<"lw", CPURegs, load, IILoad, addrDefault>, MMRel, LW_FM<0x23>; +defm SB : StoreM<"sb", CPURegs, truncstorei8, IIStore>, MMRel, LW_FM<0x28>; +defm SH : StoreM<"sh", CPURegs, truncstorei16, IIStore>, MMRel, LW_FM<0x29>; +defm SW : StoreM<"sw", CPURegs, store, IIStore>, MMRel, LW_FM<0x2b>; /// load/store left/right defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>; diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 1add02f..bf6319d 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -35,6 +35,9 @@ def IIFdivDouble : InstrItinClass; def IIFsqrtSingle : InstrItinClass; def IIFsqrtDouble : InstrItinClass; def IIFrecipFsqrtStep : InstrItinClass; +def IIFLoad : InstrItinClass; +def IIFStore : InstrItinClass; +def IIFmoveC1 : InstrItinClass; def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// @@ -59,5 +62,8 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]> + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; -- cgit v1.1 From 75dd57a8f0407be32551cf695e63a106dd051a27 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 2 Jul 2013 03:39:34 +0000 Subject: Cleanup PPC Altivec registers in CSR lists and improve VRSAVE handling There are a couple of (small) related changes here: 1. The printed name of the VRSAVE register has been changed from VRsave to vrsave in order to match the name accepted by GNU binutils. 2. Support for parsing vrsave has been added to the asm parser (it seems that there was no test case specifically covering this code, so I've added one). 3. The list of Altivec registers, which was common to all calling conventions, has been separated out. This allows us to define the base CSR lists, and then lists for each ABI with Altivec included. This allows SjLj, for example, to work correctly on non-Altivec targets without using unnatural definitions of the NoRegs CSR list. 4. VRSAVE is now always reserved on non-Darwin targets and all Altivec registers are reserved when Altivec is disabled. With these changes, it is now possible to compile a function containing __builtin_unwind_init() on Linux/PPC64 with debugging information. This did not work previously because GNU binutils assumes that all .cfi_offset offsets will be 8-byte aligned on PPC64 (and errors out if you provide a non-8-byte-aligned offset). This is not true for the vrsave register, however, because this register is used only on Darwin, GCC does not bother printing a .cfi_offset entry for it (even though there is a slot in the stack frame for it as specified by the ABI). This change allows us to do the same: we will also not print .cfi_offset directives for vrsave. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185409 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 4 +++ lib/Target/PowerPC/PPCCallingConv.td | 39 +++++++++++--------- lib/Target/PowerPC/PPCRegisterInfo.cpp | 52 ++++++++++++++++++--------- lib/Target/PowerPC/PPCRegisterInfo.td | 2 +- 4 files changed, 62 insertions(+), 35 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index cbe1321..7a654ea 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -752,6 +752,10 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) { RegNo = isPPC64()? PPC::CTR8 : PPC::CTR; IntVal = 9; return false; + } else if (Name.equals_lower("vrsave")) { + RegNo = PPC::VRSAVE; + IntVal = 256; + return false; } else if (Name.substr(0, 1).equals_lower("r") && !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal]; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index c8a29a3..a584188 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -105,40 +105,45 @@ def CC_PPC32_SVR4_ByVal : CallingConv<[ CCCustom<"CC_PPC32_SVR4_Custom_Dummy"> ]>; +def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, - F27, F28, F29, F30, F31, CR2, CR3, CR4, - V20, V21, V22, V23, V24, V25, V26, V27, - V28, V29, V30, V31)>; + F27, F28, F29, F30, F31, CR2, CR3, CR4 + )>; + +def CSR_Darwin32_Altivec : CalleeSavedRegs<(add CSR_Darwin32, CSR_Altivec)>; -def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE, +def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, - F27, F28, F29, F30, F31, CR2, CR3, CR4, - V20, V21, V22, V23, V24, V25, V26, V27, - V28, V29, V30, V31)>; + F27, F28, F29, F30, F31, CR2, CR3, CR4 + )>; + +def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>; def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, X30, X31, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, - F27, F28, F29, F30, F31, CR2, CR3, CR4, - V20, V21, V22, V23, V24, V25, V26, V27, - V28, V29, V30, V31)>; + F27, F28, F29, F30, F31, CR2, CR3, CR4 + )>; -def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE, +def CSR_Darwin64_Altivec : CalleeSavedRegs<(add CSR_Darwin64, CSR_Altivec)>; + +def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, X30, X31, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, - F27, F28, F29, F30, F31, CR2, CR3, CR4, - V20, V21, V22, V23, V24, V25, V26, V27, - V28, V29, V30, V31)>; + F27, F28, F29, F30, F31, CR2, CR3, CR4 + )>; + -def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>; -def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>; +def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>; -def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>; +def CSR_NoRegs : CalleeSavedRegs<(add)>; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 96b5bb6..06788fe 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -91,32 +91,41 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const uint16_t* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : - CSR_Darwin32_SaveList; - - return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; + return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? + CSR_Darwin64_Altivec_SaveList : + CSR_Darwin64_SaveList) : + (Subtarget.hasAltivec() ? + CSR_Darwin32_Altivec_SaveList : + CSR_Darwin32_SaveList); + + return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? + CSR_SVR464_Altivec_SaveList : + CSR_SVR464_SaveList) : + (Subtarget.hasAltivec() ? + CSR_SVR432_Altivec_SaveList : + CSR_SVR432_SaveList); } const uint32_t* PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? CSR_Darwin64_RegMask : - CSR_Darwin32_RegMask; - - return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; + return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? + CSR_Darwin64_Altivec_RegMask : + CSR_Darwin64_RegMask) : + (Subtarget.hasAltivec() ? + CSR_Darwin32_Altivec_RegMask : + CSR_Darwin32_RegMask); + + return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? + CSR_SVR464_Altivec_RegMask : + CSR_SVR464_RegMask) : + (Subtarget.hasAltivec() ? + CSR_SVR432_Altivec_RegMask : + CSR_SVR432_RegMask); } const uint32_t* PPCRegisterInfo::getNoPreservedMask() const { - // The naming here is inverted: The CSR_NoRegs_Altivec has the - // Altivec registers masked so that they're not saved and restored around - // instructions with this preserved mask. - - if (!Subtarget.hasAltivec()) - return CSR_NoRegs_Altivec_RegMask; - - if (Subtarget.isDarwin()) - return CSR_NoRegs_Darwin_RegMask; return CSR_NoRegs_RegMask; } @@ -145,6 +154,9 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::LR8); Reserved.set(PPC::RM); + if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec()) + Reserved.set(PPC::VRSAVE); + // The SVR4 ABI reserves r2 and r13 if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::R2); // System-reserved register @@ -170,6 +182,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (PPCFI->needsFP(MF)) Reserved.set(PPC::R31); + // Reserve Altivec registers when Altivec is unavailable. + if (!Subtarget.hasAltivec()) + for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(), + IE = PPC::VRRCRegClass.end(); I != IE; ++I) + Reserved.set(*I); + return Reserved; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index b1b4f06..003e7c3 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -150,7 +150,7 @@ def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>; def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>; // VRsave register -def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[109]>; +def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>; // Carry bit. In the architecture this is really bit 0 of the XER register // (which really is SPR register 1); this is the only bit interesting to a -- cgit v1.1 From 0a39e264330c5f6eb9e5e9e60d276613985e178d Mon Sep 17 00:00:00 2001 From: Logan Chien Date: Tue, 2 Jul 2013 12:43:27 +0000 Subject: Fix ARM EHABI compact model 1 and 2 without handlerdata. According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted after the unwind opcodes. The handler data consists of several words, and should be terminated by zero. In case that the .handlerdata directive is not specified by the programmer, we should emit zero to terminate the handler data. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185422 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index dc3d945..e8b6a5a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -204,7 +204,7 @@ private: void EmitPersonalityFixup(StringRef Name); void FlushPendingOffset(); - void FlushUnwindOpcodes(bool AllowCompactModel0); + void FlushUnwindOpcodes(bool NoHandlerData); void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, SectionKind Kind, const MCSymbol &Fn); @@ -377,7 +377,7 @@ void ARMELFStreamer::FlushPendingOffset() { } } -void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) { +void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { // Emit the unwind opcode to restore $sp. if (UsedFP) { const MCRegisterInfo *MRI = getContext().getRegisterInfo(); @@ -394,7 +394,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) { // For compact model 0, we have to emit the unwind opcodes in the .ARM.exidx // section. Thus, we don't have to create an entry in the .ARM.extab // section. - if (AllowCompactModel0 && PersonalityIndex == AEABI_UNWIND_CPP_PR0) + if (NoHandlerData && PersonalityIndex == AEABI_UNWIND_CPP_PR0) return; // Switch to .ARM.extab section. @@ -418,6 +418,16 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) { // Emit unwind opcodes EmitBytes(StringRef(reinterpret_cast(Opcodes.data()), Opcodes.size()), 0); + + // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or + // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted + // after the unwind opcodes. The handler data consists of several 32-bit + // words, and should be terminated by zero. + // + // In case that the .handlerdata directive is not specified by the + // programmer, we should emit zero to terminate the handler data. + if (NoHandlerData && !Personality) + EmitIntValue(0, 4); } void ARMELFStreamer::EmitHandlerData() { -- cgit v1.1 From 850ba41ed4252aae61e62140cdf0c61dbb444563 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Tue, 2 Jul 2013 14:46:34 +0000 Subject: [XCore] Fix instruction selection for zext, mkmsk instructions. r182680 replaced CountLeadingZeros_32 with a template function countLeadingZeros that relies on using the correct argument type to give the right result. The type passed in the XCore backend after this revision was incorrect in a couple of places. Patch by Robert Lytton. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185430 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreISelDAGToDAG.cpp | 2 +- lib/Target/XCore/XCoreInstrInfo.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 768cba6..e28f84f 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -115,7 +115,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { if (immMskBitp(N)) { // Transformation function: get the size of a mask // Look for the first non-zero bit - SDValue MskSize = getI32Imm(32 - countLeadingZeros(Val)); + SDValue MskSize = getI32Imm(32 - countLeadingZeros((uint32_t)Val)); return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize); } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index e06419a..be152ae 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -84,7 +84,7 @@ def msksize_xform : SDNodeXFormgetZExtValue())); // look for the first non-zero bit - return getI32Imm(32 - countLeadingZeros(N->getZExtValue())); + return getI32Imm(32 - countLeadingZeros((uint32_t)N->getZExtValue())); }]>; def neg_xform : SDNodeXForm Date: Tue, 2 Jul 2013 14:56:45 +0000 Subject: [SystemZ] Add the MVC instruction This is the first use of D(L,B) addressing, which required a fair bit of surgery. For that reason, the patch just adds the instruction definition and the associated assembler and disassembler support. A later patch will actually make use of it for codegen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185433 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 203 ++++++++++++--------- .../SystemZ/Disassembler/SystemZDisassembler.cpp | 19 ++ .../SystemZ/InstPrinter/SystemZInstPrinter.cpp | 11 ++ .../SystemZ/InstPrinter/SystemZInstPrinter.h | 1 + .../SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 14 +- lib/Target/SystemZ/SystemZInstrFormats.td | 13 ++ lib/Target/SystemZ/SystemZInstrInfo.td | 6 + lib/Target/SystemZ/SystemZOperands.td | 100 ++++++---- 8 files changed, 243 insertions(+), 124 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 7c28abd..25df0ec 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -29,19 +29,25 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) { } namespace { +enum RegisterKind { + GR32Reg, + GR64Reg, + GR128Reg, + ADDR32Reg, + ADDR64Reg, + FP32Reg, + FP64Reg, + FP128Reg +}; + +enum MemoryKind { + BDMem, + BDXMem, + BDLMem +}; + class SystemZOperand : public MCParsedAsmOperand { public: - enum RegisterKind { - GR32Reg, - GR64Reg, - GR128Reg, - ADDR32Reg, - ADDR64Reg, - FP32Reg, - FP64Reg, - FP128Reg - }; - private: enum OperandKind { KindInvalid, @@ -77,12 +83,15 @@ private: // Base + Disp + Index, where Base and Index are LLVM registers or 0. // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg). + // Length is the operand length for D(L,B)-style operands, otherwise + // it is null. struct MemOp { unsigned Base : 8; unsigned Index : 8; unsigned RegKind : 8; unsigned Unused : 8; const MCExpr *Disp; + const MCExpr *Length; }; union { @@ -139,12 +148,14 @@ public: } static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp, unsigned Index, - SMLoc StartLoc, SMLoc EndLoc) { + const MCExpr *Length, SMLoc StartLoc, + SMLoc EndLoc) { SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc); Op->Mem.RegKind = RegKind; Op->Mem.Base = Base; Op->Mem.Index = Index; Op->Mem.Disp = Disp; + Op->Mem.Length = Length; return Op; } @@ -191,16 +202,20 @@ public: virtual bool isMem() const LLVM_OVERRIDE { return Kind == KindMem; } - bool isMem(RegisterKind RegKind, bool HasIndex) const { + bool isMem(RegisterKind RegKind, MemoryKind MemKind) const { return (Kind == KindMem && Mem.RegKind == RegKind && - (HasIndex || !Mem.Index)); + (MemKind == BDXMem || !Mem.Index) && + (MemKind == BDLMem) == (Mem.Length != 0)); + } + bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const { + return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff); } - bool isMemDisp12(RegisterKind RegKind, bool HasIndex) const { - return isMem(RegKind, HasIndex) && inRange(Mem.Disp, 0, 0xfff); + bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const { + return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287); } - bool isMemDisp20(RegisterKind RegKind, bool HasIndex) const { - return isMem(RegKind, HasIndex) && inRange(Mem.Disp, -524288, 524287); + bool isMemDisp12Len8(RegisterKind RegKind) const { + return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100); } // Override MCParsedAsmOperand. @@ -236,6 +251,13 @@ public: addExpr(Inst, Mem.Disp); Inst.addOperand(MCOperand::CreateReg(Mem.Index)); } + void addBDLAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(Kind == KindMem && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + addExpr(Inst, Mem.Length); + } // Used by the TableGen code to check for particular operand types. bool isGR32() const { return isReg(GR32Reg); } @@ -247,12 +269,13 @@ public: bool isFP32() const { return isReg(FP32Reg); } bool isFP64() const { return isReg(FP64Reg); } bool isFP128() const { return isReg(FP128Reg); } - bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, false); } - bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, false); } - bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, false); } - bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, false); } - bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, true); } - bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, true); } + bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); } + bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); } + bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); } + bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); } + bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); } + bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); } + bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); } bool isU4Imm() const { return isImm(0, 15); } bool isU6Imm() const { return isImm(0, 63); } bool isU8Imm() const { return isImm(0, 255); } @@ -288,19 +311,16 @@ private: OperandMatchResultTy parseRegister(SmallVectorImpl &Operands, - RegisterGroup Group, const unsigned *Regs, - SystemZOperand::RegisterKind Kind, - bool IsAddress = false); + RegisterGroup Group, const unsigned *Regs, RegisterKind Kind); bool parseAddress(unsigned &Base, const MCExpr *&Disp, - unsigned &Index, const unsigned *Regs, - SystemZOperand::RegisterKind RegKind, - bool HasIndex); + unsigned &Index, const MCExpr *&Length, + const unsigned *Regs, RegisterKind RegKind); OperandMatchResultTy parseAddress(SmallVectorImpl &Operands, - const unsigned *Regs, SystemZOperand::RegisterKind RegKind, - bool HasIndex); + const unsigned *Regs, RegisterKind RegKind, + MemoryKind MemKind); bool parseOperand(SmallVectorImpl &Operands, StringRef Mnemonic); @@ -331,28 +351,23 @@ public: // Used by the TableGen code to parse particular operand types. OperandMatchResultTy parseGR32(SmallVectorImpl &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, - SystemZOperand::GR32Reg); + return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg); } OperandMatchResultTy parseGR64(SmallVectorImpl &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, - SystemZOperand::GR64Reg); + return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg); } OperandMatchResultTy parseGR128(SmallVectorImpl &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, - SystemZOperand::GR128Reg); + return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg); } OperandMatchResultTy parseADDR32(SmallVectorImpl &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, - SystemZOperand::ADDR32Reg, true); + return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg); } OperandMatchResultTy parseADDR64(SmallVectorImpl &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, - SystemZOperand::ADDR64Reg, true); + return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg); } OperandMatchResultTy parseADDR128(SmallVectorImpl &Operands) { @@ -360,33 +375,31 @@ public: } OperandMatchResultTy parseFP32(SmallVectorImpl &Operands) { - return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, - SystemZOperand::FP32Reg); + return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg); } OperandMatchResultTy parseFP64(SmallVectorImpl &Operands) { - return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, - SystemZOperand::FP64Reg); + return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg); } OperandMatchResultTy parseFP128(SmallVectorImpl &Operands) { - return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, - SystemZOperand::FP128Reg); + return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); } OperandMatchResultTy parseBDAddr32(SmallVectorImpl &Operands) { - return parseAddress(Operands, SystemZMC::GR32Regs, - SystemZOperand::ADDR32Reg, false); + return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem); } OperandMatchResultTy parseBDAddr64(SmallVectorImpl &Operands) { - return parseAddress(Operands, SystemZMC::GR64Regs, - SystemZOperand::ADDR64Reg, false); + return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem); } OperandMatchResultTy parseBDXAddr64(SmallVectorImpl &Operands) { - return parseAddress(Operands, SystemZMC::GR64Regs, - SystemZOperand::ADDR64Reg, true); + return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem); + } + OperandMatchResultTy + parseBDLAddr64(SmallVectorImpl &Operands) { + return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem); } OperandMatchResultTy parseAccessReg(SmallVectorImpl &Operands); @@ -474,12 +487,12 @@ bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group, SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::parseRegister(SmallVectorImpl &Operands, RegisterGroup Group, const unsigned *Regs, - SystemZOperand::RegisterKind Kind, - bool IsAddress) { + RegisterKind Kind) { if (Parser.getTok().isNot(AsmToken::Percent)) return MatchOperand_NoMatch; Register Reg; + bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg); if (parseRegister(Reg, Group, Regs, IsAddress)) return MatchOperand_ParseFail; @@ -488,14 +501,13 @@ SystemZAsmParser::parseRegister(SmallVectorImpl &Operands, return MatchOperand_Success; } -// Parse a memory operand into Base, Disp and Index. Regs maps asm -// register numbers to LLVM register numbers and RegKind says what kind -// of address register we're using (ADDR32Reg or ADDR64Reg). HasIndex -// says whether the address allows index registers. +// Parse a memory operand into Base, Disp, Index and Length. +// Regs maps asm register numbers to LLVM register numbers and RegKind +// says what kind of address register we're using (ADDR32Reg or ADDR64Reg). bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, - unsigned &Index, const unsigned *Regs, - SystemZOperand::RegisterKind RegKind, - bool HasIndex) { + unsigned &Index, const MCExpr *&Length, + const unsigned *Regs, + RegisterKind RegKind) { // Parse the displacement, which must always be present. if (getParser().parseExpression(Disp)) return true; @@ -503,27 +515,33 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, // Parse the optional base and index. Index = 0; Base = 0; + Length = 0; if (getLexer().is(AsmToken::LParen)) { Parser.Lex(); - // Parse the first register. - Register Reg; - if (parseRegister(Reg, RegGR, Regs, RegKind)) - return true; + if (getLexer().is(AsmToken::Percent)) { + // Parse the first register and decide whether it's a base or an index. + Register Reg; + if (parseRegister(Reg, RegGR, Regs, RegKind)) + return true; + if (getLexer().is(AsmToken::Comma)) + Index = Reg.Num; + else + Base = Reg.Num; + } else { + // Parse the length. + if (getParser().parseExpression(Length)) + return true; + } - // Check whether there's a second register. If so, the one that we - // just parsed was the index. + // Check whether there's a second register. It's the base if so. if (getLexer().is(AsmToken::Comma)) { Parser.Lex(); - - if (!HasIndex) - return Error(Reg.StartLoc, "invalid use of indexed addressing"); - - Index = Reg.Num; + Register Reg; if (parseRegister(Reg, RegGR, Regs, RegKind)) return true; + Base = Reg.Num; } - Base = Reg.Num; // Consume the closing bracket. if (getLexer().isNot(AsmToken::RParen)) @@ -537,19 +555,37 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, // are as above. SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::parseAddress(SmallVectorImpl &Operands, - const unsigned *Regs, - SystemZOperand::RegisterKind RegKind, - bool HasIndex) { + const unsigned *Regs, RegisterKind RegKind, + MemoryKind MemKind) { SMLoc StartLoc = Parser.getTok().getLoc(); unsigned Base, Index; const MCExpr *Disp; - if (parseAddress(Base, Disp, Index, Regs, RegKind, HasIndex)) + const MCExpr *Length; + if (parseAddress(Base, Disp, Index, Length, Regs, RegKind)) return MatchOperand_ParseFail; + if (Index && MemKind != BDXMem) + { + Error(StartLoc, "invalid use of indexed addressing"); + return MatchOperand_ParseFail; + } + + if (Length && MemKind != BDLMem) + { + Error(StartLoc, "invalid use of length addressing"); + return MatchOperand_ParseFail; + } + + if (!Length && MemKind == BDLMem) + { + Error(StartLoc, "missing length in address"); + return MatchOperand_ParseFail; + } + SMLoc EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index, - StartLoc, EndLoc)); + Length, StartLoc, EndLoc)); return MatchOperand_Success; } @@ -639,14 +675,13 @@ parseOperand(SmallVectorImpl &Operands, // so we treat any plain expression as an immediate. SMLoc StartLoc = Parser.getTok().getLoc(); unsigned Base, Index; - const MCExpr *Expr; - if (parseAddress(Base, Expr, Index, SystemZMC::GR64Regs, - SystemZOperand::ADDR64Reg, true)) + const MCExpr *Expr, *Length; + if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg)) return true; SMLoc EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - if (Base || Index) + if (Base || Index || Length) Operands.push_back(SystemZOperand::createInvalid(StartLoc, EndLoc)); else Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 4e4816b..79469b6 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -226,6 +226,18 @@ static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field, return MCDisassembler::Success; } +static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Length = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Length < 256 && "Invalid BDLAddr12Len8"); + Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::CreateImm(Disp)); + Inst.addOperand(MCOperand::CreateImm(Length + 1)); + return MCDisassembler::Success; +} + static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address, const void *Decoder) { @@ -262,6 +274,13 @@ static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs); } +static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, + uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs); +} + #include "SystemZGenDisassemblerTables.inc" DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index 369802b..37ebff3 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -154,6 +154,17 @@ void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum, MI->getOperand(OpNum + 2).getReg(), O); } +void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + unsigned Base = MI->getOperand(OpNum).getReg(); + uint64_t Disp = MI->getOperand(OpNum + 1).getImm(); + uint64_t Length = MI->getOperand(OpNum + 2).getImm(); + O << Disp << '(' << Length; + if (Base) + O << ",%" << getRegisterName(Base); + O << ')'; +} + void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O) { static const char *const CondNames[] = { diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index f77282e..30cdee5 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -48,6 +48,7 @@ private: void printOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index f8f8998..bda7714 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -49,7 +49,7 @@ private: SmallVectorImpl &Fixups) const; // Called by the TableGen code to get the binary encoding of an address. - // The index, if any, is encoded first, followed by the base, + // The index or length, if any, is encoded first, followed by the base, // followed by the displacement. In a 20-bit displacement, // the low 12 bits are encoded before the high 8 bits. uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum, @@ -60,6 +60,8 @@ private: SmallVectorImpl &Fixups) const; uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl &Fixups) const; + uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups) const; // Operand OpNum of MI needs a PC-relative fixup of kind Kind at // Offset bytes from the start of MI. Add the fixup to Fixups @@ -157,6 +159,16 @@ getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, | ((Disp & 0xff000) >> 12); } +uint64_t SystemZMCCodeEmitter:: +getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups); + uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups) - 1; + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len)); + return (Len << 16) | (Base << 12) | Disp; +} + uint64_t SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl &Fixups, diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index ac0300c..58110ec 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -383,6 +383,19 @@ class InstSIY op, dag outs, dag ins, string asmstr, list pattern> let Has20BitOffset = 1; } +class InstSS op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<24> BDL1; + bits<16> BD2; + + let Inst{47-40} = op; + let Inst{39-16} = BDL1; + let Inst{15-0} = BD2; +} + //===----------------------------------------------------------------------===// // Instruction definitions with semantics //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index ff0d566..3af41e5 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -282,6 +282,12 @@ def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>; def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>; def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; +// Memory-to-memory moves. +let mayLoad = 1, mayStore = 1 in + def MVC : InstSS<0xD2, (outs), (ins bdladdr12onlylen8:$BDL1, + bdaddr12only:$BD2), + "mvc\t$BDL1, $BD2", []>; + //===----------------------------------------------------------------------===// // Sign extensions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 66d9c5f..620876e 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -53,49 +53,63 @@ class PCRelAddress // Constructs an AsmOperandClass for addressing mode FORMAT, treating the // registers as having BITSIZE bits and displacements as having DISPSIZE bits. -class AddressAsmOperand +// LENGTH is "LenN" for addresses with an N-bit length field, otherwise it +// is "". +class AddressAsmOperand : AsmOperandClass { - let Name = format##bitsize##"Disp"##dispsize; + let Name = format##bitsize##"Disp"##dispsize##length; let ParserMethod = "parse"##format##bitsize; let RenderMethod = "add"##format##"Operands"; } // Constructs both a DAG pattern and instruction operand for an addressing mode. -// The mode is selected by custom code in select(), -// encoded by custom code in getEncoding() and decoded -// by custom code in decodeDispOperand(). -// The address registers have BITSIZE bits and displacements have -// DISPSIZE bits. NUMOPS is the number of operands that make up an -// address and OPERANDS lists the types of those operands using (ops ...). -// FORMAT is the type of addressing mode, which needs to match the names -// used in AddressAsmOperand. -class AddressingMode +// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated +// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands +// (base register, displacement, etc.). SELTYPE is the type of the memory +// operand for selection purposes; sometimes we want different selection +// choices for the same underlying addressing mode. SUFFIX is similarly +// a suffix appended to the displacement for selection purposes; +// e.g. we want to reject small 20-bit displacements if a 12-bit form +// also exists, but we want to accept them otherwise. +class AddressingMode : ComplexPattern("i"##bitsize), numops, - "select"##type##dispsize##suffix, + "select"##seltype##dispsize##suffix##length, [add, sub, or, frameindex, z_adjdynalloc]>, Operand("i"##bitsize)> { let PrintMethod = "print"##format##"Operand"; - let EncoderMethod = "get"##format##dispsize##"Encoding"; - let DecoderMethod = "decode"##format##bitsize##"Disp"##dispsize##"Operand"; + let EncoderMethod = "get"##format##dispsize##length##"Encoding"; + let DecoderMethod = + "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; let MIOperandInfo = operands; let ParserMatchClass = - !cast(format##bitsize##"Disp"##dispsize); + !cast(format##bitsize##"Disp"##dispsize##length); } // An addressing mode with a base and displacement but no index. class BDMode - : AddressingMode("ADDR"##bitsize), !cast("disp"##dispsize##"imm"##bitsize))>; // An addressing mode with a base, displacement and index. class BDXMode - : AddressingMode("ADDR"##bitsize), !cast("disp"##dispsize##"imm"##bitsize), !cast("ADDR"##bitsize))>; +// A BDMode paired with an immediate length operand of LENSIZE bits. +class BDLMode + : AddressingMode("ADDR"##bitsize), + !cast("disp"##dispsize##"imm"##bitsize), + !cast("imm"##bitsize))>; + //===----------------------------------------------------------------------===// // Extracting immediate operands from nodes // These all create MVT::i64 nodes to ensure the value is not sign-extended @@ -402,15 +416,16 @@ def disp12imm64 : Operand; def disp20imm32 : Operand; def disp20imm64 : Operand; -def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; -def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; -def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; -def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; -def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; -def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; +def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; +def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; +def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; +def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; +def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; // DAG patterns and operands for addressing modes. Each mode has -// the form where: +// the form [] where: // // is one of: // shift : base + displacement (32-bit) @@ -418,6 +433,7 @@ def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; // bdxaddr : base + displacement + index // laaddr : like bdxaddr, but used for Load Address operations // dynalloc : base + displacement + index + ADJDYNALLOC +// bdladdr : base + displacement with a length field // // is one of: // 12 : the displacement is an unsigned 12-bit value @@ -428,20 +444,26 @@ def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; // range value (12 or 20) // only : used when there is no equivalent instruction with the opposite // range value -def shift12only : BDMode <"BDAddr", "32", "12", "Only">; -def shift20only : BDMode <"BDAddr", "32", "20", "Only">; -def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; -def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; -def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; -def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; -def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; -def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; -def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; -def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; -def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; -def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; -def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; -def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +// +// is one of: +// +// : there is no length field +// len8 : the length field is 8 bits, with a range of [1, 0x100]. +def shift12only : BDMode <"BDAddr", "32", "12", "Only">; +def shift20only : BDMode <"BDAddr", "32", "20", "Only">; +def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; +def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; +def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; +def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; +def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; +def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; +def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; +def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; +def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; +def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; +def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; +def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; //===----------------------------------------------------------------------===// // Miscellaneous -- cgit v1.1 From 1ce4894a3f1ce6e63c1b109c24235d81dea2908f Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 2 Jul 2013 15:28:56 +0000 Subject: [SystemZ] Use MVC to spill loads and stores Try to use MVC when spilling the destination of a simple load or the source of a simple store. As explained in the comment, this doesn't yet handle the case where the load or store location is also a frame index, since that could lead to two simultaneous scavenger spills, something the backend can't handle yet. spill-02.py tests that this restriction kicks in, but unfortunately I've not yet found a case that would fail without it. The volatile trick I used for other scavenger tests doesn't work here because we can't use MVC for volatile accesses anyway. I'm planning on relaxing the restriction later, hopefully with a test that does trigger the problem... Tests @f8 and @f9 also showed that L(G)RL and ST(G)RL were wrongly classified as SimpleBDX{Load,Store}. It wouldn't be easy to test for that bug separately, which is why I didn't split out the fix as a separate patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185434 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 94 ++++++++++++++++++++++++++++++++- lib/Target/SystemZ/SystemZInstrInfo.h | 8 +++ lib/Target/SystemZ/SystemZInstrInfo.td | 24 ++++----- 3 files changed, 113 insertions(+), 13 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 0d30432..af3b711 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -13,6 +13,7 @@ #include "SystemZInstrInfo.h" #include "SystemZInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #define GET_INSTRINFO_CTOR @@ -80,7 +81,8 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { // Return 0 otherwise. // // Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. -static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) { +static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, + unsigned Flag) { const MCInstrDesc &MCID = MI->getDesc(); if ((MCID.TSFlags & Flag) && MI->getOperand(1).isFI() && @@ -315,6 +317,96 @@ SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, FrameIdx); } +// Return true if MI is a simple load or store with a 12-bit displacement +// and no index. Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. +static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) { + const MCInstrDesc &MCID = MI->getDesc(); + return ((MCID.TSFlags & Flag) && + isUInt<12>(MI->getOperand(2).getImm()) && + MI->getOperand(3).getReg() == 0); +} + +// Return a MachineMemOperand for FrameIndex with flags MMOFlags. +// Offset is the byte offset from the start of FrameIndex. +static MachineMemOperand *getFrameMMO(MachineFunction &MF, int FrameIndex, + uint64_t &Offset, unsigned MMOFlags) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Value *V = PseudoSourceValue::getFixedStack(FrameIndex); + return MF.getMachineMemOperand(MachinePointerInfo(V, Offset), MMOFlags, + MFI->getObjectSize(FrameIndex), + MFI->getObjectAlignment(FrameIndex)); +} + +MachineInstr * +SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Size = MFI->getObjectSize(FrameIndex); + + // Eary exit for cases we don't care about + if (Ops.size() != 1) + return 0; + + unsigned OpNum = Ops[0]; + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned RegSize = MF.getRegInfo().getRegClass(Reg)->getSize(); + assert(Size == RegSize && "Invalid size combination"); + + // Look for cases where the source of a simple store or the destination + // of a simple load is being spilled. Try to use MVC instead. + // + // Although MVC is in practice a fast choice in these cases, it is still + // logically a bytewise copy. This means that we cannot use it if the + // load or store is volatile. It also means that the transformation is + // not valid in cases where the two memories partially overlap; however, + // that is not a problem here, because we know that one of the memories + // is a full frame index. + // + // For now we punt if the load or store is also to a frame index. + // In that case we might end up eliminating both of them to out-of-range + // offsets, which might then force the register scavenger to spill two + // other registers. The backend can only handle one such scavenger spill + // at a time. + if (OpNum == 0 && MI->hasOneMemOperand()) { + MachineMemOperand *MMO = *MI->memoperands_begin(); + if (MMO->getSize() == Size && !MMO->isVolatile()) { + // Handle conversion of loads. + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad) && + !MI->getOperand(1).isFI()) { + uint64_t Offset = 0; + MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset, + MachineMemOperand::MOStore); + return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) + .addFrameIndex(FrameIndex).addImm(Offset).addImm(Size) + .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) + .addMemOperand(FrameMMO).addMemOperand(MMO); + } + // Handle conversion of stores. + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore) && + !MI->getOperand(1).isFI()) { + uint64_t Offset = 0; + MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset, + MachineMemOperand::MOLoad); + return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) + .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) + .addImm(Size).addFrameIndex(FrameIndex).addImm(Offset) + .addMemOperand(MMO).addMemOperand(FrameMMO); + } + } + } + + return 0; +} + +MachineInstr * +SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const { + return 0; +} + bool SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch (MI->getOpcode()) { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index d6980f7..8d9a3ea 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -111,6 +111,14 @@ public: unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const; + virtual MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE; virtual bool diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 3af41e5..1b53eb0 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -242,11 +242,8 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, // Register loads. let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { - defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>; - def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; - - def LG : UnaryRXY<"lg", 0xE304, load, GR64>; - def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; + defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>; + def LG : UnaryRXY<"lg", 0xE304, load, GR64>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -255,16 +252,16 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { [(set GR128:$dst, (load bdxaddr20only128:$src))]>; } } +let canFoldAsLoad = 1 in { + def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; + def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; +} // Register stores. let SimpleBDXStore = 1 in { - let isCodeGenOnly = 1 in { - defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>; - def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; - } - - def STG : StoreRXY<"stg", 0xE324, store, GR64>; - def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; + let isCodeGenOnly = 1 in + defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>; + def STG : StoreRXY<"stg", 0xE324, store, GR64>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -273,6 +270,9 @@ let SimpleBDXStore = 1 in { [(store GR128:$src, bdxaddr20only128:$dst)]>; } } +let isCodeGenOnly = 1 in + def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; +def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; // 8-bit immediate stores to 8-bit fields. defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>; -- cgit v1.1 From 35b7bebe1162326c38217ff80d4a49fbbffcc365 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 2 Jul 2013 15:40:22 +0000 Subject: [SystemZ] Use DSGFR over DSGR in more cases Fixes some cases where we were using full 64-bit division for (sdiv i32, i32) and (sdiv i64, i32). The "32" in "SDIVREM32" just refers to the second operand. The first operand of all *DIVREM*s is a GR128. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185435 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 11 ++++++++--- lib/Target/SystemZ/SystemZISelLowering.h | 1 + lib/Target/SystemZ/SystemZInstrInfo.td | 5 ++--- lib/Target/SystemZ/SystemZOperators.td | 1 + 4 files changed, 12 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 955b88e..da4ad38 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1269,18 +1269,23 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); SDLoc DL(Op); + unsigned Opcode; // We use DSGF for 32-bit division. if (is32Bit(VT)) { Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); - Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1); - } + Opcode = SystemZISD::SDIVREM32; + } else if (DAG.ComputeNumSignBits(Op1) > 32) { + Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); + Opcode = SystemZISD::SDIVREM32; + } else + Opcode = SystemZISD::SDIVREM64; // DSG(F) takes a 64-bit dividend, so the even register in the GR128 // input is "don't care". The instruction returns the remainder in // the even register and the quotient in the odd register. SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64, + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, Op0, Op1, Ops[1], Ops[0]); return DAG.getMergeValues(Ops, 2, DL); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index f6c49f0..21b4d72 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -68,6 +68,7 @@ namespace SystemZISD { // first input operands are GR128s. The trailing numbers are the // widths of the second operand in bits. UMUL_LOHI64, + SDIVREM32, SDIVREM64, UDIVREM32, UDIVREM64, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 1b53eb0..44be5da 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -750,14 +750,13 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>; //===----------------------------------------------------------------------===// // Division and remainder, from registers. -def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; +def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; -defm : SXB; // Division and remainder, from memory. -def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>; +def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load>; def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>; def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>; def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 021824e..a84af7a 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -81,6 +81,7 @@ def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; +def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; -- cgit v1.1 From a3863ea2dacafc925a8272ebf9884fc64bef686c Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 2 Jul 2013 15:49:13 +0000 Subject: Remove address spaces from MC. This is dead code since PIC16 was removed in 2010. The result was an odd mix, where some parts would carefully pass it along and others would assert it was zero (most of the object streamer for example). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185436 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64AsmPrinter.cpp | 2 +- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2 +- .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 9 ++++----- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 21 ++++++++++----------- lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 +- lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 5 ----- lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 1 - lib/Target/SystemZ/SystemZAsmPrinter.cpp | 2 +- 8 files changed, 18 insertions(+), 26 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 03d99c6..9498722 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -289,7 +289,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0), 0); + TD->getPointerSize(0)); } Stubs.clear(); } diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 69bb80a..4a0237d 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1918,7 +1918,7 @@ bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().parseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 3b811df..104e4d2 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -85,18 +85,17 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. - virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + virtual void EmitBytes(StringRef Data) { EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data, AddrSpace); + MCELFStreamer::EmitBytes(Data); } /// This is one of the functions used to emit data into an ELF section, so the /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + MCELFStreamer::EmitValueImpl(Value, Size); } private: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index e8b6a5a..6b98205 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -109,18 +109,17 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + virtual void EmitBytes(StringRef Data) { EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data, AddrSpace); + MCELFStreamer::EmitBytes(Data); } /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + MCELFStreamer::EmitValueImpl(Value, Size); } virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -336,17 +335,17 @@ void ARMELFStreamer::EmitFnEnd() { MCSymbolRefExpr::VK_ARM_PREL31, getContext()); - EmitValue(FnStartRef, 4, 0); + EmitValue(FnStartRef, 4); if (CantUnwind) { - EmitIntValue(EXIDX_CANTUNWIND, 4, 0); + EmitIntValue(EXIDX_CANTUNWIND, 4); } else if (ExTab) { // Emit a reference to the unwind opcodes in the ".ARM.extab" section. const MCSymbolRefExpr *ExTabEntryRef = MCSymbolRefExpr::Create(ExTab, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); - EmitValue(ExTabEntryRef, 4, 0); + EmitValue(ExTabEntryRef, 4); } else { // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in // the second word of exception index table entry. The size of the unwind @@ -356,7 +355,7 @@ void ARMELFStreamer::EmitFnEnd() { assert(Opcodes.size() == 4u && "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); EmitBytes(StringRef(reinterpret_cast(Opcodes.data()), - Opcodes.size()), 0); + Opcodes.size())); } // Switch to the section containing FnStart @@ -412,12 +411,12 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { MCSymbolRefExpr::VK_ARM_PREL31, getContext()); - EmitValue(PersonalityRef, 4, 0); + EmitValue(PersonalityRef, 4); } // Emit unwind opcodes EmitBytes(StringRef(reinterpret_cast(Opcodes.data()), - Opcodes.size()), 0); + Opcodes.size())); // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8d350a4..ca84f2f 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -769,7 +769,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { // .long _foo OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 8 : 4/*size*/); } Stubs.clear(); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index f1c44df..59136f3 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -70,11 +70,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { SupportsDebugInformation = true; } -const char* -AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const { - return 0; -} - const MCSection* AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const { return 0; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h index 485167b..22afd63 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -22,7 +22,6 @@ class StringRef; class AMDGPUMCAsmInfo : public MCAsmInfo { public: explicit AMDGPUMCAsmInfo(StringRef &TT); - const char* getDataASDirective(unsigned int Size, unsigned int AS) const; const MCSection* getNonexecutableStackSection(MCContext &CTX) const; }; } // namespace llvm diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 1e15ab1..3a57ea0 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -100,7 +100,7 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0), 0); + TD->getPointerSize(0)); } Stubs.clear(); } -- cgit v1.1 From b0bbfaf3b3e659d22c6dcdf5c1ea71e1089dabba Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 2 Jul 2013 17:24:00 +0000 Subject: Hexagon: Avoid unused variable warnings in Release builds. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185445 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp index bcfd9bb..2ea0d2e 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -196,13 +196,9 @@ void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) const { - const MCOperand& MO = MI->getOperand(OpNo); - - O << '#' << (hi? "HI": "LO") << '('; - - assert(MO.isImm() && "Unknown symbol operand"); + assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand"); - O << '#'; + O << '#' << (hi ? "HI" : "LO") << "(#"; printOperand(MI, OpNo, O); O << ')'; } -- cgit v1.1 From 716a94f0c96d6bef575cd286bafb2cc507adc6b0 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 2 Jul 2013 18:47:09 +0000 Subject: [DebugInfo] Allow getDebugThreadLocalSymbol to return MCExpr This allows getDebugThreadLocalSymbol to return a generic MCExpr instead of just a MCSymbolRefExpr. This is in preparation for supporting debug info for TLS variables on PowerPC, where we need to describe the variable location using a more complex expression than just MCSymbolRefExpr. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185460 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetLoweringObjectFile.cpp | 2 +- lib/Target/X86/X86TargetObjectFile.cpp | 2 +- lib/Target/X86/X86TargetObjectFile.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index fc50aa5..cd810b6 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -318,7 +318,7 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, } } -const MCSymbolRefExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const { +const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const { // FIXME: It's not clear what, if any, default this should have - perhaps a // null return could mean 'no location' & we should just do that here. return MCSymbolRefExpr::Create(Sym, *Ctx); diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index a00e8d4..a19c5a6 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -48,7 +48,7 @@ X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { InitializeELF(TM.Options.UseInitArray); } -const MCSymbolRefExpr * +const MCExpr * X86LinuxTargetObjectFile::getDebugThreadLocalSymbol( const MCSymbol *Sym) const { return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 7baedd2..79c861d 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -38,7 +38,7 @@ namespace llvm { virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); /// \brief Describe a TLS variable address within debug info. - virtual const MCSymbolRefExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; + virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; }; } // end namespace llvm -- cgit v1.1 From b843060ecfa29efb5f896350f6530fa81184e420 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 2 Jul 2013 18:47:35 +0000 Subject: [PowerPC] Support TLS variables in debug info This adds an implementation of getDebugThreadLocalSymbol for (64-bit) PowerPC. This needs to return a generic MCExpr since on ppc64, we need to add a bias of 0x8000 to the value returned by the R_PPC64_DTPREL64 relocation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185461 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCTargetObjectFile.cpp | 10 ++++++++++ lib/Target/PowerPC/PPCTargetObjectFile.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp index 90e4f15..ec1e606 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -55,3 +55,13 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return DefaultSection; } + +const MCExpr *PPC64LinuxTargetObjectFile:: +getDebugThreadLocalSymbol(const MCSymbol *Sym) const { + const MCExpr *Expr = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext()); + return MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(0x8000, getContext()), + getContext()); +} + diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h index 9203e23..262c522 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.h +++ b/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -25,6 +25,9 @@ namespace llvm { virtual const MCSection * SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const; + + /// \brief Describe a TLS variable address within debug info. + virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; }; } // end namespace llvm -- cgit v1.1 From 24dd7dbe7f2a3338a20314b3863f6b738cc1c298 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 2 Jul 2013 21:17:31 +0000 Subject: SystemZ: Fold variable into assertion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185475 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index af3b711..4f919e9 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -351,8 +351,8 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned OpNum = Ops[0]; unsigned Reg = MI->getOperand(OpNum).getReg(); - unsigned RegSize = MF.getRegInfo().getRegClass(Reg)->getSize(); - assert(Size == RegSize && "Invalid size combination"); + assert(Size == MF.getRegInfo().getRegClass(Reg)->getSize() && + "Invalid size combination"); // Look for cases where the source of a simple store or the destination // of a simple load is being spilled. Try to use MVC instead. -- cgit v1.1 From 58fc1f52ce070003acbdfedc85d52ba999a2bd11 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 2 Jul 2013 21:29:06 +0000 Subject: [PowerPC] Remove VK_PPC_TLSGD and VK_PPC_TLSLD The PowerPC-specific modifiers VK_PPC_TLSGD and VK_PPC_TLSLD correspond exactly to the generic modifiers VK_TLSGD and VK_TLSLD. This causes some confusion with the asm parser, since VK_PPC_TLSGD is output as @tlsgd, which is then read back in as VK_TLSGD. To avoid this confusion, this patch removes the PowerPC-specific modifiers and uses the generic modifiers throughout. (The only drawback is that the generic modifiers are printed in upper case while the usual convention on PowerPC is to use lower-case modifiers. But this is just a cosmetic issue.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185476 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 4 ++-- lib/Target/PowerPC/PPCAsmPrinter.cpp | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 13cd099..76cf43f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -295,10 +295,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_nofixup: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_PPC_TLSGD: + case MCSymbolRefExpr::VK_TLSGD: Type = ELF::R_PPC64_TLSGD; break; - case MCSymbolRefExpr::VK_PPC_TLSLD: + case MCSymbolRefExpr::VK_TLSLD: Type = ELF::R_PPC64_TLSLD; break; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index ca84f2f..1be9dfc 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -573,8 +573,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, - OutContext); + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_TLSGD, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD) .addExpr(TlsRef) .addExpr(SymVar)); @@ -625,8 +624,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, - OutContext); + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_TLSLD, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD) .addExpr(TlsRef) .addExpr(SymVar)); -- cgit v1.1 From a17a7e1868076a4430cfa16694bcb42884130928 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 2 Jul 2013 21:31:04 +0000 Subject: [PowerPC] Rework TLS call operand processing As part of the global-dynamic and local-dynamic TLS sequences, we need to use a special form of the call instruction: bl __tls_get_addr(sym@tlsld) bl __tls_get_addr(sym@tlsgd) which generates two fixups. The current implementation of this causes problems with recognizing this form in the asm parser. To fix this, this patch reworks operand processing for this special form by using a single operand to hold both __tls_get_addr and sym@tlsld and defining a print method to output the above form, and an encoding method to generate the two fixups. As a side simplification, the patch replaces the two instruction patterns BL8_NOP_TLSGD and BL8_NOP_TLSLD by a single BL8_NOP_TLS, since the patterns already operate in an identical fashion (whether we have a local-dynamic or global-dynamic symbol is already encoded in the symbol modifier). No change in code generation intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185477 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 7 ++++++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 1 + .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 26 ++++++++++++---------- lib/Target/PowerPC/PPCAsmPrinter.cpp | 8 +++---- lib/Target/PowerPC/PPCCodeEmitter.cpp | 6 +++++ lib/Target/PowerPC/PPCInstr64Bit.td | 15 +++++++------ 6 files changed, 40 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index a676302..08d7665 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -273,6 +273,13 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo+1, O); } +void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printBranchOperand(MI, OpNo, O); + O << '('; + printOperand(MI, OpNo+1, O); + O << ')'; +} /// stripRegisterPrefix - This method strips the character prefix from a diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index da09810..270c241 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -52,6 +52,7 @@ public: void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 0657475..021c082 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -60,6 +60,8 @@ public: SmallVectorImpl &Fixups) const; unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; + unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; @@ -80,7 +82,7 @@ public: unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! unsigned Opcode = MI.getOpcode(); if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP || - Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) + Opcode == PPC::BL8_NOP_TLS) Size = 8; // Output the constant in big endian byte order. @@ -113,17 +115,6 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_br24)); - - // For special TLS calls, add another fixup for the symbol. Apparently - // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently - // similar that TblGen will not generate a separate case for the latter - // two, so this is the only way to get the extra fixup generated. - unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) { - const MCOperand &MO2 = MI.getOperand(OpNo+1); - Fixups.push_back(MCFixup::Create(0, MO2.getExpr(), - (MCFixupKind)PPC::fixup_ppc_nofixup)); - } return 0; } @@ -222,6 +213,17 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, return CTX.getRegisterInfo()->getEncodingValue(PPC::X13); } +unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + // For special TLS calls, we need two fixups; one for the branch target + // (__tls_get_addr), which we create via getDirectBrEncoding as usual, + // and one for the TLSGD or TLSLD symbol, which is emitted here. + const MCOperand &MO = MI.getOperand(OpNo+1); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_nofixup)); + return getDirectBrEncoding(MI, OpNo, Fixups); +} + unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const { diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1be9dfc..849c356 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -562,7 +562,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::GETtlsADDR: { // Transform: %X3 = GETtlsADDR %X3, - // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd) + // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd) assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); StringRef Name = "__tls_get_addr"; @@ -574,7 +574,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_TLSGD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -613,7 +613,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::GETtlsldADDR: { // Transform: %X3 = GETtlsldADDR %X3, - // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld) + // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld) assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); StringRef Name = "__tls_get_addr"; @@ -625,7 +625,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_TLSLD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS) .addExpr(TlsRef) .addExpr(SymVar)); return; diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 382d709..3c7a285 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -71,6 +71,7 @@ namespace { unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const; const char *getPassName() const { return "PowerPC Machine Code Emitter"; } @@ -263,6 +264,11 @@ unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI, return 0; } +unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("TLS not supported on the old JIT."); + return 0; +} unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index b0386c3..a2130e3 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -40,6 +40,11 @@ def tlsreg : Operand { let EncoderMethod = "getTLSRegEncoding"; } def tlsgd : Operand {} +def tlscall : Operand { + let PrintMethod = "printTLSCall"; + let MIOperandInfo = (ops calltarget:$func, tlsgd:$sym); + let EncoderMethod = "getTLSCallEncoding"; +} //===----------------------------------------------------------------------===// // 64-bit transformation functions. @@ -119,13 +124,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { (outs), (ins calltarget:$func), "bl $func\n\tnop", BrB, []>; - def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, - (outs), (ins calltarget:$func, tlsgd:$sym), - "bl $func($sym)\n\tnop", BrB, []>; - - def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, - (outs), (ins calltarget:$func, tlsgd:$sym), - "bl $func($sym)\n\tnop", BrB, []>; + def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins tlscall:$func), + "bl $func\n\tnop", BrB, []>; def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, (outs), (ins abscalltarget:$func), -- cgit v1.1 From 25b9bbae69befa03cc48d4be73b741eff8e523bc Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 2 Jul 2013 21:31:59 +0000 Subject: [PowerPC] PR16512 - Support TLS call sequences in the asm parser This patch now adds support for recognizing TLS call sequences in the asm parser. This needs a new pattern BL8_TLS, which is like BL8_NOP_TLS except without nop. That pattern is used for the asm parser only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185478 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 25 +++++++++++++++++++++++-- lib/Target/PowerPC/PPCInstr64Bit.td | 3 +++ 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 7a654ea..4892963 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -940,8 +940,29 @@ ParseOperand(SmallVectorImpl &Operands) { // Push the parsed operand into the list of operands Operands.push_back(Op); - // Check for D-form memory operands - if (getLexer().is(AsmToken::LParen)) { + // Check whether this is a TLS call expression + bool TLSCall = false; + if (const MCSymbolRefExpr *Ref = dyn_cast(EVal)) + TLSCall = Ref->getSymbol().getName() == "__tls_get_addr"; + + if (TLSCall && getLexer().is(AsmToken::LParen)) { + const MCExpr *TLSSym; + + Parser.Lex(); // Eat the '('. + S = Parser.getTok().getLoc(); + if (ParseExpression(TLSSym)) + return Error(S, "invalid TLS call expression"); + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "missing ')'"); + E = Parser.getTok().getLoc(); + Parser.Lex(); // Eat the ')'. + + Op = PPCOperand::CreateExpr(TLSSym, S, E, isPPC64()); + Operands.push_back(Op); + } + + // Otherwise, check for D-form memory operands + if (!TLSCall && getLexer().is(AsmToken::LParen)) { Parser.Lex(); // Eat the '('. S = Parser.getTok().getLoc(); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index a2130e3..f3c2892 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -116,6 +116,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), "bl $func", BrB, []>; // See Pat patterns below. + def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func), + "bl $func", BrB, []>; + def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), "bla $func", BrB, [(PPCcall (i64 imm:$func))]>; } -- cgit v1.1 From 985148ea873db018dbd2b53f066f5817a9b11aad Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 3 Jul 2013 02:20:49 +0000 Subject: SystemZInstrInfo.cpp: Tweak an assertion. [-Wunused-variable] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185499 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 4f919e9..e9829d5 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -350,8 +350,8 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return 0; unsigned OpNum = Ops[0]; - unsigned Reg = MI->getOperand(OpNum).getReg(); - assert(Size == MF.getRegInfo().getRegClass(Reg)->getSize() && + assert(Size == MF.getRegInfo() + .getRegClass(MI->getOperand(OpNum).getReg())->getSize() && "Invalid size combination"); // Look for cases where the source of a simple store or the destination -- cgit v1.1 From f0f85eab0469ac93f9bb6c7d19aca2c35868d83a Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 3 Jul 2013 04:00:51 +0000 Subject: Added posix function gettimeofday to LibFunc::Func for all platforms but Windows. *NOTE* In a recent version of posix, they added the restrict keyword to the arguments for this function. From some spelunking it seems that on some platforms, the call has restrict on its arguments and others it does not. Thus I left off the restrict keyword from the function prototype in the comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185501 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetLibraryInfo.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index d2967d9..99fff59 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -168,6 +168,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "getlogin_r", "getpwnam", "gets", + "gettimeofday", "htonl", "htons", "iprintf", @@ -490,6 +491,7 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::getitimer); TLI.setUnavailable(LibFunc::getlogin_r); TLI.setUnavailable(LibFunc::getpwnam); + TLI.setUnavailable(LibFunc::gettimeofday); TLI.setUnavailable(LibFunc::htonl); TLI.setUnavailable(LibFunc::htons); TLI.setUnavailable(LibFunc::lchown); -- cgit v1.1 From 79186beb28b94c587714cc4b854389348a869b82 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Wed, 3 Jul 2013 07:48:50 +0000 Subject: [XCore] Add ISel pattern for LDWCP Patch by Robert Lytton. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185518 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index be152ae..529fa13 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -279,12 +279,6 @@ multiclass FRU6_LRU6_backwards_branch opc, string OpcStr> { !strconcat(OpcStr, " $a, $b"), []>; } -multiclass FRU6_LRU6_cp opc, string OpcStr> { - def _ru6: _FRU6; - def _lru6: _FLRU6; -} // U6 multiclass FU6_LU6 opc, string OpcStr, SDNode OpNode> { @@ -539,8 +533,13 @@ def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b), [(store RRegs:$a, (dprelwrapper tglobaladdr:$b))]>; //let Uses = [CP] in .. -let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in -defm LDWCP : FRU6_LRU6_cp<0b011011, "ldw">; +let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in { +def LDWCP_ru6 : _FRU6<0b011011, (outs RRegs:$a), (ins i32imm:$b), + "ldw $a, cp[$b]", []>; +def LDWCP_lru6: _FLRU6<0b011011, (outs RRegs:$a), (ins i32imm:$b), + "ldw $a, cp[$b]", + [(set RRegs:$a, (load (cprelwrapper tglobaladdr:$b)))]>; +} let Uses = [SP] in { let mayStore=1 in { -- cgit v1.1 From 9e333ca4457f2665c50229db967ec53c38abcb9b Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Wed, 3 Jul 2013 07:49:03 +0000 Subject: [XCore] Whitespace fixes, no functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185519 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreInstrInfo.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 529fa13..81fa84d 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -535,10 +535,10 @@ def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b), //let Uses = [CP] in .. let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in { def LDWCP_ru6 : _FRU6<0b011011, (outs RRegs:$a), (ins i32imm:$b), - "ldw $a, cp[$b]", []>; + "ldw $a, cp[$b]", []>; def LDWCP_lru6: _FLRU6<0b011011, (outs RRegs:$a), (ins i32imm:$b), - "ldw $a, cp[$b]", - [(set RRegs:$a, (load (cprelwrapper tglobaladdr:$b)))]>; + "ldw $a, cp[$b]", + [(set RRegs:$a, (load (cprelwrapper tglobaladdr:$b)))]>; } let Uses = [SP] in { -- cgit v1.1 From 52c28b07f039da61a20f21275793c8d3b9b97fe9 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 3 Jul 2013 09:11:00 +0000 Subject: [SystemZ] Fix caller-allocated save slot FIXME Get rid of some old code (and associated FIXME) for handling the caller-allocated register save area. No behavioural change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185525 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZFrameLowering.cpp | 42 ++++++++++--------------- lib/Target/SystemZ/SystemZFrameLowering.h | 14 ++------- lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 11 ++----- 3 files changed, 21 insertions(+), 46 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index 43f1e47..cd80d4e 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -18,15 +18,10 @@ using namespace llvm; -SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, - const SystemZSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, - -SystemZMC::CallFrameSize), - TM(tm), - STI(sti) { +namespace { // The ABI-defined register save slots, relative to the incoming stack // pointer. - static const unsigned SpillOffsetTable[][2] = { + static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { { SystemZ::R2D, 0x10 }, { SystemZ::R3D, 0x18 }, { SystemZ::R4D, 0x20 }, @@ -46,11 +41,23 @@ SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, { SystemZ::F4D, 0x90 }, { SystemZ::F6D, 0x98 } }; +} +SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, + const SystemZSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, + -SystemZMC::CallFrameSize, 8), + TM(tm), STI(sti) { // Create a mapping from register number to save slot offset. RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) - RegSpillOffsets[SpillOffsetTable[I][0]] = SpillOffsetTable[I][1]; + RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset; +} + +const TargetFrameLowering::SpillSlot * +SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { + NumEntries = array_lengthof(SpillOffsetTable); + return SpillOffsetTable; } void SystemZFrameLowering:: @@ -127,14 +134,12 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Scan the call-saved GPRs and find the bounds of the register spill area. - unsigned SavedGPRFrameSize = 0; unsigned LowGPR = 0; unsigned HighGPR = SystemZ::R15D; unsigned StartOffset = -1U; for (unsigned I = 0, E = CSI.size(); I != E; ++I) { unsigned Reg = CSI[I].getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) { - SavedGPRFrameSize += 8; unsigned Offset = RegSpillOffsets[Reg]; assert(Offset && "Unexpected GPR save"); if (StartOffset > Offset) { @@ -144,9 +149,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, } } - // Save information about the range and location of the call-saved - // registers, for use by the epilogue inserter. - ZFI->setSavedGPRFrameSize(SavedGPRFrameSize); + // Save the range of call-saved registers, for use by the epilogue inserter. ZFI->setLowSavedGPR(LowGPR); ZFI->setHighSavedGPR(HighGPR); @@ -449,11 +452,6 @@ int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, // offset is therefore negative. int64_t Offset = (MFFrame->getObjectOffset(FI) + MFFrame->getOffsetAdjustment()); - if (FI >= 0) - // Non-fixed objects are allocated below the incoming stack pointer. - // Account for the space at the top of the frame that we choose not - // to allocate. - Offset += getUnallocatedTopBytes(MF); // Make the offset relative to the incoming stack pointer. Offset -= getOffsetOfLocalArea(); @@ -465,20 +463,12 @@ int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, } uint64_t SystemZFrameLowering:: -getUnallocatedTopBytes(const MachineFunction &MF) const { - return MF.getInfo()->getSavedGPRFrameSize(); -} - -uint64_t SystemZFrameLowering:: getAllocatedStackSize(const MachineFunction &MF) const { const MachineFrameInfo *MFFrame = MF.getFrameInfo(); // Start with the size of the local variables and spill slots. uint64_t StackSize = MFFrame->getStackSize(); - // Remove any bytes that we choose not to allocate. - StackSize -= getUnallocatedTopBytes(MF); - // Include space for an emergency spill slot, if one might be needed. StackSize += getEmergencySpillSlotSize(MF); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index 5ca049c..08321e0 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -29,7 +29,9 @@ public: SystemZFrameLowering(const SystemZTargetMachine &tm, const SystemZSubtarget &sti); - // Override FrameLowering. + // Override TargetFrameLowering. + virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const + LLVM_OVERRIDE; virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const LLVM_OVERRIDE; @@ -59,16 +61,6 @@ public: MachineBasicBlock::iterator MI) const LLVM_OVERRIDE; - // The target-independent code automatically allocates save slots for - // call-saved GPRs. However, we don't need those slots for SystemZ, - // because the ABI sets aside GPR save slots in the caller-allocated part - // of the frame. Since the target-independent code puts this unneeded - // area at the top of the callee-allocated part of frame, we choose not - // to allocate it and adjust the offsets accordingly. Return the - // size of this unallocated area. - // FIXME: seems a bit hackish. - uint64_t getUnallocatedTopBytes(const MachineFunction &MF) const; - // Return the number of bytes in the callee-allocated part of the frame. uint64_t getAllocatedStackSize(const MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 1dc05a7..69c2691 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -15,7 +15,6 @@ namespace llvm { class SystemZMachineFunctionInfo : public MachineFunctionInfo { - unsigned SavedGPRFrameSize; unsigned LowSavedGPR; unsigned HighSavedGPR; unsigned VarArgsFirstGPR; @@ -26,14 +25,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) - : SavedGPRFrameSize(0), LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), - VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0), - ManipulatesSP(false) {} - - // Get and set the number of bytes allocated by generic code to store - // call-saved GPRs. - unsigned getSavedGPRFrameSize() const { return SavedGPRFrameSize; } - void setSavedGPRFrameSize(unsigned bytes) { SavedGPRFrameSize = bytes; } + : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), + VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. -- cgit v1.1 From b997b56383a99f739d7e2aa14e6945fea477e597 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 3 Jul 2013 09:19:58 +0000 Subject: [SystemZ] Rename mapping table fields Rename Function->DispKey and PairType->DispSize. I'd originally used "Function" because I thought it might be useful for other InstMappings. However, it turns out that having two very similar instructions with the same Function makes it pretty useless for anything other than the displacement size key. Other InstMappings will want the key to be defined for only one instruction in the pair. No behavioural change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185526 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFormats.td | 68 +++++++++++++++---------------- lib/Target/SystemZ/SystemZInstrInfo.td | 6 +-- 2 files changed, 37 insertions(+), 37 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 58110ec..d720fee 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -21,12 +21,12 @@ class InstSystemZ opcode, SDPatternOperator operator, multiclass StoreRXPair rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : StoreRX; - let PairType = "20" in + let DispSize = "20" in def Y : StoreRXY; } } @@ -549,10 +549,10 @@ class StoreSIL opcode, SDPatternOperator operator, multiclass StoreSIPair siOpcode, bits<16> siyOpcode, SDPatternOperator operator, Immediate imm> { - let Function = mnemonic in { - let PairType = "12" in + let DispKey = mnemonic in { + let DispSize = "12" in def "" : StoreSI; - let PairType = "20" in + let DispSize = "20" in def Y : StoreSIY; } } @@ -624,10 +624,10 @@ class UnaryRXY opcode, SDPatternOperator operator, multiclass UnaryRXPair rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : UnaryRX; - let PairType = "20" in + let DispSize = "20" in def Y : UnaryRXY; } } @@ -710,10 +710,10 @@ class BinaryRXY opcode, SDPatternOperator operator, multiclass BinaryRXPair rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : BinaryRX; - let PairType = "20" in + let DispSize = "20" in def Y : BinaryRXY; } @@ -740,10 +740,10 @@ class BinarySIY opcode, SDPatternOperator operator, multiclass BinarySIPair siOpcode, bits<16> siyOpcode, SDPatternOperator operator, Operand imm> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : BinarySI; - let PairType = "20" in + let DispSize = "20" in def Y : BinarySIY; } } @@ -829,11 +829,11 @@ class CompareRXY opcode, SDPatternOperator operator, multiclass CompareRXPair rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : CompareRX; - let PairType = "20" in + let DispSize = "20" in def Y : CompareRXY; } @@ -868,10 +868,10 @@ class CompareSIY opcode, SDPatternOperator operator, multiclass CompareSIPair siOpcode, bits<16> siyOpcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm> { - let Function = mnemonic in { - let PairType = "12" in + let DispKey = mnemonic in { + let DispSize = "12" in def "" : CompareSI; - let PairType = "20" in + let DispSize = "20" in def Y : CompareSIY; } @@ -922,10 +922,10 @@ class CmpSwapRSY opcode, SDPatternOperator operator, multiclass CmpSwapRSPair rsOpcode, bits<16> rsyOpcode, SDPatternOperator operator, RegisterOperand cls> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : CmpSwapRS; - let PairType = "20" in + let DispSize = "20" in def Y : CmpSwapRSY; } } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 44be5da..44b28fd 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -424,12 +424,12 @@ def STRVG : StoreRXY<"strvg", 0xE32F, storeu, GR64>; // Load BDX-style addresses. let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, - Function = "la" in { - let PairType = "12" in + DispKey = "la" in { + let DispSize = "12" in def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2), "la\t$R1, $XBD2", [(set GR64:$R1, laaddr12pair:$XBD2)]>; - let PairType = "20" in + let DispSize = "20" in def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2), "lay\t$R1, $XBD2", [(set GR64:$R1, laaddr20pair:$XBD2)]>; -- cgit v1.1 From a10c01a6c62792be825c562314a646437b21bfec Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 3 Jul 2013 09:20:36 +0000 Subject: ARM: relax the atomic release barrier to "dmb ishst" on Swift Swift cores implement store barriers that are stronger than the ARM specification but weaker than general barriers. They are, in fact, just about enough to provide the ordering needed for atomic operations with release semantics. This patch makes use of that quirk. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185527 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ff8571b..cc09754 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2557,8 +2557,18 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } + ConstantSDNode *OrdN = cast(Op.getOperand(1)); + AtomicOrdering Ord = static_cast(OrdN->getZExtValue()); + unsigned Domain = ARM_MB::ISH; + if (Subtarget->isSwift() && Ord == Release) { + // Swift happens to implement ISHST barriers in a way that's compatible with + // Release semantics but weaker than ISH so we'd be fools not to use + // it. Beware: other processors probably don't! + Domain = ARM_MB::ISHST; + } + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(ARM_MB::ISH, MVT::i32)); + DAG.getConstant(Domain, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, -- cgit v1.1 From b81b477cd4392a51112c3af0659ea9fc176e74f1 Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Wed, 3 Jul 2013 09:21:44 +0000 Subject: This corrects the implementation of Thumb ADR instruction. There are three issues: 1. it should accept only 4-byte aligned addresses 2. the maximum offset should be 1020 3. it should be encoded with the offset scaled by two bits git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185528 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 2 +- lib/Target/ARM/ARMInstrThumb.td | 21 ++++++++++++++++----- lib/Target/ARM/ARMInstrThumb2.td | 2 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 25 +++++++++++++++++++++++++ lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 3 ++- lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1 + 6 files changed, 46 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 2492c4e..4d550ee 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -458,7 +458,7 @@ def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } def adrlabel : Operand { let EncoderMethod = "getAdrLabelOpValue"; let ParserMatchClass = AdrLabelAsmOperand; - let PrintMethod = "printAdrLabelOperand"; + let PrintMethod = "printAdrLabelOperand<0>"; } def neon_vcvt_imm32 : Operand { diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 1fff41d..a0edaba 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -69,11 +69,6 @@ def thumb_immshifted_shamt : SDNodeXFormgetTargetConstant(V, MVT::i32); }]>; -// ADR instruction labels. -def t_adrlabel : Operand { - let EncoderMethod = "getThumbAdrLabelOpValue"; -} - // Scaled 4 immediate. def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; } def t_imm0_1020s4 : Operand { @@ -97,12 +92,27 @@ def t_imm0_508s4_neg : Operand { // Define Thumb specific addressing modes. +// unsigned 8-bit, 2-scaled memory offset +class OperandUnsignedOffset_b8s2 : AsmOperandClass { + let Name = "UnsignedOffset_b8s2"; + let PredicateMethod = "isUnsignedOffset<8, 2>"; +} + +def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2; + let OperandType = "OPERAND_PCREL" in { def t_brtarget : Operand { let EncoderMethod = "getThumbBRTargetOpValue"; let DecoderMethod = "DecodeThumbBROperand"; } +// ADR instruction labels. +def t_adrlabel : Operand { + let EncoderMethod = "getThumbAdrLabelOpValue"; + let PrintMethod = "printAdrLabelOperand<2>"; + let ParserMatchClass = UnsignedOffset_b8s2; +} + def t_bcctarget : Operand { let EncoderMethod = "getThumbBCCTargetOpValue"; let DecoderMethod = "DecodeThumbBCCTargetOperand"; @@ -505,6 +515,7 @@ let isBranch = 1, isTerminator = 1 in let Inst{7-0} = target; } + // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index fa87fb9..d71824e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -173,7 +173,7 @@ def t2ldr_pcrel_imm12 : Operand { // ADR instruction labels. def t2adrlabel : Operand { let EncoderMethod = "getT2AdrLabelOpValue"; - let PrintMethod = "printAdrLabelOperand"; + let PrintMethod = "printAdrLabelOperand<0>"; } // t2addrmode_posimm8 := reg + imm8 diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 8595ce3..c270ed0 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -629,6 +629,20 @@ public: bool isITMask() const { return Kind == k_ITCondMask; } bool isITCondCode() const { return Kind == k_CondCode; } bool isImm() const { return Kind == k_Immediate; } + // checks whether this operand is an unsigned offset which fits is a field + // of specified width and scaled by a specific number of bits + template + bool isUnsignedOffset() const { + if (!isImm()) return false; + if (dyn_cast(Imm.Val)) return true; + if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) { + int64_t Val = CE->getValue(); + int64_t Align = 1LL << scale; + int64_t Max = Align * ((1LL << width) - 1); + return ((Val % Align) == 0) && (Val >= 0) && (Val <= Max); + } + return false; + } bool isFPImm() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast(getImm()); @@ -1707,6 +1721,17 @@ public: Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); } + void addUnsignedOffset_b8s2Operands(MCInst &Inst, unsigned N) const { + if(const MCConstantExpr *CE = dyn_cast(getImm())) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue() >> 2)); + return; + } + + const MCSymbolRefExpr *SR = dyn_cast(Imm.Val); + assert(SR && "Unknown value type!"); + Inst.addOperand(MCOperand::CreateExpr(SR)); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 1797c6c..97da232 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -900,6 +900,7 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } +template void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); @@ -909,7 +910,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, return; } - int32_t OffImm = (int32_t)MO.getImm(); + int32_t OffImm = (int32_t)MO.getImm() << scale; O << markup(" void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); -- cgit v1.1 From fa487e83a83c260d6a50f3df00a0eb012553a912 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 3 Jul 2013 10:10:02 +0000 Subject: [SystemZ] Fold more spills Add a mapping from register-based R instructions to the corresponding memory-based . Use it to cut down on the number of spill loads. Some instructions extend their operands from smaller fields, so this required a new TSFlags field to say how big the unextended operand is. This optimisation doesn't trigger for C(G)R and CL(G)R because in practice we always combine those instructions with a branch. Adding a test for every other case probably seems excessive, but it did catch a missed optimisation for DSGF (fixed in r185435). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185529 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFP.td | 182 +++++++++++----------- lib/Target/SystemZ/SystemZInstrFormats.td | 165 +++++++++++++++----- lib/Target/SystemZ/SystemZInstrInfo.cpp | 24 +++ lib/Target/SystemZ/SystemZInstrInfo.h | 8 +- lib/Target/SystemZ/SystemZInstrInfo.td | 241 +++++++++++++++--------------- 5 files changed, 369 insertions(+), 251 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 7499d2f..4317306 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -27,26 +27,26 @@ defm CondStoreF64 : CondStores; - def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>; - def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>; + def LZER : InherentRRE<"lze", 0xB374, FP32, (fpimm0)>; + def LZDR : InherentRRE<"lzd", 0xB375, FP64, (fpimm0)>; + def LZXR : InherentRRE<"lzx", 0xB376, FP128, (fpimm0)>; } // Moves between two floating-point registers. let neverHasSideEffects = 1 in { - def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; - def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; - def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; + def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>; + def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>; + def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>; } // Moves between 64-bit integer and floating-point registers. -def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; -def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; +def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; +def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>; // fcopysign with an FP32 result. let isCodeGenOnly = 1 in { - def CPSDRss : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; - def CPSDRsd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; + def CPSDRss : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP64>; } // The sign of an FP128 is in the high register. @@ -55,8 +55,8 @@ def : Pat<(fcopysign FP32:$src1, FP128:$src2), // fcopysign with an FP64 result. let isCodeGenOnly = 1 in - def CPSDRds : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; -def CPSDRdd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; + def CPSDRds : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP64>; // The sign of an FP128 is in the high register. def : Pat<(fcopysign FP64:$src1, FP128:$src2), @@ -80,8 +80,8 @@ def : CopySign128; - defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64>; + defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; + defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -96,8 +96,8 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { //===----------------------------------------------------------------------===// let SimpleBDXStore = 1 in { - defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32>; - defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64>; + defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>; + defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -114,9 +114,9 @@ let SimpleBDXStore = 1 in { // Convert floating-point values to narrower representations, rounding // according to the current mode. The destination of LEXBR and LDXBR // is a 128-bit value, but only the first register of the pair is used. -def LEDBR : UnaryRRE<"ledbr", 0xB344, fround, FP32, FP64>; -def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; -def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; +def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>; +def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>; +def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>; def : Pat<(f32 (fround FP128:$src)), (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>; @@ -124,36 +124,36 @@ def : Pat<(f64 (fround FP128:$src)), (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>; // Extend register floating-point values to wider representations. -def LDEBR : UnaryRRE<"ldebr", 0xB304, fextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, fextend, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, fextend, FP128, FP64>; +def LDEBR : UnaryRRE<"ldeb", 0xB304, fextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxeb", 0xB306, fextend, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdb", 0xB305, fextend, FP128, FP64>; // Extend memory floating-point values to wider representations. -def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64>; -def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128>; -def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128>; +def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; +def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>; +def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>; // Convert a signed integer register value to a floating-point one. let Defs = [CC] in { - def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; - def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; - def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; + def CEFBR : UnaryRRE<"cefb", 0xB394, sint_to_fp, FP32, GR32>; + def CDFBR : UnaryRRE<"cdfb", 0xB395, sint_to_fp, FP64, GR32>; + def CXFBR : UnaryRRE<"cxfb", 0xB396, sint_to_fp, FP128, GR32>; - def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; - def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; - def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; + def CEGBR : UnaryRRE<"cegb", 0xB3A4, sint_to_fp, FP32, GR64>; + def CDGBR : UnaryRRE<"cdgb", 0xB3A5, sint_to_fp, FP64, GR64>; + def CXGBR : UnaryRRE<"cxgb", 0xB3A6, sint_to_fp, FP128, GR64>; } // Convert a floating-point register value to a signed integer value, // with the second operand (modifier M3) specifying the rounding mode. let Defs = [CC] in { - def CFEBR : UnaryRRF<"cfebr", 0xB398, GR32, FP32>; - def CFDBR : UnaryRRF<"cfdbr", 0xB399, GR32, FP64>; - def CFXBR : UnaryRRF<"cfxbr", 0xB39A, GR32, FP128>; + def CFEBR : UnaryRRF<"cfeb", 0xB398, GR32, FP32>; + def CFDBR : UnaryRRF<"cfdb", 0xB399, GR32, FP64>; + def CFXBR : UnaryRRF<"cfxb", 0xB39A, GR32, FP128>; - def CGEBR : UnaryRRF<"cgebr", 0xB3A8, GR64, FP32>; - def CGDBR : UnaryRRF<"cgdbr", 0xB3A9, GR64, FP64>; - def CGXBR : UnaryRRF<"cgxbr", 0xB3AA, GR64, FP128>; + def CGEBR : UnaryRRF<"cgeb", 0xB3A8, GR64, FP32>; + def CGDBR : UnaryRRF<"cgdb", 0xB3A9, GR64, FP64>; + def CGXBR : UnaryRRF<"cgxb", 0xB3AA, GR64, FP128>; } // fp_to_sint always rounds towards zero, which is modifier value 5. @@ -171,32 +171,32 @@ def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; // Negation (Load Complement). let Defs = [CC] in { - def LCEBR : UnaryRRE<"lcebr", 0xB303, fneg, FP32, FP32>; - def LCDBR : UnaryRRE<"lcdbr", 0xB313, fneg, FP64, FP64>; - def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>; + def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>; + def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>; + def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>; } // Absolute value (Load Positive). let Defs = [CC] in { - def LPEBR : UnaryRRE<"lpebr", 0xB300, fabs, FP32, FP32>; - def LPDBR : UnaryRRE<"lpdbr", 0xB310, fabs, FP64, FP64>; - def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>; + def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>; + def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>; + def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>; } // Negative absolute value (Load Negative). let Defs = [CC] in { - def LNEBR : UnaryRRE<"lnebr", 0xB301, fnabs, FP32, FP32>; - def LNDBR : UnaryRRE<"lndbr", 0xB311, fnabs, FP64, FP64>; - def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>; + def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>; + def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>; + def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>; } // Square root. -def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>; -def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>; -def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>; +def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>; +def SQDBR : UnaryRRE<"sqdb", 0xB315, fsqrt, FP64, FP64>; +def SQXBR : UnaryRRE<"sqxb", 0xB316, fsqrt, FP128, FP128>; -def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32>; -def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64>; +def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>; +def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>; // Round to an integer, with the second operand (modifier M3) specifying // the rounding mode. @@ -205,9 +205,9 @@ def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64>; // that allow this to suppressed (as for fnearbyint), but we don't yet // support -march=z196. let Defs = [CC] in { - def FIEBR : UnaryRRF<"fiebr", 0xB357, FP32, FP32>; - def FIDBR : UnaryRRF<"fidbr", 0xB35F, FP64, FP64>; - def FIXBR : UnaryRRF<"fixbr", 0xB347, FP128, FP128>; + def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>; + def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>; + def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>; } // frint rounds according to the current mode (modifier 0) and detects @@ -223,92 +223,92 @@ def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>; // Addition. let Defs = [CC] in { let isCommutable = 1 in { - def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>; - def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>; - def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>; + def AEBR : BinaryRRE<"aeb", 0xB30A, fadd, FP32, FP32>; + def ADBR : BinaryRRE<"adb", 0xB31A, fadd, FP64, FP64>; + def AXBR : BinaryRRE<"axb", 0xB34A, fadd, FP128, FP128>; } - def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load>; - def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load>; + def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>; + def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>; } // Subtraction. let Defs = [CC] in { - def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>; - def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>; - def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>; + def SEBR : BinaryRRE<"seb", 0xB30B, fsub, FP32, FP32>; + def SDBR : BinaryRRE<"sdb", 0xB31B, fsub, FP64, FP64>; + def SXBR : BinaryRRE<"sxb", 0xB34B, fsub, FP128, FP128>; - def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load>; - def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load>; + def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>; + def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>; } // Multiplication. let isCommutable = 1 in { - def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>; - def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>; - def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>; + def MEEBR : BinaryRRE<"meeb", 0xB317, fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdb", 0xB31C, fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxb", 0xB34C, fmul, FP128, FP128>; } -def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load>; -def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load>; +def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>; +def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; // f64 multiplication of two FP32 registers. -def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; +def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>; def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))), (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit), FP32:$src2)>; // f64 multiplication of an FP32 register and an f32 memory. -def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load>; +def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (extloadf32 bdxaddr12only:$addr))), (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit), bdxaddr12only:$addr)>; // f128 multiplication of two FP64 registers. -def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; +def MXDBR : BinaryRRE<"mxdb", 0xB307, null_frag, FP128, FP64>; def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))), (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. -def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load>; +def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (extloadf64 bdxaddr12only:$addr))), (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high), bdxaddr12only:$addr)>; // Fused multiply-add. -def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>; -def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>; +def MAEBR : TernaryRRD<"maeb", 0xB30E, z_fma, FP32>; +def MADBR : TernaryRRD<"madb", 0xB31E, z_fma, FP64>; -def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load>; -def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load>; +def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>; +def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>; // Fused multiply-subtract. -def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>; -def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>; +def MSEBR : TernaryRRD<"mseb", 0xB30F, z_fms, FP32>; +def MSDBR : TernaryRRD<"msdb", 0xB31F, z_fms, FP64>; -def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load>; -def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load>; +def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>; +def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>; // Division. -def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; -def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>; -def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; +def DEBR : BinaryRRE<"deb", 0xB30D, fdiv, FP32, FP32>; +def DDBR : BinaryRRE<"ddb", 0xB31D, fdiv, FP64, FP64>; +def DXBR : BinaryRRE<"dxb", 0xB34D, fdiv, FP128, FP128>; -def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load>; -def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load>; +def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; +def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; //===----------------------------------------------------------------------===// // Comparisons //===----------------------------------------------------------------------===// let Defs = [CC] in { - def CEBR : CompareRRE<"cebr", 0xB309, z_cmp, FP32, FP32>; - def CDBR : CompareRRE<"cdbr", 0xB319, z_cmp, FP64, FP64>; - def CXBR : CompareRRE<"cxbr", 0xB349, z_cmp, FP128, FP128>; + def CEBR : CompareRRE<"ceb", 0xB309, z_cmp, FP32, FP32>; + def CDBR : CompareRRE<"cdb", 0xB319, z_cmp, FP64, FP64>; + def CXBR : CompareRRE<"cxb", 0xB349, z_cmp, FP128, FP128>; - def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load>; - def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load>; + def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load, 8>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index d720fee..fb530cc 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -28,6 +28,12 @@ class InstSystemZR instructions have a memory-based + // counterpart. OpKey uniquely identifies , while OpType is + // "reg" for R and "mem" for . + string OpKey = ""; + string OpType = "none"; + // True if this instruction is a simple D(X,B) load of a register // (with no sign or zero extension). bit SimpleBDXLoad = 0; @@ -46,11 +52,15 @@ class InstSystemZ AccessBytes = 0; + let TSFlags{0} = SimpleBDXLoad; let TSFlags{1} = SimpleBDXStore; let TSFlags{2} = Has20BitOffset; let TSFlags{3} = HasIndex; let TSFlags{4} = Is128Bit; + let TSFlags{9-5} = AccessBytes; } //===----------------------------------------------------------------------===// @@ -76,6 +86,14 @@ def getDisp20Opcode : InstrMapping { let ValueCols = [["20"]]; } +def getMemOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["OpKey"]; + let ColFields = ["OpType"]; + let KeyCol = ["reg"]; + let ValueCols = [["mem"]]; +} + //===----------------------------------------------------------------------===// // Instruction formats //===----------------------------------------------------------------------===// @@ -468,7 +486,7 @@ class InstSS op, dag outs, dag ins, string asmstr, list pattern> class InherentRRE opcode, RegisterOperand cls, dag src> : InstRRE { let R2 = 0; } @@ -492,28 +510,38 @@ class StoreRILPC opcode, SDPatternOperator operator, } class StoreRX opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr12only> + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> : InstRX { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayStore = 1; + let AccessBytes = bytes; } class StoreRXY opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr20only> + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> : InstRXY { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayStore = 1; + let AccessBytes = bytes; } multiclass StoreRXPair rxOpcode, bits<16> rxyOpcode, - SDPatternOperator operator, RegisterOperand cls> { + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { let DispKey = mnemonic ## #cls in { let DispSize = "12" in - def "" : StoreRX; + def "" : StoreRX; let DispSize = "20" in - def Y : StoreRXY; + def Y : StoreRXY; } } @@ -560,19 +588,28 @@ multiclass StoreSIPair siOpcode, bits<16> siyOpcode, class UnaryRR opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR; + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class UnaryRRE opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRE; + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class UnaryRRF opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRF; + mnemonic#"r\t$R1, $R3, $R2", []> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class UnaryRI opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> @@ -599,44 +636,59 @@ class UnaryRILPC opcode, SDPatternOperator operator, } class UnaryRX opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr12only> + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> : InstRX { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } class UnaryRXE opcode, SDPatternOperator operator, - RegisterOperand cls> + RegisterOperand cls, bits<5> bytes> : InstRXE { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } class UnaryRXY opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr20only> + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> : InstRXY { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } multiclass UnaryRXPair rxOpcode, bits<16> rxyOpcode, - SDPatternOperator operator, RegisterOperand cls> { + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { let DispKey = mnemonic ## #cls in { let DispSize = "12" in - def "" : UnaryRX; + def "" : UnaryRX; let DispSize = "20" in - def Y : UnaryRXY; + def Y : UnaryRXY; } } class BinaryRR opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } @@ -644,8 +696,10 @@ class BinaryRR opcode, SDPatternOperator operator, class BinaryRRE opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRE { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } @@ -653,8 +707,11 @@ class BinaryRRE opcode, SDPatternOperator operator, class BinaryRRF opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRF; + mnemonic#"r\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class BinaryRI opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> @@ -675,46 +732,56 @@ class BinaryRIL opcode, SDPatternOperator operator, } class BinaryRX opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> : InstRX { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class BinaryRXE opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXE { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class BinaryRXY opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> : InstRXY { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } multiclass BinaryRXPair rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, - SDPatternOperator load> { + SDPatternOperator load, bits<5> bytes> { let DispKey = mnemonic ## #cls in { let DispSize = "12" in - def "" : BinaryRX; + def "" : BinaryRX; let DispSize = "20" in - def Y : BinaryRXY; } } @@ -767,14 +834,20 @@ class ShiftRSY opcode, SDPatternOperator operator, class CompareRR opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR; + mnemonic#"r\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class CompareRRE opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRE; + mnemonic#"r\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class CompareRI opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> @@ -801,41 +874,50 @@ class CompareRILPC opcode, SDPatternOperator operator, } class CompareRX opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> : InstRX { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } class CompareRXE opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXE { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } class CompareRXY opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> : InstRXY { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } multiclass CompareRXPair rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, - SDPatternOperator load> { + SDPatternOperator load, bits<5> bytes> { let DispKey = mnemonic ## #cls in { let DispSize = "12" in def "" : CompareRX; + load, bytes, bdxaddr12pair>; let DispSize = "20" in def Y : CompareRXY; + load, bytes, bdxaddr20pair>; } } @@ -880,22 +962,27 @@ multiclass CompareSIPair siOpcode, bits<16> siyOpcode, class TernaryRRD opcode, SDPatternOperator operator, RegisterOperand cls> : InstRRD { + let OpKey = mnemonic ## cls; + let OpType = "reg"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class TernaryRXF opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXF { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class CmpSwapRS opcode, SDPatternOperator operator, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index e9829d5..16207b3 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -397,6 +397,30 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } } + // If the spilled operand is the final one, try to change R + // into . + int MemOpcode = SystemZ::getMemOpcode(MI->getOpcode()); + if (MemOpcode >= 0) { + unsigned NumOps = MI->getNumExplicitOperands(); + if (OpNum == NumOps - 1) { + const MCInstrDesc &MemDesc = get(MemOpcode); + uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); + assert(AccessBytes != 0 && "Size of access should be known"); + assert(AccessBytes <= Size && "Access outside the frame index"); + uint64_t Offset = Size - AccessBytes; + MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset, + MachineMemOperand::MOLoad); + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode)); + for (unsigned I = 0; I < OpNum; ++I) + MIB.addOperand(MI->getOperand(I)); + MIB.addFrameIndex(FrameIndex).addImm(Offset); + if (MemDesc.TSFlags & SystemZII::HasIndex) + MIB.addReg(0); + MIB.addMemOperand(FrameMMO); + return MIB; + } + } + return 0; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 8d9a3ea..11d486c 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -32,8 +32,14 @@ namespace SystemZII { SimpleBDXStore = (1 << 1), Has20BitOffset = (1 << 2), HasIndex = (1 << 3), - Is128Bit = (1 << 4) + Is128Bit = (1 << 4), + AccessSizeMask = (31 << 5), + AccessSizeShift = 5 }; + static inline unsigned getAccessSize(unsigned int Flags) { + return (Flags & AccessSizeMask) >> AccessSizeShift; + } + // SystemZ MachineOperand target flags. enum { // Masks out the bits for the access model. diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 44b28fd..6b74220 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -217,8 +217,8 @@ def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), // Register moves. let neverHasSideEffects = 1 in { - def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; - def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; + def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>; + def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>; } // Immediate moves. @@ -242,8 +242,8 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, // Register loads. let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { - defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>; - def LG : UnaryRXY<"lg", 0xE304, load, GR64>; + defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>; + def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -260,8 +260,8 @@ let canFoldAsLoad = 1 in { // Register stores. let SimpleBDXStore = 1 in { let isCodeGenOnly = 1 in - defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>; - def STG : StoreRXY<"stg", 0xE324, store, GR64>; + defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>; + def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -294,15 +294,15 @@ let mayLoad = 1, mayStore = 1 in // 32-bit extensions from registers. let neverHasSideEffects = 1 in { - def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; - def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; + def LBR : UnaryRRE<"lb", 0xB926, sext8, GR32, GR32>; + def LHR : UnaryRRE<"lh", 0xB927, sext16, GR32, GR32>; } // 64-bit extensions from registers. let neverHasSideEffects = 1 in { - def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; - def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; - def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; + def LGBR : UnaryRRE<"lgb", 0xB906, sext8, GR64, GR64>; + def LGHR : UnaryRRE<"lgh", 0xB907, sext16, GR64, GR64>; + def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>; } // Match 32-to-64-bit sign extensions in which the source is already @@ -311,14 +311,14 @@ def : Pat<(sext_inreg GR64:$src, i32), (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; // 32-bit extensions from memory. -def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32>; -defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32>; +def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32, 1>; +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32, 2>; def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>; // 64-bit extensions from memory. -def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64>; -def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64>; -def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64>; +def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64, 1>; +def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64, 2>; +def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64, 4>; def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>; def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>; @@ -339,15 +339,15 @@ def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; // 32-bit extensions from registers. let neverHasSideEffects = 1 in { - def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; - def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; + def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>; + def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>; } // 64-bit extensions from registers. let neverHasSideEffects = 1 in { - def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; - def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; - def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; + def LLGCR : UnaryRRE<"llgc", 0xB984, zext8, GR64, GR64>; + def LLGHR : UnaryRRE<"llgh", 0xB985, zext16, GR64, GR64>; + def LLGFR : UnaryRRE<"llgf", 0xB916, zext32, GR64, GR32>; } // Match 32-to-64-bit zero extensions in which the source is already @@ -356,14 +356,14 @@ def : Pat<(and GR64:$src, 0xffffffff), (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; // 32-bit extensions from memory. -def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32>; -def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32>; +def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32, 1>; +def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32, 2>; def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>; // 64-bit extensions from memory. -def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64>; -def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64>; -def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64>; +def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64, 1>; +def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64, 2>; +def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64, 4>; def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>; def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>; @@ -377,16 +377,16 @@ def : Pat<(i32 (trunc GR64:$src)), // Truncations of 32-bit registers to memory. let isCodeGenOnly = 1 in { - defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32>; - defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32>; + defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>; + defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>; def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; } // Truncations of 64-bit registers to memory. -defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64>; -defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64>; +defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64, 1>; +defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64, 2>; def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>; -defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64>; +defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64, 4>; def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>; //===----------------------------------------------------------------------===// @@ -405,18 +405,19 @@ def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>; // Byte-swapping register moves. let neverHasSideEffects = 1 in { - def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; - def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; + def LRVR : UnaryRRE<"lrv", 0xB91F, bswap, GR32, GR32>; + def LRVGR : UnaryRRE<"lrvg", 0xB90F, bswap, GR64, GR64>; } // Byte-swapping loads. Unlike normal loads, these instructions are // allowed to access storage more than once. -def LRV : UnaryRXY<"lrv", 0xE31E, loadu, GR32>; -def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu, GR64>; +def LRV : UnaryRXY<"lrv", 0xE31E, loadu, GR32, 4>; +def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu, GR64, 8>; // Likewise byte-swapping stores. -def STRV : StoreRXY<"strv", 0xE33E, storeu, GR32>; -def STRVG : StoreRXY<"strvg", 0xE32F, storeu, GR64>; +def STRV : StoreRXY<"strv", 0xE33E, storeu, GR32, 4>; +def STRVG : StoreRXY<"strvg", 0xE32F, storeu, + GR64, 8>; //===----------------------------------------------------------------------===// // Load address instructions @@ -449,9 +450,9 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, //===----------------------------------------------------------------------===// let Defs = [CC] in { - def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>; - def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>; - def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>; + def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; + def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; } defm : SXU; @@ -460,8 +461,8 @@ defm : SXU; //===----------------------------------------------------------------------===// let isCodeGenOnly = 1 in - defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8>; -defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8>; + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8, 1>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8, 1>; defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>; defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>; @@ -506,10 +507,10 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { - def AR : BinaryRR <"ar", 0x1A, add, GR32, GR32>; - def AGR : BinaryRRE<"agr", 0xB908, add, GR64, GR64>; + def AR : BinaryRR <"a", 0x1A, add, GR32, GR32>; + def AGR : BinaryRRE<"ag", 0xB908, add, GR64, GR64>; } - def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>; + def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>; // Addition of signed 16-bit immediates. def AHI : BinaryRI<"ahi", 0xA7A, add, GR32, imm32sx16>; @@ -520,10 +521,10 @@ let Defs = [CC] in { def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>; // Addition of memory. - defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16>; - defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load>; - def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32>; - def AG : BinaryRXY<"ag", 0xE308, add, GR64, load>; + defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16, 2>; + defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>; + def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32, 4>; + def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>; // Addition to memory. def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; @@ -535,31 +536,31 @@ defm : SXB; let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { - def ALR : BinaryRR <"alr", 0x1E, addc, GR32, GR32>; - def ALGR : BinaryRRE<"algr", 0xB90A, addc, GR64, GR64>; + def ALR : BinaryRR <"al", 0x1E, addc, GR32, GR32>; + def ALGR : BinaryRRE<"alg", 0xB90A, addc, GR64, GR64>; } - def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>; + def ALGFR : BinaryRRE<"algf", 0xB91A, null_frag, GR64, GR32>; // Addition of unsigned 32-bit immediates. def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>; def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>; // Addition of memory. - defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load>; - def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32>; - def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load>; + defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>; + def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32, 4>; + def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>; } defm : ZXB; // Addition producing and using a carry. let Defs = [CC], Uses = [CC] in { // Addition of a register. - def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>; - def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>; + def ALCR : BinaryRRE<"alc", 0xB998, adde, GR32, GR32>; + def ALCGR : BinaryRRE<"alcg", 0xB988, adde, GR64, GR64>; // Addition of memory. - def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load>; - def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load>; + def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load, 4>; + def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>; } //===----------------------------------------------------------------------===// @@ -570,24 +571,24 @@ let Defs = [CC], Uses = [CC] in { // add-immediate instruction instead. let Defs = [CC] in { // Subtraction of a register. - def SR : BinaryRR <"sr", 0x1B, sub, GR32, GR32>; - def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; - def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>; + def SR : BinaryRR <"s", 0x1B, sub, GR32, GR32>; + def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>; + def SGR : BinaryRRE<"sg", 0xB909, sub, GR64, GR64>; // Subtraction of memory. - defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16>; - defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>; - def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>; - def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>; + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16, 2>; + defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>; + def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32, 4>; + def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>; } defm : SXB; // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. - def SLR : BinaryRR <"slr", 0x1F, subc, GR32, GR32>; - def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; - def SLGR : BinaryRRE<"slgr", 0xB90B, subc, GR64, GR64>; + def SLR : BinaryRR <"sl", 0x1F, subc, GR32, GR32>; + def SLGFR : BinaryRRE<"slgf", 0xB91B, null_frag, GR64, GR32>; + def SLGR : BinaryRRE<"slg", 0xB90B, subc, GR64, GR64>; // Subtraction of unsigned 32-bit immediates. These don't match // subc because we prefer addc for constants. @@ -595,21 +596,21 @@ let Defs = [CC] in { def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>; // Subtraction of memory. - defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load>; - def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32>; - def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load>; + defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>; + def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32, 4>; + def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>; } defm : ZXB; // Subtraction producing and using a carry. let Defs = [CC], Uses = [CC] in { // Subtraction of a register. - def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>; - def SLGBR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>; + def SLBR : BinaryRRE<"slb", 0xB999, sube, GR32, GR32>; + def SLGBR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>; // Subtraction of memory. - def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load>; - def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load>; + def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>; + def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load, 8>; } //===----------------------------------------------------------------------===// @@ -619,8 +620,8 @@ let Defs = [CC], Uses = [CC] in { let Defs = [CC] in { // ANDs of a register. let isCommutable = 1 in { - def NR : BinaryRR <"nr", 0x14, and, GR32, GR32>; - def NGR : BinaryRRE<"ngr", 0xB980, and, GR64, GR64>; + def NR : BinaryRR <"n", 0x14, and, GR32, GR32>; + def NGR : BinaryRRE<"ng", 0xB980, and, GR64, GR64>; } // ANDs of a 16-bit immediate, leaving other bits unaffected. @@ -640,8 +641,8 @@ let Defs = [CC] in { def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; // ANDs of memory. - defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load>; + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; // AND to memory defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; @@ -656,8 +657,8 @@ defm : RMWIByte; let Defs = [CC] in { // ORs of a register. let isCommutable = 1 in { - def OR : BinaryRR <"or", 0x16, or, GR32, GR32>; - def OGR : BinaryRRE<"ogr", 0xB981, or, GR64, GR64>; + def OR : BinaryRR <"o", 0x16, or, GR32, GR32>; + def OGR : BinaryRRE<"og", 0xB981, or, GR64, GR64>; } // ORs of a 16-bit immediate, leaving other bits unaffected. @@ -677,8 +678,8 @@ let Defs = [CC] in { def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; // ORs of memory. - defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load>; + defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; // OR to memory defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; @@ -693,8 +694,8 @@ defm : RMWIByte; let Defs = [CC] in { // XORs of a register. let isCommutable = 1 in { - def XR : BinaryRR <"xr", 0x17, xor, GR32, GR32>; - def XGR : BinaryRRE<"xgr", 0xB982, xor, GR64, GR64>; + def XR : BinaryRR <"x", 0x17, xor, GR32, GR32>; + def XGR : BinaryRRE<"xg", 0xB982, xor, GR64, GR64>; } // XORs of a 32-bit immediate, leaving other bits unaffected. @@ -704,8 +705,8 @@ let Defs = [CC] in { def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; // XORs of memory. - defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load>; + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; // XOR to memory defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; @@ -719,10 +720,10 @@ defm : RMWIByte; // Multiplication of a register. let isCommutable = 1 in { - def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>; - def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>; + def MSR : BinaryRRE<"ms", 0xB252, mul, GR32, GR32>; + def MSGR : BinaryRRE<"msg", 0xB90C, mul, GR64, GR64>; } -def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>; +def MSGFR : BinaryRRE<"msgf", 0xB91C, null_frag, GR64, GR32>; defm : SXB; // Multiplication of a signed 16-bit immediate. @@ -734,32 +735,32 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; // Multiplication of memory. -defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16>; -defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load>; -def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32>; -def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load>; +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16, 2>; +defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32, 4>; +def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; // Multiplication of a register, producing two results. -def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; +def MLGR : BinaryRRE<"mlg", 0xB986, z_umul_lohi64, GR128, GR64>; // Multiplication of memory, producing two results. -def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>; +def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; //===----------------------------------------------------------------------===// // Division and remainder //===----------------------------------------------------------------------===// // Division and remainder, from registers. -def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; -def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; -def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; -def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; +def DSGFR : BinaryRRE<"dsgf", 0xB91D, z_sdivrem32, GR128, GR32>; +def DSGR : BinaryRRE<"dsg", 0xB90D, z_sdivrem64, GR128, GR64>; +def DLR : BinaryRRE<"dl", 0xB997, z_udivrem32, GR128, GR32>; +def DLGR : BinaryRRE<"dlg", 0xB987, z_udivrem64, GR128, GR64>; // Division and remainder, from memory. -def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load>; -def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>; -def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>; -def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>; +def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; +def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; +def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; +def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; //===----------------------------------------------------------------------===// // Shifts @@ -805,9 +806,9 @@ let Defs = [CC] in { // Signed comparisons. let Defs = [CC] in { // Comparison with a register. - def CR : CompareRR <"cr", 0x19, z_cmp, GR32, GR32>; - def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; - def CGR : CompareRRE<"cgr", 0xB920, z_cmp, GR64, GR64>; + def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>; + def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>; + def CGR : CompareRRE<"cg", 0xB920, z_cmp, GR64, GR64>; // Comparison with a signed 16-bit immediate. def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>; @@ -818,11 +819,11 @@ let Defs = [CC] in { def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>; // Comparison with memory. - defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16>; - defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load>; - def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16>; - def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32>; - def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load>; + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16, 2>; + defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load, 4>; + def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16, 2>; + def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32, 4>; + def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load, 8>; def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>; def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>; def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>; @@ -839,18 +840,18 @@ defm : SXB; // Unsigned comparisons. let Defs = [CC] in { // Comparison with a register. - def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; - def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; - def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; + def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>; + def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>; + def CLGR : CompareRRE<"clg", 0xB921, z_ucmp, GR64, GR64>; // Comparison with a signed 32-bit immediate. def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>; def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; // Comparison with memory. - defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load>; - def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32>; - def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load>; + defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32, 4>; + def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>; def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, aligned_zextloadi16>; def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, @@ -1003,7 +1004,7 @@ def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2), // and the second giving a copy of the source with the leftmost one bit // cleared. We only use the first result here. let Defs = [CC] in { - def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; + def FLOGR : UnaryRRE<"flog", 0xB983, null_frag, GR128, GR64>; } def : Pat<(ctlz GR64:$src), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>; -- cgit v1.1 From 44175d9715268bfb7c2cb10ebf14474f4a411464 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 3 Jul 2013 12:32:41 +0000 Subject: [PowerPC] Support mtspr/mfspr in the asm parser This adds support for the generic forms of mtspr/mfspr for the asm parser. The compiler will continue to use the specialized patters for mtlr etc. since those are needed to correctly describe data flow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185532 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstr64Bit.td | 2 +- lib/Target/PowerPC/PPCInstrInfo.td | 29 +++++++++++++++++++---------- 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index f3c2892..cd5f19d 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -304,7 +304,7 @@ def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), PPC970_DGroup_First, PPC970_Unit_FXU; } -let Pattern = [(set i64:$rT, readcyclecounter)] in +let isCodeGenOnly = 1, Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), "mfspr $rT, 268", SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 096fd65..9320858 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1830,6 +1830,12 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), // XFX-Form instructions. Instructions that deal with SPRs. // + +def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR), + "mfspr $RT, $SPR", SprMFSPR>; +def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), + "mtspr $SPR, $RT", SprMTSPR>; + let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), "mfctr $rT", SprMFSPR>, @@ -1858,17 +1864,17 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), PPC970_DGroup_First, PPC970_Unit_FXU; } -// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like -// a GPR on the PPC970. As such, copies in and out have the same performance -// characteristics as an OR instruction. -def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), - "mtspr 256, $rS", IntGeneral>, - PPC970_DGroup_Single, PPC970_Unit_FXU; -def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), - "mfspr $rT, 256", IntGeneral>, - PPC970_DGroup_First, PPC970_Unit_FXU; - let isCodeGenOnly = 1 in { + // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed + // like a GPR on the PPC970. As such, copies in and out have the same + // performance characteristics as an OR instruction. + def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), + "mfspr $rT, 256", IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; + def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, (outs VRSAVERC:$reg), (ins gprc:$rS), "mtspr 256, $rS", IntGeneral>, @@ -2321,6 +2327,9 @@ def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; +def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>; +def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>; + def : InstAlias<"xnop", (XORI R0, R0, 0)>; def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; -- cgit v1.1 From bf8eb3d55cc0fe37d0ef140c2492214083a48dcb Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 3 Jul 2013 12:51:09 +0000 Subject: [PowerPC] Make specialized AltiVec patterns isCodeGenOnly A couple of AltiVec patterns are just specialized forms of the generic instruction pattern, and should therefore be marked isCodeGenOnly to avoid confusing the asm parser: VCFSX_0, VCTUXS_0, VCFUX_0, VCTSXS_0, and V_SETALLONES. Noticed by inspection of the generated PPCGenAsmMatcher.inc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185533 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrAltivec.td | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index cc9cf0a..0976559 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -392,7 +392,7 @@ def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), // Defines with the UIM field set to 0 for floating-point // to integer (fp_to_sint/fp_to_uint) conversions and integer // to floating-point (sint_to_fp/uint_to_fp) conversions. -let VA = 0 in { +let isCodeGenOnly = 1, VA = 0 in { def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB), "vcfsx $vD, $vB, 0", VecFP, [(set v4f32:$vD, @@ -664,7 +664,7 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; -let isCodeGenOnly = 1 in +let isCodeGenOnly = 1 in { def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), "vxor $vD, $vD, $vD", VecFP, [(set v4i32:$vD, (v4i32 immAllZerosV))]>; @@ -673,6 +673,7 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), "vspltisw $vD, -1", VecFP, [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } +} } // VALU Operations. //===----------------------------------------------------------------------===// -- cgit v1.1 From 365ef0b197d7c841f8e501da64296df65be4ca23 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 3 Jul 2013 15:07:05 +0000 Subject: Use SmallVectorImpl::iterator/const_iterator instead of SmallVector to avoid specifying the vector size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185540 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/A15SDOptimizer.cpp | 6 +++--- lib/Target/Mips/MipsDelaySlotFiller.cpp | 4 ++-- lib/Target/Mips/MipsISelLowering.h | 2 +- lib/Target/R600/AMDILCFGStructurizer.cpp | 24 ++++++++++++------------ lib/Target/X86/X86ISelLowering.cpp | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index f0d4dbe..e8c2f7c 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -615,7 +615,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { SmallVector Defs = getReadDPRs(MI); bool Modified = false; - for (SmallVector::iterator I = Defs.begin(), E = Defs.end(); + for (SmallVectorImpl::iterator I = Defs.begin(), E = Defs.end(); I != E; ++I) { // Follow the def-use chain for this DPR through COPYs, and also through // PHIs (which are essentially multi-way COPYs). It is because of PHIs that @@ -630,7 +630,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { elideCopiesAndPHIs(Def, DefSrcs); - for (SmallVector::iterator II = DefSrcs.begin(), + for (SmallVectorImpl::iterator II = DefSrcs.begin(), EE = DefSrcs.end(); II != EE; ++II) { MachineInstr *MI = *II; @@ -655,7 +655,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { if (NewReg != 0) { Modified = true; - for (SmallVector::const_iterator I = Uses.begin(), + for (SmallVectorImpl::const_iterator I = Uses.begin(), E = Uses.end(); I != E; ++I) { DEBUG(dbgs() << "Replacing operand " << **I << " with " diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 928a43d..545a38d 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -437,7 +437,7 @@ bool MemDefsUses::hasHazard_(const MachineInstr &MI) { // Check underlying object list. if (getUnderlyingObjects(MI, Objs)) { - for (SmallVector::const_iterator I = Objs.begin(); + for (SmallVectorImpl::const_iterator I = Objs.begin(); I != Objs.end(); ++I) HasHazard |= updateDefsUses(*I, MI.mayStore()); @@ -473,7 +473,7 @@ getUnderlyingObjects(const MachineInstr &MI, SmallVector Objs; GetUnderlyingObjects(const_cast(V), Objs); - for (SmallVector::iterator I = Objs.begin(), E = Objs.end(); + for (SmallVectorImpl::iterator I = Objs.begin(), E = Objs.end(); I != E; ++I) { if (const PseudoSourceValue *PSV = dyn_cast(*I)) { if (PSV->isAliased(MFI)) diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index d9b5ecd..6103db5 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -282,7 +282,7 @@ namespace llvm { /// Return pointer to array of integer argument registers. const uint16_t *intArgRegs() const; - typedef SmallVector::const_iterator byval_iterator; + typedef SmallVectorImpl::const_iterator byval_iterator; byval_iterator byval_begin() const { return ByValArgs.begin(); } byval_iterator byval_end() const { return ByValArgs.end(); } diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index 4910e5d..437480c 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -291,8 +291,8 @@ private: bool hasBackEdge(BlockT *curBlock); unsigned getLoopDepth (LoopT *LoopRep); int countActiveBlock( - typename SmallVector::const_iterator IterStart, - typename SmallVector::const_iterator IterEnd); + typename SmallVectorImpl::const_iterator IterStart, + typename SmallVectorImpl::const_iterator IterEnd); BlockT *findNearestCommonPostDom(std::set&); BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); @@ -367,7 +367,7 @@ bool CFGStructurizer::prepare(FuncT &func, PassT &pass, // Remove unconditional branch instr. // Add dummy exit block iff there are multiple returns. - for (typename SmallVector::const_iterator + for (typename SmallVectorImpl::const_iterator iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); iterBlk != iterEndBlk; ++iterBlk) { @@ -441,12 +441,12 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, << ", numRemaintedBlk = " << numRemainedBlk << "\n"; } - typename SmallVector::const_iterator + typename SmallVectorImpl::const_iterator iterBlk = orderedBlks.begin(); - typename SmallVector::const_iterator + typename SmallVectorImpl::const_iterator iterBlkEnd = orderedBlks.end(); - typename SmallVector::const_iterator + typename SmallVectorImpl::const_iterator sccBeginIter = iterBlk; BlockT *sccBeginBlk = NULL; int sccNumBlk = 0; // The number of active blocks, init to a @@ -571,7 +571,7 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, template void CFGStructurizer::printOrderedBlocks(llvm::raw_ostream &os) { size_t i = 0; - for (typename SmallVector::const_iterator + for (typename SmallVectorImpl::const_iterator iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { @@ -993,7 +993,7 @@ int CFGStructurizer::loopcontPatternMatch(LoopT *loopRep, } } - for (typename SmallVector::iterator + for (typename SmallVectorImpl::iterator iter = contBlk.begin(), iterEnd = contBlk.end(); iter != iterEnd; ++iter) { (*iter)->removeSuccessor(loopHeader); @@ -2082,7 +2082,7 @@ void CFGStructurizer::addDummyExitBlock(SmallVectorpush_back(dummyExitBlk); //insert to function CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); - for (typename SmallVector::iterator iter = + for (typename SmallVectorImpl::iterator iter = retBlks.begin(), iterEnd = retBlks.end(); iter != iterEnd; ++iter) { BlockT *curBlk = *iter; @@ -2206,7 +2206,7 @@ CFGStructurizer::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, newLandBlk = funcRep->CreateMachineBasicBlock(); funcRep->push_back(newLandBlk); //insert to function newLandBlk->addSuccessor(landBlk); - for (typename SmallVector::iterator iter = + for (typename SmallVectorImpl::iterator iter = inpathBlks.begin(), iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { BlockT *curBlk = *iter; @@ -2370,8 +2370,8 @@ unsigned CFGStructurizer::getLoopDepth(LoopT *loopRep) { template int CFGStructurizer::countActiveBlock -(typename SmallVector::const_iterator iterStart, - typename SmallVector::const_iterator iterEnd) { +(typename SmallVectorImpl::const_iterator iterStart, + typename SmallVectorImpl::const_iterator iterEnd) { int count = 0; while (iterStart != iterEnd) { if (!isRetiredBlock(*iterStart)) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 954790b..f69a5be 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8837,7 +8837,7 @@ SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, Opnds.push_back(N->getOperand(1)); for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) { - SmallVector::const_iterator I = Opnds.begin() + Slot; + SmallVectorImpl::const_iterator I = Opnds.begin() + Slot; // BFS traverse all OR'd operands. if (I->getOpcode() == ISD::OR) { Opnds.push_back(I->getOperand(0)); -- cgit v1.1 From f6b67dc7f8ed87443dc03856e789f42ba72ecaa8 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 3 Jul 2013 15:13:30 +0000 Subject: [PowerPC] Remove dead code from PPCDAGToDAGISel::SelectSETCC The subroutine getCRIdxForSetCC has a parameter "Other" and comment: If this returns with Other != -1, then the returned comparison is an or of two simpler comparisons. However for at least the last five years this routine has never returned a value of Other != -1; these cases are now handled differently to begin with. This patch removes the parameter and the code in SelectSETCC that attempted to handle the Other != -1 case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185541 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index cc4478b..ffd8ae9 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -594,12 +594,8 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { /// getCRIdxForSetCC - Return the index of the condition register field /// associated with the SetCC condition, and whether or not the field is /// treated as inverted. That is, lt = 0; ge = 0 inverted. -/// -/// If this returns with Other != -1, then the returned comparison is an or of -/// two simpler comparisons. In this case, Invert is guaranteed to be false. -static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { +static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { Invert = false; - Other = -1; switch (CC) { default: llvm_unreachable("Unknown condition!"); case ISD::SETOLT: @@ -847,8 +843,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } bool Inv; - int OtherCondIdx; - unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx); + unsigned Idx = getCRIdxForSetCC(CC, Inv); SDValue CCReg = SelectCC(LHS, RHS, CC, dl); SDValue IntCR; @@ -859,7 +854,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); - if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1) + if (PPCSubTarget.hasMFOCRF()) IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); else @@ -868,26 +863,13 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; - if (OtherCondIdx == -1 && !Inv) + if (!Inv) return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); // Get the specified bit. SDValue Tmp = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); - if (Inv) { - assert(OtherCondIdx == -1 && "Can't have split plus negation"); - return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); - } - - // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT. - // We already got the bit for the first part of the comparison (e.g. SETULE). - - // Get the other bit of the comparison. - Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31); - SDValue OtherCond = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); - - return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond); + return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); } -- cgit v1.1 From 965b20e39c7fd73846e9b6ed55ba90e032ae3b1b Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 3 Jul 2013 17:05:42 +0000 Subject: [PowerPC] Always use mfocrf if available When accessing just a single CR register, it is always preferable to use mfocrf instead of mfcr, if the former is available on the CPU. Current code makes that distinction in many, but not all places where a single CR register value is retrieved. One missing location is PPCRegisterInfo::lowerCRSpilling. To fix this and make this simpler in the future, this patch changes the bulk of the back-end to always assume mfocrf is available and simply generate it when needed. On machines that actually do not support mfocrf, the instruction is replaced by mfcr at the very end, in EmitInstruction. This has the additional benefit that we no longer need the MFCRpseud hack, since before EmitInstruction we always have a MFOCRF instruction pattern, which already models data flow as required. The patch also adds the MFOCRF8 version of the instruction, which was missing so far. Except for the PPCRegisterInfo::lowerCRSpilling case, no change in generated code intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185556 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 8 ++++---- lib/Target/PowerPC/PPCAsmPrinter.cpp | 23 +++++++++++++--------- lib/Target/PowerPC/PPCCodeEmitter.cpp | 4 ++-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 19 +++++------------- lib/Target/PowerPC/PPCISelLowering.cpp | 12 +++++------ lib/Target/PowerPC/PPCISelLowering.h | 9 ++++----- lib/Target/PowerPC/PPCInstr64Bit.td | 10 ++++------ lib/Target/PowerPC/PPCInstrInfo.td | 18 +---------------- lib/Target/PowerPC/PPCRegisterInfo.cpp | 4 ++-- 9 files changed, 42 insertions(+), 65 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 021c082..cb7f08b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -228,9 +228,8 @@ unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTCRF || - MI.getOpcode() == PPC::MFOCRF || - MI.getOpcode() == PPC::MTCRF8) && + assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || + MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } @@ -242,7 +241,8 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, if (MO.isReg()) { // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. - assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || + assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && + MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 849c356..5129287 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -662,15 +662,20 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addExpr(SymDtprel)); return; } - case PPC::MFCRpseud: - case PPC::MFCR8pseud: - // Transform: %R3 = MFCRpseud %CR7 - // Into: %R3 = MFCR ;; cr7 - OutStreamer.AddComment(PPCInstPrinter:: - getRegisterName(MI->getOperand(1).getReg())); - OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR) - .addReg(MI->getOperand(0).getReg())); - return; + case PPC::MFOCRF: + case PPC::MFOCRF8: + if (!Subtarget.hasMFOCRF()) { + // Transform: %R3 = MFOCRF %CR7 + // Into: %R3 = MFCR ;; cr7 + unsigned NewOpcode = + MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8; + OutStreamer.AddComment(PPCInstPrinter:: + getRegisterName(MI->getOperand(1).getReg())); + OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode) + .addReg(MI->getOperand(0).getReg())); + return; + } + break; case PPC::SYNC: // In Book E sync is called msync, handle this special case here... if (Subtarget.isBookE()) { diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 3c7a285..e9aa4c0 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -143,7 +143,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || - MI.getOpcode() == PPC::MFOCRF) && + MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg()); } @@ -277,7 +277,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && - MI.getOpcode() != PPC::MFOCRF) || + MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); return TM.getRegisterInfo()->getEncodingValue(MO.getReg()); } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index ffd8ae9..901d369 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -854,12 +854,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); - if (PPCSubTarget.hasMFOCRF()) - IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, - CCReg), 0); - else - IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, - CR7Reg, CCReg), 0); + IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, + CCReg), 0); SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; @@ -974,15 +970,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { getSmallIPtrImm(0)); } - case PPCISD::MFCR: { + case PPCISD::MFOCRF: { SDValue InFlag = N->getOperand(1); - // Use MFOCRF if supported. - if (PPCSubTarget.hasMFOCRF()) - return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, - N->getOperand(0), InFlag); - else - return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, - N->getOperand(0), InFlag); + return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, + N->getOperand(0), InFlag); } case ISD::SDIV: { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3378ace..db49e21 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -626,7 +626,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; - case PPCISD::MFCR: return "PPCISD::MFCR"; + case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; @@ -5539,7 +5539,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. - SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32, + SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1)); @@ -7293,16 +7293,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } - // If the user is a MFCR instruction, we know this is safe. Otherwise we - // give up for right now. - if (FlagUser->getOpcode() == PPCISD::MFCR) + // If the user is a MFOCRF instruction, we know this is safe. + // Otherwise we give up for right now. + if (FlagUser->getOpcode() == PPCISD::MFOCRF) return SDValue(VCMPoNode, 0); } break; } case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so - // that we don't have to do a MFCR: instead, branch directly on CR6. This + // that we don't have to do a MFOCRF: instead, branch directly on CR6. This // lowering is done pre-legalize, because the legalizer lowers the predicate // compare down to code that is difficult to reassemble. ISD::CondCode CC = cast(N->getOperand(1))->get(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 1c0ad1b..4801a41 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -116,11 +116,10 @@ namespace llvm { /// Return with a flag operand, matched by 'blr' RET_FLAG, - /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF - /// instructions. This copies the bits corresponding to the specified - /// CRREG into the resultant GPR. Bits corresponding to other CR regs - /// are undefined. - MFCR, + /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. + /// This copies the bits corresponding to the specified CRREG into the + /// resultant GPR. Bits corresponding to other CR regs are undefined. + MFOCRF, // EH_SJLJ_SETJMP - SjLj exception handling setjmp. EH_SJLJ_SETJMP, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index cd5f19d..2426dcd 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -261,16 +261,14 @@ def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins g8rc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; -let isCodeGenOnly = 1 in -def MFCR8pseud: XFXForm_3<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), - "#MFCR8pseud", SprMFCR>, - PPC970_MicroCode, PPC970_Unit_CRU; -} // neverHasSideEffects = 1 +def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), + "mfocrf $rT, $FXM", SprMFCR>, + PPC970_DGroup_First, PPC970_Unit_CRU; -let neverHasSideEffects = 1 in def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +} // neverHasSideEffects = 1 let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf), diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 9320858..e52adee 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1902,30 +1902,14 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins gprc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; -// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters; -// declaring that here gives the local register allocator problems with this: -// vreg = MCRF CR0 -// MFCR -// while not declaring it breaks DeadMachineInstructionElimination. -// As it turns out, in all cases where we currently use this, -// we're only interested in one subregister of it. Represent this in the -// instruction to keep the register allocator from becoming confused. -// -// FIXME: Make this a real Pseudo instruction when the JIT switches to MC. -let isCodeGenOnly = 1 in -def MFCRpseud: XFXForm_3<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), - "#MFCRpseud", SprMFCR>, - PPC970_MicroCode, PPC970_Unit_CRU; - def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -} // neverHasSideEffects = 1 -let neverHasSideEffects = 1 in def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +} // neverHasSideEffects = 1 // Pseudo instruction to perform FADD in round-to-zero mode. let usesCustomInserter = 1, Uses = [RM] in { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 06788fe..e5b4305 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -342,8 +342,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue - // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg. - BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg) + // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg. + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); // If the saved register wasn't CR0, shift the bits left so that they are in -- cgit v1.1 From 33efedc0481c4b0d9866ff526eb1161372b5919f Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 3 Jul 2013 17:59:07 +0000 Subject: [PowerPC] Use mtocrf when available Just as with mfocrf, it is also preferable to use mtocrf instead of mtcrf when only a single CR register is to be written. Current code however always emits mtcrf. This probably does not matter when using an external assembler, since the GNU assembler will in fact automatically replace mtcrf with mtocrf when possible. It does create inefficient code with the integrated assembler, however. To fix this, this patch adds MTOCRF/MTOCRF8 instruction patterns and uses those instead of MTCRF/MTCRF8 everything. Just as done in the MFOCRF patch committed as 185556, these patterns will be converted back to MTCRF if MTOCRF is not available on the machine. As a side effect, this allows to modify the MTCRF pattern to accept the full range of mask operands for the benefit of the asm parser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185561 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 6 +++--- lib/Target/PowerPC/PPCAsmPrinter.cpp | 17 +++++++++++++++++ lib/Target/PowerPC/PPCCodeEmitter.cpp | 6 +++--- lib/Target/PowerPC/PPCFrameLowering.cpp | 4 ++-- lib/Target/PowerPC/PPCInstr64Bit.td | 6 +++++- lib/Target/PowerPC/PPCInstrInfo.td | 8 +++++++- lib/Target/PowerPC/PPCRegisterInfo.cpp | 2 +- 7 files changed, 38 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index cb7f08b..27ad980 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -228,7 +228,7 @@ unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || + assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 || MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); @@ -239,9 +239,9 @@ unsigned PPCMCCodeEmitter:: getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) const { if (MO.isReg()) { - // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. + // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. - assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && + assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 && MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 5129287..8f41b2e 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -676,6 +676,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } break; + case PPC::MTOCRF: + case PPC::MTOCRF8: + if (!Subtarget.hasMFOCRF()) { + // Transform: %CR7 = MTOCRF %R3 + // Into: MTCRF mask, %R3 ;; cr7 + unsigned NewOpcode = + MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8; + unsigned Mask = 0x80 >> OutContext.getRegisterInfo() + ->getEncodingValue(MI->getOperand(0).getReg()); + OutStreamer.AddComment(PPCInstPrinter:: + getRegisterName(MI->getOperand(0).getReg())); + OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode) + .addImm(Mask) + .addReg(MI->getOperand(1).getReg())); + return; + } + break; case PPC::SYNC: // In Book E sync is called msync, handle this special case here... if (Subtarget.isBookE()) { diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index e9aa4c0..418736e 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -142,7 +142,7 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || + assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 || MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg()); @@ -274,9 +274,9 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { if (MO.isReg()) { - // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. + // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. - assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && + assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 && MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); return TM.getRegisterInfo()->getEncodingValue(MO.getReg()); diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 4c57cf6..a19ce23 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -753,7 +753,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, if (!MustSaveCRs.empty()) for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTCRF8), MustSaveCRs[i]) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) .addReg(PPC::X12, getKillRegState(i == e-1)); if (MustSaveLR) @@ -1212,7 +1212,7 @@ restoreCRs(bool isPPC64, bool is31, MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::R12), CSI[CSIIndex].getFrameIdx())); - RestoreOp = PPC::MTCRF; + RestoreOp = PPC::MTOCRF; MoveReg = PPC::R12; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 2426dcd..d19a7d4 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -257,7 +257,11 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), // 64-bit CR instructions let Interpretation64Bit = 1 in { let neverHasSideEffects = 1 in { -def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins g8rc:$rS), +def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST), + "mtocrf $FXM, $ST", BrMCRX>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e52adee..9a8e33b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1898,7 +1898,11 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), "#RESTORE_VRSAVE", []>; let neverHasSideEffects = 1 in { -def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins gprc:$rS), +def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST), + "mtocrf $FXM, $ST", BrMCRX>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -2322,6 +2326,8 @@ def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; +def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>; + def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>; def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm", diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index e5b4305..8a0954c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -403,7 +403,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, .addImm(31); } - BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg) + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), DestReg) .addReg(Reg, RegState::Kill); // Discard the pseudo instruction. -- cgit v1.1 From 51f558c9aed3bf74c2e8f3ff3bf365c94637ecdf Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 3 Jul 2013 18:29:47 +0000 Subject: [PowerPC] Support lmw/stmw in the asm parser This adds support for the load/store multiple instructions, currently used by the asm parser only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185564 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 9a8e33b..022c151 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1378,6 +1378,10 @@ def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src), [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } +// Load Multiple +def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), + "lmw $rD, $src", LdStLMW, []>; + //===----------------------------------------------------------------------===// // PPC32 Store Instructions. // @@ -1508,6 +1512,10 @@ def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STFDUX $rS, $ptrreg, $ptroff)>; +// Store Multiple +def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), + "stmw $rS, $dst", LdStLMW, []>; + def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), "sync $L", LdStSync, []>; def : Pat<(int_ppc_sync), (SYNC 0)>; -- cgit v1.1 From 79c163d6ddeb84ea1743eca0644688951bfc5a97 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Wed, 3 Jul 2013 20:38:01 +0000 Subject: ARM: Prevent ARMAsmParser::shouldOmitCCOutOperand() from misidentifying certain Thumb2 add immediate T3 encodings. Before the fix Thumb2 instructions of type "add rD, rN, #imm" (T3 encoding, see ARM ARM A8.8.4) with rD and rN both being low registers (r0-r7) were classified as having the T4 encoding. The T4 encoding doesn't have a cc_out operand so for above instructions the operand gets erroneously removed, corrupting the token stream and leading to parse errors later in the process. This bug prevented "add r1, r7, #0xcbcbcbcb" from being assembled correctly. Fixes . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185575 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c270ed0..bd4ea53 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5076,15 +5076,6 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast(Operands[5])->isImm()) { // Nest conditions rather than one big 'if' statement for readability. // - // If either register is a high reg, it's either one of the SP - // variants (handled above) or a 32-bit encoding, so we just - // check against T3. If the second register is the PC, this is an - // alternate form of ADR, which uses encoding T4, so check for that too. - if ((!isARMLowRegister(static_cast(Operands[3])->getReg()) || - !isARMLowRegister(static_cast(Operands[4])->getReg())) && - static_cast(Operands[4])->getReg() != ARM::PC && - static_cast(Operands[5])->isT2SOImm()) - return false; // If both registers are low, we're in an IT block, and the immediate is // in range, we should use encoding T1 instead, which has a cc_out. if (inITBlock() && @@ -5092,6 +5083,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, isARMLowRegister(static_cast(Operands[4])->getReg()) && static_cast(Operands[5])->isImm0_7()) return false; + // Check against T3. If the second register is the PC, this is an + // alternate form of ADR, which uses encoding T4, so check for that too. + if (static_cast(Operands[4])->getReg() != ARM::PC && + static_cast(Operands[5])->isT2SOImm()) + return false; // Otherwise, we use encoding T4, which does not have a cc_out // operand. -- cgit v1.1 From 1666c6a8c38a2bb2535fe667023b3f0537e03eb1 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 3 Jul 2013 21:03:35 +0000 Subject: [PowerPC] FreeBSD does not require f128 in its data layout string. Long double is 64 bits on FreeBSD PPC, so the f128 entry is superfluous. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185583 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCSubtarget.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 65b4d21..097f2bc 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -128,7 +128,7 @@ public: // documentation are wrong; these are correct (i.e. "what gcc does"). if (isPPC64() && isSVR4ABI()) { if (TargetTriple.getOS() == llvm::Triple::FreeBSD) - return "E-p:64:64-f64:64:64-i64:64:64-f128:64:64-v128:128:128-n32:64"; + return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64"; else return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64"; } -- cgit v1.1 From 8e2e5ff0240dfb90c6dbc93e7fc441f71bfde400 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 3 Jul 2013 21:42:57 +0000 Subject: [ARM] Improve the instruction selection of vector loads. In the ARM back-end, build_vector nodes are lowered to a target specific build_vector that uses floating point type. This works well, unless the inserted bitcasts survive until instruction selection. In that case, they incur moves between integer unit and floating point unit that may result in inefficient code. In other words, this conversion may introduce artificial dependencies when the code leading to the build vector cannot be completed with a floating point type. In particular, this happens when loads are not aligned. Before this patch, in that case, the compiler generates general purpose loads and creates the floating point vector from them, instead of directly using the vector unit. The patch uses a vector friendly sequence of code when the inserted bitcasts to floating point survived DAGCombine. This is done by a target specific DAGCombine that changes the target specific build_vector into a sequence of insert_vector_elt that get rid of the bitcasts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185587 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 94 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index cc09754..9b68590 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -8773,6 +8773,98 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, return DAG.getNode(ISD::BITCAST, dl, VT, BV); } +/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR. +static SDValue +PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR. + // At that time, we may have inserted bitcasts from integer to float. + // If these bitcasts have survived DAGCombine, change the lowering of this + // BUILD_VECTOR in something more vector friendly, i.e., that does not + // force to use floating point types. + + // Make sure we can change the type of the vector. + // This is possible iff: + // 1. The vector is only used in a bitcast to a integer type. I.e., + // 1.1. Vector is used only once. + // 1.2. Use is a bit convert to an integer type. + // 2. The size of its operands are 32-bits (64-bits are not legal). + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + + // Check 1.1. and 2. + if (EltVT.getSizeInBits() != 32 || !N->hasOneUse()) + return SDValue(); + + // By construction, the input type must be float. + assert(EltVT == MVT::f32 && "Unexpected type!"); + + // Check 1.2. + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() != ISD::BITCAST || + Use->getValueType(0).isFloatingPoint()) + return SDValue(); + + // Check profitability. + // Model is, if more than half of the relevant operands are bitcast from + // i32, turn the build_vector into a sequence of insert_vector_elt. + // Relevant operands are everything that is not statically + // (i.e., at compile time) bitcasted. + unsigned NumOfBitCastedElts = 0; + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumOfRelevantElts = NumElts; + for (unsigned Idx = 0; Idx < NumElts; ++Idx) { + SDValue Elt = N->getOperand(Idx); + if (Elt->getOpcode() == ISD::BITCAST) { + // Assume only bit cast to i32 will go away. + if (Elt->getOperand(0).getValueType() == MVT::i32) + ++NumOfBitCastedElts; + } else if (Elt.getOpcode() == ISD::UNDEF || isa(Elt)) + // Constants are statically casted, thus do not count them as + // relevant operands. + --NumOfRelevantElts; + } + + // Check if more than half of the elements require a non-free bitcast. + if (NumOfBitCastedElts <= NumOfRelevantElts / 2) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + // Create the new vector type. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + // Check if the type is legal. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VecVT)) + return SDValue(); + + // Combine: + // ARMISD::BUILD_VECTOR E1, E2, ..., EN. + // => BITCAST INSERT_VECTOR_ELT + // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), + // (BITCAST EN), N. + SDValue Vec = DAG.getUNDEF(VecVT); + SDLoc dl(N); + for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { + SDValue V = N->getOperand(Idx); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (V.getOpcode() == ISD::BITCAST && + V->getOperand(0).getValueType() == MVT::i32) + // Fold obvious case. + V = V.getOperand(0); + else { + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(V.getNode()); + } + SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); + } + Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(Vec.getNode()); + return Vec; +} + /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. static SDValue PerformInsertEltCombine(SDNode *N, @@ -9709,6 +9801,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: return CombineBaseUpdate(N, DCI); + case ARMISD::BUILD_VECTOR: + return PerformARMBUILD_VECTORCombine(N, DCI); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { -- cgit v1.1 From 62da588a2eb70166e1b6cc332d8084f03117dc12 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Wed, 3 Jul 2013 23:39:13 +0000 Subject: Have ARMBaseRegisterInfo::getCallPreservedMask return the 'correct' mask for the GHC calling convention. This is purely academic because GHC calls are always tail calls so the register mask will never be used; however, this change makes the code clearer and brings the ARM implementation of the GHC calling convention in line with the X86 implementation. Also, it might save someone else some time trying to figuring out what is happening... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185592 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 17 +++++++++++++---- lib/Target/ARM/ARMCallingConv.td | 6 ------ 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 6a9bfc3..9274baf 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -59,14 +59,19 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } if (ghcCall) - return CSR_GHC_SaveList; + // GHC set of callee saved regs is empty as all those regs are + // used for passing STG regs around + return CSR_NoRegs_SaveList; else return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; } const uint32_t* -ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { +ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::GHC) + // This is academic becase all GHC calls are (supposed to be) tail calls + return CSR_NoRegs_RegMask; return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } @@ -77,14 +82,18 @@ ARMBaseRegisterInfo::getNoPreservedMask() const { } const uint32_t* -ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i32 argument (which must also be the register used to return a // single i32 return value) // // In case that the calling convention does not use the same register for - // both, the function should return NULL (does not currently apply) + // both or otherwise does not want to enable this optimization, the function + // should return NULL + if (CC == CallingConv::GHC) + // This is academic becase all GHC calls are (supposed to be) tail calls + return NULL; return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; } diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 8ff666e..89c5223 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -208,9 +208,3 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; - -// GHC set of callee saved regs is empty as all those regs are -// used for passing STG regs around -// add is a workaround for not being able to compile empty list: -// def CSR_GHC : CalleeSavedRegs<()>; -def CSR_GHC : CalleeSavedRegs<(add)>; -- cgit v1.1 From 62204220e1dc2dc21256adf765728ae257b33eac Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 3 Jul 2013 23:56:31 +0000 Subject: Remove the EXCEPTIONADDR, EHSELECTION, and LSDAADDR ISD opcodes. These exception-related opcodes are not used any longer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185596 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 3 --- lib/Target/ARM/ARMISelLowering.cpp | 2 -- lib/Target/Hexagon/HexagonISelLowering.cpp | 5 ----- lib/Target/Mips/MipsISelLowering.cpp | 5 ----- lib/Target/PowerPC/PPCISelLowering.cpp | 5 ----- lib/Target/SystemZ/SystemZISelLowering.cpp | 5 ----- lib/Target/X86/X86ISelLowering.cpp | 4 ---- 7 files changed, 29 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index dff01f7..84051d4 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -249,9 +249,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setExceptionPointerRegister(AArch64::X0); setExceptionSelectorRegister(AArch64::X1); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 9b68590..991a703 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -717,8 +717,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isTargetDarwin()) { // Non-Darwin platforms may return values in these registers via the // personality function. - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index ab8a767..85e1045 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1428,11 +1428,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); if (TM.getSubtargetImpl()->isSubtargetV2()) { diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index bb24b8a..5fd50fd 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -346,11 +346,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FNEG, MVT::f64, Expand); } - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index db49e21..c4f961c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -228,11 +228,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index da4ad38..256c278 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -200,11 +200,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); - // Expand these using getExceptionSelectorRegister() and - // getExceptionPointerRegister(). - setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand); - setOperationAction(ISD::EHSELECTION, PtrVT, Expand); - // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f69a5be..174cc46 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -563,10 +563,6 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); if (Subtarget->is64Bit()) { setExceptionPointerRegister(X86::RAX); setExceptionSelectorRegister(X86::RDX); -- cgit v1.1 From c93822901aef17aaf8bb1303f27b47025fd1d582 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 4 Jul 2013 00:26:30 +0000 Subject: Revert r185595-185596 which broke buildbots. Revert "Simplify landing pad lowering." Revert "Remove the EXCEPTIONADDR, EHSELECTION, and LSDAADDR ISD opcodes." git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185600 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 3 +++ lib/Target/ARM/ARMISelLowering.cpp | 2 ++ lib/Target/Hexagon/HexagonISelLowering.cpp | 5 +++++ lib/Target/Mips/MipsISelLowering.cpp | 5 +++++ lib/Target/PowerPC/PPCISelLowering.cpp | 5 +++++ lib/Target/SystemZ/SystemZISelLowering.cpp | 5 +++++ lib/Target/X86/X86ISelLowering.cpp | 4 ++++ 7 files changed, 29 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 84051d4..dff01f7 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -249,6 +249,9 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setExceptionPointerRegister(AArch64::X0); setExceptionSelectorRegister(AArch64::X1); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 991a703..9b68590 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -717,6 +717,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isTargetDarwin()) { // Non-Darwin platforms may return values in these registers via the // personality function. + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 85e1045..ab8a767 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1428,6 +1428,11 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); if (TM.getSubtargetImpl()->isSubtargetV2()) { diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 5fd50fd..bb24b8a 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -346,6 +346,11 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FNEG, MVT::f64, Expand); } + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c4f961c..db49e21 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -228,6 +228,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 256c278..da4ad38 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -200,6 +200,11 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + // Expand these using getExceptionSelectorRegister() and + // getExceptionPointerRegister(). + setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand); + setOperationAction(ISD::EHSELECTION, PtrVT, Expand); + // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 174cc46..f69a5be 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -563,6 +563,10 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); if (Subtarget->is64Bit()) { setExceptionPointerRegister(X86::RAX); setExceptionSelectorRegister(X86::RDX); -- cgit v1.1 From 6227d5c690504c7ada5780c00a635b282c46e275 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 4 Jul 2013 01:31:24 +0000 Subject: Use SmallVectorImpl::iterator/const_iterator instead of SmallVector to avoid specifying the vector size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185606 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 8 ++++---- lib/Target/MBlaze/MBlazeFrameLowering.cpp | 8 ++++---- lib/Target/Mips/MipsLongBranch.cpp | 2 +- lib/Target/NVPTX/ManagedStringPool.h | 2 +- lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 2 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 2 +- lib/Target/SystemZ/SystemZLongBranch.cpp | 6 +++--- 7 files changed, 15 insertions(+), 15 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 4e97dda..6d05ad2 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1484,7 +1484,7 @@ namespace { unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2); bool RescheduleOps(MachineBasicBlock *MBB, - SmallVector &Ops, + SmallVectorImpl &Ops, unsigned Base, bool isLd, DenseMap &MI2LocMap); bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); @@ -1656,7 +1656,7 @@ namespace { } bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, - SmallVector &Ops, + SmallVectorImpl &Ops, unsigned Base, bool isLd, DenseMap &MI2LocMap) { bool RetVal = false; @@ -1894,7 +1894,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Re-schedule loads. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { unsigned Base = LdBases[i]; - SmallVector &Lds = Base2LdsMap[Base]; + SmallVectorImpl &Lds = Base2LdsMap[Base]; if (Lds.size() > 1) RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); } @@ -1902,7 +1902,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Re-schedule stores. for (unsigned i = 0, e = StBases.size(); i != e; ++i) { unsigned Base = StBases[i]; - SmallVector &Sts = Base2StsMap[Base]; + SmallVectorImpl &Sts = Base2StsMap[Base]; if (Sts.size() > 1) RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); } diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index 172304b..e453bcb 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -42,10 +42,10 @@ static void replaceFrameIndexes(MachineFunction &MF, SmallVector, 16> &FR) { MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo(); - const SmallVector, 16>::iterator FRB = FR.begin(); - const SmallVector, 16>::iterator FRE = FR.end(); + const SmallVectorImpl>::iterator FRB = FR.begin(); + const SmallVectorImpl>::iterator FRE = FR.end(); - SmallVector, 16>::iterator FRI = FRB; + SmallVectorImpl>::iterator FRI = FRB; for (; FRI != FRE; ++FRI) { MFI->RemoveStackObject(FRI->first); int NFI = MFI->CreateFixedObject(4, FRI->second, true); @@ -91,7 +91,7 @@ static void analyzeFrameIndexes(MachineFunction &MF) { MachineRegisterInfo::livein_iterator LII = MRI.livein_begin(); MachineRegisterInfo::livein_iterator LIE = MRI.livein_end(); - const SmallVector &LiveInFI = MBlazeFI->getLiveIn(); + const SmallVectorImpl &LiveInFI = MBlazeFI->getLiveIn(); SmallVector EraseInstr; SmallVector, 16> FrameRelocate; diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 073daba..971176e 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -420,7 +420,7 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { MF = &F; initMBBInfo(); - SmallVector::iterator I, E = MBBInfos.end(); + SmallVectorImpl::iterator I, E = MBBInfos.end(); bool EverMadeChange = false, MadeChange = true; while (MadeChange) { diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h index d6c79b5..f9fb059 100644 --- a/lib/Target/NVPTX/ManagedStringPool.h +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -29,7 +29,7 @@ class ManagedStringPool { public: ManagedStringPool() {} ~ManagedStringPool() { - SmallVector::iterator Current = Pool.begin(); + SmallVectorImpl::iterator Current = Pool.begin(); while (Current != Pool.end()) { delete *Current; Current++; diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 1077c46..9f92a5b 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -384,7 +384,7 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) { // Replace the old operands with the new operands. N->dropAllReferences(); - for (SmallVector::iterator I = NewOperands.begin(), + for (SmallVectorImpl::iterator I = NewOperands.begin(), E = NewOperands.end(); I != E; ++I) { N->addOperand(*I); diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index bfc9495..30181a0 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -402,7 +402,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { BasicBlock *CountedExitBlock = 0; const SCEV *ExitCount = 0; BranchInst *CountedExitBranch = 0; - for (SmallVector::iterator I = ExitingBlocks.begin(), + for (SmallVectorImpl::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); DEBUG(dbgs() << "Exit Count for " << *L << " from block " << diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 2cb5823..c9d5205 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -310,7 +310,7 @@ bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator, // Return true if, under current assumptions, any terminator needs // to be relaxed. bool SystemZLongBranch::mustRelaxABranch() { - for (SmallVector::iterator TI = Terminators.begin(), + for (SmallVectorImpl::iterator TI = Terminators.begin(), TE = Terminators.end(); TI != TE; ++TI) if (mustRelaxBranch(*TI, TI->Address)) return true; @@ -322,7 +322,7 @@ bool SystemZLongBranch::mustRelaxABranch() { void SystemZLongBranch::setWorstCaseAddresses() { SmallVector::iterator TI = Terminators.begin(); BlockPosition Position(MF->getAlignment()); - for (SmallVector::iterator BI = MBBs.begin(), BE = MBBs.end(); + for (SmallVectorImpl::iterator BI = MBBs.begin(), BE = MBBs.end(); BI != BE; ++BI) { skipNonTerminators(Position, *BI); for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) { @@ -386,7 +386,7 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) { void SystemZLongBranch::relaxBranches() { SmallVector::iterator TI = Terminators.begin(); BlockPosition Position(MF->getAlignment()); - for (SmallVector::iterator BI = MBBs.begin(), BE = MBBs.end(); + for (SmallVectorImpl::iterator BI = MBBs.begin(), BE = MBBs.end(); BI != BE; ++BI) { skipNonTerminators(Position, *BI); for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) { -- cgit v1.1 From a4389b0c504251d276ddd2ff1b1c9221909a9b46 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 4 Jul 2013 01:43:17 +0000 Subject: Add a space between closing template '>' to unbreak build. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185607 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MBlaze/MBlazeFrameLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index e453bcb..786eeee 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -42,10 +42,10 @@ static void replaceFrameIndexes(MachineFunction &MF, SmallVector, 16> &FR) { MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo(); - const SmallVectorImpl>::iterator FRB = FR.begin(); - const SmallVectorImpl>::iterator FRE = FR.end(); + const SmallVectorImpl >::iterator FRB = FR.begin(); + const SmallVectorImpl >::iterator FRE = FR.end(); - SmallVectorImpl>::iterator FRI = FRB; + SmallVectorImpl >::iterator FRI = FRB; for (; FRI != FRE; ++FRI) { MFI->RemoveStackObject(FRI->first); int NFI = MFI->CreateFixedObject(4, FRI->second, true); -- cgit v1.1 From 929d9ef111cc0053e245d04464c5ba9fba7727b2 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 4 Jul 2013 10:04:08 +0000 Subject: Add a V8FP instruction 'vcvt{b,t}' to convert between half and double precision. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185620 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 2 ++ lib/Target/ARM/ARMInstrVFP.td | 56 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 4d550ee..ed68b4e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -203,6 +203,8 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4", "VFP4">; +def HasV8FP : Predicate<"Subtarget->hasV8FP()">, + AssemblerPredicate<"FeatureV8FP", "V8FP">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 597b74a..e1d470f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -468,7 +468,7 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, let Inst{4} = 0; } -// Between half-precision and single-precision. For disassembly only. +// Between half, single and double-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), @@ -493,6 +493,60 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; +def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasV8FP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasV8FP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} + +def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasV8FP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasV8FP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; +} + def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", -- cgit v1.1 From f349a6e9e6ee0b589c403e0c5785266da121d05c Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 4 Jul 2013 13:54:20 +0000 Subject: Remove the EXCEPTIONADDR, EHSELECTION, and LSDAADDR ISD opcodes. These exception-related opcodes are not used any longer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185625 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 3 --- lib/Target/ARM/ARMISelLowering.cpp | 2 -- lib/Target/Hexagon/HexagonISelLowering.cpp | 5 ----- lib/Target/Mips/MipsISelLowering.cpp | 5 ----- lib/Target/PowerPC/PPCISelLowering.cpp | 5 ----- lib/Target/SystemZ/SystemZISelLowering.cpp | 5 ----- lib/Target/X86/X86ISelLowering.cpp | 4 ---- 7 files changed, 29 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index dff01f7..84051d4 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -249,9 +249,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setExceptionPointerRegister(AArch64::X0); setExceptionSelectorRegister(AArch64::X1); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 9b68590..991a703 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -717,8 +717,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isTargetDarwin()) { // Non-Darwin platforms may return values in these registers via the // personality function. - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index ab8a767..85e1045 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1428,11 +1428,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); if (TM.getSubtargetImpl()->isSubtargetV2()) { diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index bb24b8a..5fd50fd 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -346,11 +346,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FNEG, MVT::f64, Expand); } - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index db49e21..c4f961c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -228,11 +228,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index da4ad38..256c278 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -200,11 +200,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); - // Expand these using getExceptionSelectorRegister() and - // getExceptionPointerRegister(). - setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand); - setOperationAction(ISD::EHSELECTION, PtrVT, Expand); - // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f69a5be..174cc46 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -563,10 +563,6 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); if (Subtarget->is64Bit()) { setExceptionPointerRegister(X86::RAX); setExceptionSelectorRegister(X86::RDX); -- cgit v1.1 From 5606fcae50951e9d9aef7def18531b5fd017971b Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 4 Jul 2013 14:24:00 +0000 Subject: [PowerPC] Add asm parser support for CR expressions This adds support for specifying condition registers and condition register fields via expressions using the symbols defined by the PowerISA, like "4*cr2+eq". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185633 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 87 +++++++++++++++++++++++++-- lib/Target/PowerPC/PPCInstrInfo.td | 2 +- 2 files changed, 84 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 4892963..e4fc3b9 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -107,6 +107,67 @@ static unsigned CRRegs[8] = { PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 }; +// Evaluate an expression containing condition register +// or condition register field symbols. Returns positive +// value on success, or -1 on error. +static int64_t +EvaluateCRExpr(const MCExpr *E) { + switch (E->getKind()) { + case MCExpr::Target: + return -1; + + case MCExpr::Constant: { + int64_t Res = cast(E)->getValue(); + return Res < 0 ? -1 : Res; + } + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr *SRE = cast(E); + StringRef Name = SRE->getSymbol().getName(); + + if (Name == "lt") return 0; + if (Name == "gt") return 1; + if (Name == "eq") return 2; + if (Name == "so") return 3; + if (Name == "un") return 3; + + if (Name == "cr0") return 0; + if (Name == "cr1") return 1; + if (Name == "cr2") return 2; + if (Name == "cr3") return 3; + if (Name == "cr4") return 4; + if (Name == "cr5") return 5; + if (Name == "cr6") return 6; + if (Name == "cr7") return 7; + + return -1; + } + + case MCExpr::Unary: + return -1; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(E); + int64_t LHSVal = EvaluateCRExpr(BE->getLHS()); + int64_t RHSVal = EvaluateCRExpr(BE->getRHS()); + int64_t Res; + + if (LHSVal < 0 || RHSVal < 0) + return -1; + + switch (BE->getOpcode()) { + default: return -1; + case MCBinaryExpr::Add: Res = LHSVal + RHSVal; break; + case MCBinaryExpr::Mul: Res = LHSVal * RHSVal; break; + } + + return Res < 0 ? -1 : Res; + } + } + + llvm_unreachable("Invalid expression kind!"); +} + struct PPCOperand; class PPCAsmParser : public MCTargetAsmParser { @@ -193,6 +254,7 @@ struct PPCOperand : public MCParsedAsmOperand { struct ExprOp { const MCExpr *Val; + int64_t CRVal; // Cached result of EvaluateCRExpr(Val) }; union { @@ -240,6 +302,11 @@ public: return Expr.Val; } + int64_t getExprCRVal() const { + assert(Kind == Expression && "Invalid access!"); + return Expr.CRVal; + } + unsigned getReg() const { assert(isRegNumber() && "Invalid access!"); return (unsigned) Imm.Val; @@ -247,7 +314,12 @@ public: unsigned getCCReg() const { assert(isCCRegNumber() && "Invalid access!"); - return (unsigned) Imm.Val; + return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal); + } + + unsigned getCRBit() const { + assert(isCRBitNumber() && "Invalid access!"); + return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal); } unsigned getCRBitMask() const { @@ -276,8 +348,14 @@ public: (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } - bool isCCRegNumber() const { return Kind == Immediate && - isUInt<3>(getImm()); } + bool isCCRegNumber() const { return (Kind == Expression + && isUInt<3>(getExprCRVal())) || + (Kind == Immediate + && isUInt<3>(getImm())); } + bool isCRBitNumber() const { return (Kind == Expression + && isUInt<5>(getExprCRVal())) || + (Kind == Immediate + && isUInt<5>(getImm())); } bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) && isPowerOf2_32(getImm()); } bool isMem() const { return false; } @@ -338,7 +416,7 @@ public: void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getReg()])); + Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()])); } void addRegCRRCOperands(MCInst &Inst, unsigned N) const { @@ -397,6 +475,7 @@ public: SMLoc S, SMLoc E, bool IsPPC64) { PPCOperand *Op = new PPCOperand(Expression); Op->Expr.Val = Val; + Op->Expr.CRVal = EvaluateCRExpr(Val); Op->StartLoc = S; Op->EndLoc = E; Op->IsPPC64 = IsPPC64; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 022c151..e104ea5 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -392,7 +392,7 @@ def vrrc : RegisterOperand { let ParserMatchClass = PPCRegVRRCAsmOperand; } def PPCRegCRBITRCAsmOperand : AsmOperandClass { - let Name = "RegCRBITRC"; let PredicateMethod = "isRegNumber"; + let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber"; } def crbitrc : RegisterOperand { let ParserMatchClass = PPCRegCRBITRCAsmOperand; -- cgit v1.1 From 3c99602ca87f604080e367838180c3d63f6931f3 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 4 Jul 2013 14:40:12 +0000 Subject: [PowerPC] Add all trap mnemonics This adds support for all basic and extended variants of the trap instructions to the asm parser. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185638 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e104ea5..fbf61f0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1238,6 +1238,15 @@ def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; +def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm), + "twi $to, $rA, $imm", IntTrapW, []>; +def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB), + "tw $to, $rA, $rB", IntTrapW, []>; +def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm), + "tdi $to, $rA, $imm", IntTrapD, []>; +def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), + "td $to, $rA, $rB", IntTrapD, []>; + //===----------------------------------------------------------------------===// // PPC32 Load Instructions. // @@ -2569,3 +2578,25 @@ def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>; def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>; def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>; +multiclass TrapExtendedMnemonic { + def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>; + def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>; + def : InstAlias<"tw"#name#"i $rA, $imm", (TWI to, gprc:$rA, s16imm:$imm)>; + def : InstAlias<"tw"#name#" $rA, $rB", (TW to, gprc:$rA, gprc:$rB)>; +} +defm : TrapExtendedMnemonic<"lt", 16>; +defm : TrapExtendedMnemonic<"le", 20>; +defm : TrapExtendedMnemonic<"eq", 4>; +defm : TrapExtendedMnemonic<"ge", 12>; +defm : TrapExtendedMnemonic<"gt", 8>; +defm : TrapExtendedMnemonic<"nl", 12>; +defm : TrapExtendedMnemonic<"ne", 24>; +defm : TrapExtendedMnemonic<"ng", 20>; +defm : TrapExtendedMnemonic<"llt", 2>; +defm : TrapExtendedMnemonic<"lle", 6>; +defm : TrapExtendedMnemonic<"lge", 5>; +defm : TrapExtendedMnemonic<"lgt", 1>; +defm : TrapExtendedMnemonic<"lnl", 5>; +defm : TrapExtendedMnemonic<"lng", 6>; +defm : TrapExtendedMnemonic<"u", 31>; + -- cgit v1.1 From 4ea250524f77a67102118747dad6ee69f9f3b3aa Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 4 Jul 2013 14:57:20 +0000 Subject: Add support for MC assembling and disassembling of vsel{ge, gt, eq, vs} instructions. This adds a new decoder table/namespace 'VFPV8', as these instructions have their top 4 bits as 0b1111, while other Thumb instructions have 0b1110. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185642 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 58 +++++++++++++++++++++++++ lib/Target/ARM/ARMInstrVFP.td | 19 ++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 4 +- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 15 +++++++ 4 files changed, 94 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 239632f..91564da 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1548,6 +1548,35 @@ class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{4} = op4; } +// FP, binary, not predicated +class ADbInp opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, string asm, list pattern> + : VFPXI +{ + // Instruction operands. + bits<5> Dd; + bits<5> Dn; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // double precision + let Inst{6} = 0; + let Inst{4} = 0; +} + // Single precision, unary class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, @@ -1607,6 +1636,35 @@ class ASbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, let Inst{4} = op4; } +// Single precision, binary, not predicated +class ASbInp opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, string asm, list pattern> + : VFPXI +{ + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{6} = 0; + let Inst{4} = 0; +} + // Single precision binary, if no NEON. Same as ASbI except not available if // NEON is enabled. class ASbIn opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e1d470f..dcac754 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -333,6 +333,25 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } +multiclass vsel_inst opc> { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def S : ASbInp<0b11100, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), + []>, Requires<[HasV8FP]>; + + def D : ADbInp<0b11100, opc, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), + []>, Requires<[HasV8FP]>; + } +} + +defm VSELGT : vsel_inst<"gt", 0b11>; +defm VSELGE : vsel_inst<"ge", 0b10>; +defm VSELEQ : vsel_inst<"eq", 0b00>; +defm VSELVS : vsel_inst<"vs", 0b01>; + // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bd4ea53..687ea3f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4905,7 +4905,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || - Mnemonic == "fmuls") + Mnemonic == "fmuls" || Mnemonic.startswith("vsel")) return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -5005,7 +5005,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || Mnemonic == "trap" || Mnemonic == "setend" || - Mnemonic.startswith("cps")) { + Mnemonic.startswith("cps") || Mnemonic.startswith("vsel")) { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 31941c1..d5b749d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -456,6 +456,13 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); + result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address, this, STI); if (result != MCDisassembler::Fail) { @@ -764,6 +771,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } + MI.clear(); + result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + UpdateThumbVFPPredicate(MI); + return result; + } + if (fieldFromInstruction(insn32, 28, 4) == 0xE) { MI.clear(); result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, -- cgit v1.1 From 1e9ddc229f3d837a79eed1d7ac43743db148f8d1 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 4 Jul 2013 15:58:38 +0000 Subject: Remove an unneeded call to 'UpdateThumbVFPPredicate', spotted by Amaury. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185651 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index d5b749d..32ce3be 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -775,7 +775,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; - UpdateThumbVFPPredicate(MI); return result; } -- cgit v1.1 From 972befb3f281f0f9ce08d7cf27b4e879327676b0 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Thu, 4 Jul 2013 18:28:46 +0000 Subject: [PowerPC] Implement writeNopData This implements a proper PPCAsmBackend::writeNopData routine that actually writes PowerPC nop instructions. This fixes the last remaining difference in object file output (text section) between the integrated assembler and GNU as that I've seen anywhere. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185662 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index e01f142..b37a179 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -148,10 +148,14 @@ public: } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // FIXME: Zero fill for now. That's not right, but at least will get the - // section size right. - for (uint64_t i = 0; i != Count; ++i) - OW->Write8(0); + // Can't emit NOP with size not multiple of 32-bits + if (Count % 4 != 0) + return false; + + uint64_t NumNops = Count / 4; + for (uint64_t i = 0; i != NumNops; ++i) + OW->Write32(0x60000000); + return true; } -- cgit v1.1 From 2c8ce86e7aae23f9d86cdf4e6027f9703c13ee7f Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 4 Jul 2013 19:20:00 +0000 Subject: Fix leak. Should bring back the valgrind bot. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185663 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 1c397b5..af0e23d 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -47,6 +47,7 @@ public: } ~AArch64Disassembler() { + delete RegInfo; } /// See MCDisassembler. -- cgit v1.1 From 8be527901ac87d49f7b0b56c96dbc6dadabb5069 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 4 Jul 2013 22:15:33 +0000 Subject: Use a OwningPtr instead of a manual delete. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185673 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index af0e23d..36dd704 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -38,7 +38,7 @@ typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { /// AArch64 disassembler for all AArch64 platforms. class AArch64Disassembler : public MCDisassembler { - const MCRegisterInfo *RegInfo; + OwningPtr RegInfo; public: /// Initializes the disassembler. /// @@ -46,9 +46,7 @@ public: : MCDisassembler(STI), RegInfo(Info) { } - ~AArch64Disassembler() { - delete RegInfo; - } + ~AArch64Disassembler() {} /// See MCDisassembler. DecodeStatus getInstruction(MCInst &instr, @@ -58,7 +56,7 @@ public: raw_ostream &vStream, raw_ostream &cStream) const; - const MCRegisterInfo *getRegInfo() const { return RegInfo; } + const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); } }; } -- cgit v1.1 From 00d9fe2de7f0b8f9d1ea19ae30cc78b1a1e1fb92 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 5 Jul 2013 10:19:40 +0000 Subject: PR16490: fix a crash in ARMDAGToDAGISel::SelectInlineAsm. In the SelectionDAG immediate operands to inline asm are constructed as two separate operands. The first is a constant of value InlineAsm::Kind_Imm and the second is a constant with the value of the immediate. In ARMDAGToDAGISel::SelectInlineAsm, if we reach an operand of Kind_Imm we should skip over the next operand too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185688 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c85dcb4..da0fe2c 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3491,6 +3491,12 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ else continue; + if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); if (NumRegs) OpChanged.push_back(false); -- cgit v1.1 From 23a72c8f7e46618ff8dbdbba4e8c1a2c4e44e3df Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 5 Jul 2013 12:22:36 +0000 Subject: [PowerPC] Support @tls in the asm parser This adds support for the last missing construct to parse TLS-related assembler code: add 3, 4, symbol@tls The ADD8TLS currently hard-codes the @tls into the assembler string. This cannot be handled by the asm parser, since @tls is parsed as a symbol variant. This patch changes ADD8TLS to have the @tls suffix printed as symbol variant on output too, which allows us to remove the isCodeGenOnly marker from ADD8TLS. This in turn means that we can add a AsmOperand to accept @tls marked symbols on input. As a side effect, this means that the fixup_ppc_tlsreg fixup type is no longer necessary and can be merged into fixup_ppc_nofixup. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185692 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 55 +++++++++++++++++++--- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 3 -- .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 6 +-- lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 6 +-- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 2 +- lib/Target/PowerPC/PPC.h | 5 +- lib/Target/PowerPC/PPCISelLowering.cpp | 4 +- lib/Target/PowerPC/PPCInstr64Bit.td | 8 +++- lib/Target/PowerPC/PPCMCInstLower.cpp | 3 ++ 9 files changed, 70 insertions(+), 22 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index e4fc3b9..237ecdc 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -237,7 +237,8 @@ struct PPCOperand : public MCParsedAsmOperand { enum KindTy { Token, Immediate, - Expression + Expression, + TLSRegister } Kind; SMLoc StartLoc, EndLoc; @@ -257,10 +258,15 @@ struct PPCOperand : public MCParsedAsmOperand { int64_t CRVal; // Cached result of EvaluateCRExpr(Val) }; + struct TLSRegOp { + const MCSymbolRefExpr *Sym; + }; + union { struct TokOp Tok; struct ImmOp Imm; struct ExprOp Expr; + struct TLSRegOp TLSReg; }; PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -280,6 +286,9 @@ public: case Expression: Expr = o.Expr; break; + case TLSRegister: + TLSReg = o.TLSReg; + break; } } @@ -307,6 +316,11 @@ public: return Expr.CRVal; } + const MCExpr *getTLSReg() const { + assert(Kind == TLSRegister && "Invalid access!"); + return TLSReg.Sym; + } + unsigned getReg() const { assert(isRegNumber() && "Invalid access!"); return (unsigned) Imm.Val; @@ -341,6 +355,7 @@ public: (getImm() & 3) == 0); } bool isS17Imm() const { return Kind == Expression || (Kind == Immediate && isInt<17>(getImm())); } + bool isTLSReg() const { return Kind == TLSRegister; } bool isDirectBr() const { return Kind == Expression || (Kind == Immediate && isInt<26>(getImm()) && (getImm() & 3) == 0); } @@ -445,6 +460,11 @@ public: Inst.addOperand(MCOperand::CreateExpr(getExpr())); } + void addTLSRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateExpr(getTLSReg())); + } + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); @@ -481,6 +501,28 @@ public: Op->IsPPC64 = IsPPC64; return Op; } + + static PPCOperand *CreateTLSReg(const MCSymbolRefExpr *Sym, + SMLoc S, SMLoc E, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(TLSRegister); + Op->TLSReg.Sym = Sym; + Op->StartLoc = S; + Op->EndLoc = E; + Op->IsPPC64 = IsPPC64; + return Op; + } + + static PPCOperand *CreateFromMCExpr(const MCExpr *Val, + SMLoc S, SMLoc E, bool IsPPC64) { + if (const MCConstantExpr *CE = dyn_cast(Val)) + return CreateImm(CE->getValue(), S, E, IsPPC64); + + if (const MCSymbolRefExpr *SRE = dyn_cast(Val)) + if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS) + return CreateTLSReg(SRE, S, E, IsPPC64); + + return CreateExpr(Val, S, E, IsPPC64); + } }; } // end anonymous namespace. @@ -496,6 +538,9 @@ void PPCOperand::print(raw_ostream &OS) const { case Expression: getExpr()->print(OS); break; + case TLSRegister: + getTLSReg()->print(OS); + break; } } @@ -1011,12 +1056,8 @@ ParseOperand(SmallVectorImpl &Operands) { return Error(S, "unknown operand"); } - if (const MCConstantExpr *CE = dyn_cast(EVal)) - Op = PPCOperand::CreateImm(CE->getValue(), S, E, isPPC64()); - else - Op = PPCOperand::CreateExpr(EVal, S, E, isPPC64()); - // Push the parsed operand into the list of operands + Op = PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()); Operands.push_back(Op); // Check whether this is a TLS call expression @@ -1036,7 +1077,7 @@ ParseOperand(SmallVectorImpl &Operands) { E = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. - Op = PPCOperand::CreateExpr(TLSSym, S, E, isPPC64()); + Op = PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()); Operands.push_back(Op); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index b37a179..4f999a1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -30,7 +30,6 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_2: case FK_Data_4: case FK_Data_8: - case PPC::fixup_ppc_tlsreg: case PPC::fixup_ppc_nofixup: return Value; case PPC::fixup_ppc_brcond14: @@ -64,7 +63,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { return 4; case FK_Data_8: return 8; - case PPC::fixup_ppc_tlsreg: case PPC::fixup_ppc_nofixup: return 0; } @@ -101,7 +99,6 @@ public: { "fixup_ppc_brcond14abs", 16, 14, 0 }, { "fixup_ppc_half16", 0, 16, 0 }, { "fixup_ppc_half16ds", 0, 14, 0 }, - { "fixup_ppc_tlsreg", 0, 0, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 76cf43f..ffc5002 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -289,9 +289,6 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, break; } break; - case PPC::fixup_ppc_tlsreg: - Type = ELF::R_PPC64_TLS; - break; case PPC::fixup_ppc_nofixup: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); @@ -301,6 +298,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_TLSLD: Type = ELF::R_PPC64_TLSLD; break; + case MCSymbolRefExpr::VK_PPC_TLS: + Type = ELF::R_PPC64_TLS; + break; } break; case FK_Data_8: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 0438c0e..68de8c1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -41,11 +41,9 @@ enum Fixups { /// implied 2 zero bits for instrs like 'std'. fixup_ppc_half16ds, - /// fixup_ppc_tlsreg - Insert thread-pointer register number. - fixup_ppc_tlsreg, - /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call - /// to __tls_get_addr for the TLS general and local dynamic models. + /// to __tls_get_addr for the TLS general and local dynamic models, + /// or inserts the thread-pointer register number. fixup_ppc_nofixup, // Marker diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 27ad980..59ba9c4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -209,7 +209,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, // hint to the linker that this statement is part of a relocation sequence. // Return the thread-pointer register's encoding. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_tlsreg)); + (MCFixupKind)PPC::fixup_ppc_nofixup)); return CTX.getRegisterInfo()->getEncodingValue(PPC::X13); } diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index d5a08ee..96b882a 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -85,7 +85,10 @@ namespace llvm { /// into memory operations. MO_DTPREL_LO = 5 << 4, MO_TLSLD_LO = 6 << 4, - MO_TOC_LO = 7 << 4 + MO_TOC_LO = 7 << 4, + + // Symbol for VK_PPC_TLS fixup attached to an ADD instruction + MO_TLS = 8 << 4 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c4f961c..0f79031 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1359,12 +1359,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TLS); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, TPOffsetHi); - return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index d19a7d4..e7bb259 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -36,8 +36,13 @@ def s17imm64 : Operand { def tocentry : Operand { let MIOperandInfo = (ops i64imm:$imm); } +def PPCTLSRegOperand : AsmOperandClass { + let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; + let RenderMethod = "addTLSRegOperands"; +} def tlsreg : Operand { let EncoderMethod = "getTLSRegEncoding"; + let ParserMatchClass = PPCTLSRegOperand; } def tlsgd : Operand {} def tlscall : Operand { @@ -404,9 +409,8 @@ defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. -let isCodeGenOnly = 1 in def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), - "add $rT, $rA, $rB@tls", IntSimple, + "add $rT, $rA, $rB", IntSimple, [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 1eefb7f..b7e88d4 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -127,6 +127,9 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, case PPCII::MO_TOC_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO; break; + case PPCII::MO_TLS: + RefKind = MCSymbolRefExpr::VK_PPC_TLS; + break; } const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); -- cgit v1.1 From 7271ac2c0318043688ddc8686dd23777dca62c59 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 5 Jul 2013 12:55:00 +0000 Subject: [SystemZ] Clean up register scavenging code SystemZ wants normal register scavenging slots, as close to the stack or frame pointer as possible. The only reason it was using custom code was because PrologEpilogInserter assumed an x86-like layout, where the frame pointer is at the opposite end of the frame from the stack pointer. This meant that when frame pointer elimination was disabled, the slots ended up being as close as possible to the incoming stack pointer, which is the opposite of what we want on SystemZ. This patch adds a new knob to say which layout is used and converts SystemZ to use target-independent scavenging slots. It's one of the pieces needed to support frame-to-frame MVCs, where two slots might be required. The ABI requires us to allocate 160 bytes for calls, so one approach would be to use that area as temporary spill space instead. It would need some surgery to make sure that the slot isn't live across a call though. I stuck to the "isFPCloseToIncomingSP - ..." style comment on the "do what the surrounding code does" principle. The FP case is already covered by several Systemz/frame-* tests, which fail without the PrologueEpilogueInserter change, so no new ones are needed. No behavioural change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185696 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZFrameLowering.cpp | 29 +++++++++++++---------------- lib/Target/SystemZ/SystemZFrameLowering.h | 13 +++---------- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 26 -------------------------- lib/Target/SystemZ/SystemZRegisterInfo.h | 5 ----- 4 files changed, 16 insertions(+), 57 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index cd80d4e..3ae5978 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -14,6 +14,7 @@ #include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" using namespace llvm; @@ -263,6 +264,18 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +void SystemZFrameLowering:: +processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + uint64_t MaxReach = (MFFrame->estimateStackSize(MF) + + SystemZMC::CallFrameSize * 2); + if (!isUInt<12>(MaxReach)) + // We may need a register scavenging slot if some parts of the frame + // are outside the reach of an unsigned 12-bit displacement. + RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false)); +} + // Emit instructions before MBBI (in MBB) to add NumBytes to Reg. static void emitIncrement(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -469,9 +482,6 @@ getAllocatedStackSize(const MachineFunction &MF) const { // Start with the size of the local variables and spill slots. uint64_t StackSize = MFFrame->getStackSize(); - // Include space for an emergency spill slot, if one might be needed. - StackSize += getEmergencySpillSlotSize(MF); - // We need to allocate the ABI-defined 160-byte base area whenever // we allocate stack space for our own use and whenever we call another // function. @@ -481,19 +491,6 @@ getAllocatedStackSize(const MachineFunction &MF) const { return StackSize; } -unsigned SystemZFrameLowering:: -getEmergencySpillSlotSize(const MachineFunction &MF) const { - const MachineFrameInfo *MFFrame = MF.getFrameInfo(); - uint64_t MaxReach = MFFrame->getStackSize() + SystemZMC::CallFrameSize * 2; - return isUInt<12>(MaxReach) ? 0 : 8; -} - -unsigned SystemZFrameLowering:: -getEmergencySpillSlotOffset(const MachineFunction &MF) const { - assert(getEmergencySpillSlotSize(MF) && "No emergency spill slot"); - return SystemZMC::CallFrameSize; -} - bool SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { // The ABI requires us to allocate 160 bytes of stack space for the callee, diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index 08321e0..9b0a1d5 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -30,6 +30,7 @@ public: const SystemZSubtarget &sti); // Override TargetFrameLowering. + virtual bool isFPCloseToIncomingSP() const LLVM_OVERRIDE { return false; } virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const LLVM_OVERRIDE; virtual void @@ -47,6 +48,8 @@ public: const std::vector &CSI, const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const; virtual void emitPrologue(MachineFunction &MF) const LLVM_OVERRIDE; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const LLVM_OVERRIDE; @@ -64,16 +67,6 @@ public: // Return the number of bytes in the callee-allocated part of the frame. uint64_t getAllocatedStackSize(const MachineFunction &MF) const; - // Return the number of frame bytes that should be reserved for - // an emergency spill slot, for use by the register scaveneger. - // Return 0 if register scaveging won't be needed. - unsigned getEmergencySpillSlotSize(const MachineFunction &MF) const; - - // Return the offset from the frame pointer of the emergency spill slot, - // which always fits within a 12-bit unsigned displacement field. - // Only valid if getEmergencySpillSlotSize(MF) returns nonzero. - unsigned getEmergencySpillSlotOffset(const MachineFunction &MF) const; - // Return the byte offset from the incoming stack pointer of Reg's // ABI-defined save slot. Return 0 if no slot is defined for Reg. unsigned getRegSpillOffset(unsigned Reg) const { diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index c695bb3..8ce6d6a 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -53,32 +53,6 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -bool -SystemZRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator SaveMBBI, - MachineBasicBlock::iterator &UseMBBI, - const TargetRegisterClass *RC, - unsigned Reg) const { - MachineFunction &MF = *MBB.getParent(); - const SystemZInstrInfo &TII = - *static_cast(TM.getInstrInfo()); - const SystemZFrameLowering *TFI = - static_cast(TM.getFrameLowering()); - unsigned Base = getFrameRegister(MF); - uint64_t Offset = TFI->getEmergencySpillSlotOffset(MF); - DebugLoc DL; - - unsigned LoadOpcode, StoreOpcode; - TII.getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); - - // The offset must always be in range of a 12-bit unsigned displacement. - BuildMI(MBB, SaveMBBI, DL, TII.get(StoreOpcode)) - .addReg(Reg, RegState::Kill).addReg(Base).addImm(Offset).addReg(0); - BuildMI(MBB, UseMBBI, DL, TII.get(LoadOpcode), Reg) - .addReg(Base).addImm(Offset).addReg(0); - return true; -} - void SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 047cb4a..c447e4d 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -52,11 +52,6 @@ public: const LLVM_OVERRIDE; virtual BitVector getReservedRegs(const MachineFunction &MF) const LLVM_OVERRIDE; - virtual bool saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator SaveMBBI, - MachineBasicBlock::iterator &UseMBBI, - const TargetRegisterClass *RC, - unsigned Reg) const LLVM_OVERRIDE; virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const LLVM_OVERRIDE; -- cgit v1.1 From 47734db936bd3b434d60f9daee5b34bc722ee3ba Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 5 Jul 2013 13:11:52 +0000 Subject: [SystemZ] Allocate a second register scavenging slot This is another prerequisite for frame-to-frame MVC copies. I'll commit the patch that makes use of the slot separately. The downside of trying to test many corner cases with each of the available addressing modes is that a fair few tests need to account for the new frame layout. I do still think it's useful to have all these tests though, since it's something that wouldn't get much coverage otherwise. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185698 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZFrameLowering.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index 3ae5978..a58da90 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -270,10 +270,14 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF, MachineFrameInfo *MFFrame = MF.getFrameInfo(); uint64_t MaxReach = (MFFrame->estimateStackSize(MF) + SystemZMC::CallFrameSize * 2); - if (!isUInt<12>(MaxReach)) - // We may need a register scavenging slot if some parts of the frame + if (!isUInt<12>(MaxReach)) { + // We may need register scavenging slots if some parts of the frame // are outside the reach of an unsigned 12-bit displacement. + // Create 2 for the case where both addresses in an MVC are + // out of range. RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false)); + RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false)); + } } // Emit instructions before MBBI (in MBB) to add NumBytes to Reg. -- cgit v1.1 From 457571ed6977f78ca8d30b993fa7e86e2d7ad8d5 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 5 Jul 2013 13:49:46 +0000 Subject: [PowerPC] Add some special @got@tprel fixup cases When a target@got@tprel or target@got@tprel@l symbol variant is used in a fixup_ppc_half16 (*not* fixup_ppc_half16ds) context, we currently fail, since the corresponding R_PPC64_GOT_TPREL16 / R_PPC64_GOT_TPREL16_LO relocation types do not exist. However, since such symbol variants resolve to GOT offsets which are always 4-aligned, we can simply instead use the _DS variants of the relocation types, which *do* exist. The same applies for the @got@dtprel variants. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185700 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index ffc5002..0833b4e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -228,9 +228,29 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA: Type = ELF::R_PPC64_GOT_TLSLD16_HA; break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL: + /* We don't have R_PPC64_GOT_TPREL16, but since GOT offsets + are always 4-aligned, we can use R_PPC64_GOT_TPREL16_DS. */ + Type = ELF::R_PPC64_GOT_TPREL16_DS; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO: + /* We don't have R_PPC64_GOT_TPREL16_LO, but since GOT offsets + are always 4-aligned, we can use R_PPC64_GOT_TPREL16_LO_DS. */ + Type = ELF::R_PPC64_GOT_TPREL16_LO_DS; + break; case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI: Type = ELF::R_PPC64_GOT_TPREL16_HI; break; + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL: + /* We don't have R_PPC64_GOT_DTPREL16, but since GOT offsets + are always 4-aligned, we can use R_PPC64_GOT_DTPREL16_DS. */ + Type = ELF::R_PPC64_GOT_DTPREL16_DS; + break; + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO: + /* We don't have R_PPC64_GOT_DTPREL16_LO, but since GOT offsets + are always 4-aligned, we can use R_PPC64_GOT_DTPREL16_LO_DS. */ + Type = ELF::R_PPC64_GOT_DTPREL16_LO_DS; + break; case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA: Type = ELF::R_PPC64_GOT_TPREL16_HA; break; -- cgit v1.1 From cf1b5bd60ab7cf907bef20c3997ffb249b4fe90a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 5 Jul 2013 14:02:01 +0000 Subject: [SystemZ] Enable the use of MVC for frame-to-frame spills ...now that the problem that prompted the restriction has been fixed. The original spill-02.py was a compromise because at the time I couldn't find an example that actually failed without the two scavenging slots. The version included here did. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185701 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 16207b3..12bcd09 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -363,18 +363,11 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // not valid in cases where the two memories partially overlap; however, // that is not a problem here, because we know that one of the memories // is a full frame index. - // - // For now we punt if the load or store is also to a frame index. - // In that case we might end up eliminating both of them to out-of-range - // offsets, which might then force the register scavenger to spill two - // other registers. The backend can only handle one such scavenger spill - // at a time. if (OpNum == 0 && MI->hasOneMemOperand()) { MachineMemOperand *MMO = *MI->memoperands_begin(); if (MMO->getSize() == Size && !MMO->isVolatile()) { // Handle conversion of loads. - if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad) && - !MI->getOperand(1).isFI()) { + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { uint64_t Offset = 0; MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset, MachineMemOperand::MOStore); @@ -384,8 +377,7 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, .addMemOperand(FrameMMO).addMemOperand(MMO); } // Handle conversion of stores. - if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore) && - !MI->getOperand(1).isFI()) { + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { uint64_t Offset = 0; MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset, MachineMemOperand::MOLoad); -- cgit v1.1 From e684b96e3c6513f88137afee7c344a4d2d9f0694 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 5 Jul 2013 14:31:24 +0000 Subject: [SystemZ] Remove redundant frame MMOs This fixes foldMemoryOperandImpl() so that it doesn't create duplicated frame MMOs. I hadn't realized when writing r185434 that it was the caller's responsibility to add these. No behavioural change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185704 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 12bcd09..81d72e0 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -326,17 +326,6 @@ static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) { MI->getOperand(3).getReg() == 0); } -// Return a MachineMemOperand for FrameIndex with flags MMOFlags. -// Offset is the byte offset from the start of FrameIndex. -static MachineMemOperand *getFrameMMO(MachineFunction &MF, int FrameIndex, - uint64_t &Offset, unsigned MMOFlags) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const Value *V = PseudoSourceValue::getFixedStack(FrameIndex); - return MF.getMachineMemOperand(MachinePointerInfo(V, Offset), MMOFlags, - MFI->getObjectSize(FrameIndex), - MFI->getObjectAlignment(FrameIndex)); -} - MachineInstr * SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, @@ -368,23 +357,17 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (MMO->getSize() == Size && !MMO->isVolatile()) { // Handle conversion of loads. if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { - uint64_t Offset = 0; - MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset, - MachineMemOperand::MOStore); return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) - .addFrameIndex(FrameIndex).addImm(Offset).addImm(Size) + .addFrameIndex(FrameIndex).addImm(0).addImm(Size) .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) - .addMemOperand(FrameMMO).addMemOperand(MMO); + .addMemOperand(MMO); } // Handle conversion of stores. if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { - uint64_t Offset = 0; - MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset, - MachineMemOperand::MOLoad); return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) - .addImm(Size).addFrameIndex(FrameIndex).addImm(Offset) - .addMemOperand(MMO).addMemOperand(FrameMMO); + .addImm(Size).addFrameIndex(FrameIndex).addImm(0) + .addMemOperand(MMO); } } } @@ -400,15 +383,12 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, assert(AccessBytes != 0 && "Size of access should be known"); assert(AccessBytes <= Size && "Access outside the frame index"); uint64_t Offset = Size - AccessBytes; - MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset, - MachineMemOperand::MOLoad); MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode)); for (unsigned I = 0; I < OpNum; ++I) MIB.addOperand(MI->getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); - MIB.addMemOperand(FrameMMO); return MIB; } } -- cgit v1.1 From 71804149a3a6f6c081b874869b27fafe7d3288ce Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 5 Jul 2013 14:38:48 +0000 Subject: [SystemZ] Remove no-op MVCs The stack coloring pass has code to delete stores and loads that become trivially dead after coloring. Extend it to cope with single instructions that copy from one frame index to another. The testcase happens to show an example of this kicking in at the moment. It did occur in Real Code too though. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185705 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 25 +++++++++++++++++++++++++ lib/Target/SystemZ/SystemZInstrInfo.h | 2 ++ 2 files changed, 27 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 81d72e0..cfd270a 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -104,6 +104,31 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore); } +bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI, + int &DestFrameIndex, + int &SrcFrameIndex) const { + // Check for MVC 0(Length,FI1),0(FI2) + const MachineFrameInfo *MFI = MI->getParent()->getParent()->getFrameInfo(); + if (MI->getOpcode() != SystemZ::MVC || + !MI->getOperand(0).isFI() || + MI->getOperand(1).getImm() != 0 || + !MI->getOperand(3).isFI() || + MI->getOperand(4).getImm() != 0) + return false; + + // Check that Length covers the full slots. + int64_t Length = MI->getOperand(2).getImm(); + unsigned FI1 = MI->getOperand(0).getIndex(); + unsigned FI2 = MI->getOperand(3).getIndex(); + if (MFI->getObjectSize(FI1) != Length || + MFI->getObjectSize(FI2) != Length) + return false; + + DestFrameIndex = FI1; + SrcFrameIndex = FI2; + return true; +} + bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 11d486c..3fe71d8 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -91,6 +91,8 @@ public: int &FrameIndex) const LLVM_OVERRIDE; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const LLVM_OVERRIDE; + virtual bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex, + int &SrcFrameIndex) const LLVM_OVERRIDE; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, -- cgit v1.1 From ffd3bb8f0d875f4aae3097660f973b1e7512ee05 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 5 Jul 2013 18:28:39 +0000 Subject: ARM: Fix incorrect pack pattern A "pkhtb x, x, y asr #num" uses the lower 16 bits of "y asr #num" and packs them in the bottom half of "x". An arithmetic and logic shift are only equivalent in this context if the shift amount is 16. We would be shifting in ones into the bottom 16bits instead of zeros if "y" is negative. radar://14338767 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185712 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ed68b4e..75d3de9 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4011,9 +4011,11 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. +// We also can not replace a srl (17..31) by an arithmetic shift we would use in +// pkhtb src1, src2, asr (17..31). def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), - (srl GPRnopc:$src2, imm16_31:$sh)), - (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>; + (srl GPRnopc:$src2, imm16:$sh)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>; def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)), (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>; -- cgit v1.1 From fe3b2995aa38b25bada9fa2e850590b3988668b5 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 5 Jul 2013 18:57:49 +0000 Subject: ARM: Add a pack pattern for matching arithmetic shift right git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185714 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 75d3de9..9eba553 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4017,6 +4017,9 @@ def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), (srl GPRnopc:$src2, imm16:$sh)), (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>; def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (sra GPRnopc:$src2, imm16_31:$sh)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>; +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)), (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>; -- cgit v1.1 From 2b52880592a525cfe04d8f9008a35da8c2ea94c3 Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Sat, 6 Jul 2013 18:08:19 +0000 Subject: Proper va_arg/va_copy lowering on win64 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185763 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 174cc46..a6e894b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -582,10 +582,12 @@ void X86TargetLowering::resetOperationActions() { // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); - if (Subtarget->is64Bit()) { + if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) { + // TargetInfo::X86_64ABIBuiltinVaList setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { + // TargetInfo::CharPtrBuiltinVaList setOperationAction(ISD::VAARG , MVT::Other, Expand); setOperationAction(ISD::VACOPY , MVT::Other, Expand); } -- cgit v1.1 From 2a9683289b78a2533b261e1b341f9ea9724465a0 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Sat, 6 Jul 2013 20:50:18 +0000 Subject: Add MC support for the v8fp instructions: vmaxnm and vminnm. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185767 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 8 ++++---- lib/Target/ARM/ARMInstrVFP.td | 21 +++++++++++++++++++-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 6 ++++-- 3 files changed, 27 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 91564da..16b7bc5 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1549,7 +1549,7 @@ class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, } // FP, binary, not predicated -class ADbInp opcod1, bits<2> opcod2, dag oops, dag iops, +class ADbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, InstrItinClass itin, string asm, list pattern> : VFPXI @@ -1573,7 +1573,7 @@ class ADbInp opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; let Inst{8} = 1; // double precision - let Inst{6} = 0; + let Inst{6} = opcod3; let Inst{4} = 0; } @@ -1637,7 +1637,7 @@ class ASbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, } // Single precision, binary, not predicated -class ASbInp opcod1, bits<2> opcod2, dag oops, dag iops, +class ASbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, InstrItinClass itin, string asm, list pattern> : VFPXI @@ -1661,7 +1661,7 @@ class ASbInp opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; let Inst{8} = 0; // Single precision - let Inst{6} = 0; + let Inst{6} = opcod3; let Inst{4} = 0; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index dcac754..27e2df4 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -335,12 +335,12 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, multiclass vsel_inst opc> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { - def S : ASbInp<0b11100, opc, + def S : ASbInp<0b11100, opc, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), []>, Requires<[HasV8FP]>; - def D : ADbInp<0b11100, opc, + def D : ADbInp<0b11100, opc, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), []>, Requires<[HasV8FP]>; @@ -352,6 +352,23 @@ defm VSELGE : vsel_inst<"ge", 0b10>; defm VSELEQ : vsel_inst<"eq", 0b00>; defm VSELVS : vsel_inst<"vs", 0b01>; +multiclass vmaxmin_inst { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def S : ASbInp<0b11101, 0b00, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), + []>, Requires<[HasV8FP]>; + + def D : ADbInp<0b11101, 0b00, opc, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"), + []>, Requires<[HasV8FP]>; + } +} + +defm VMAXNM : vmaxmin_inst<"vmaxnm", 0>; +defm VMINNM : vmaxmin_inst<"vminnm", 1>; + // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 687ea3f..f7f1901 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4905,7 +4905,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || - Mnemonic == "fmuls" || Mnemonic.startswith("vsel")) + Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || + Mnemonic.startswith("vsel")) return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -5005,7 +5006,8 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || Mnemonic == "trap" || Mnemonic == "setend" || - Mnemonic.startswith("cps") || Mnemonic.startswith("vsel")) { + Mnemonic.startswith("cps") || Mnemonic == "vmaxnm" || + Mnemonic == "vminnm" || Mnemonic.startswith("vsel")) { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { -- cgit v1.1 From 01f8d579f7672872324208ac5bc4ac311e81b22e Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Sun, 7 Jul 2013 16:48:39 +0000 Subject: Reuse %rax after calling __chkstk on win64 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185778 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 13 ++++++++----- lib/Target/X86/X86ISelLowering.cpp | 3 +-- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 9d66bfd..5db431b 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -914,11 +914,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) .setMIFlag(MachineInstr::FrameSetup); - // MSVC x64's __chkstk needs to adjust %rsp. - // FIXME: %rax preserves the offset and should be available. - if (isSPUpdateNeeded) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, - UseLEA, TII, *RegInfo); + // MSVC x64's __chkstk does not adjust %rsp itself. + // It also does not clobber %rax so we can reuse it when adjusting %rsp. + if (isSPUpdateNeeded) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr) + .addReg(StackPtr) + .addReg(X86::RAX) + .setMIFlag(MachineInstr::FrameSetup); + } if (isEAXAlive) { // Restore EAX diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a6e894b..aaeaa5d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14434,12 +14434,11 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, } else { // __chkstk(MSVCRT): does not update stack pointer. // Clobbers R10, R11 and EFLAGS. - // FIXME: RAX(allocated size) might be reused and not killed. BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) .addExternalSymbol("__chkstk") .addReg(X86::RAX, RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - // RAX has the offset to subtracted from RSP. + // RAX has the offset to be subtracted from RSP. BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP) .addReg(X86::RSP) .addReg(X86::RAX); -- cgit v1.1 From d56e7e198d858439c884dbd909ee58d15742d5be Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Mon, 8 Jul 2013 01:30:57 +0000 Subject: Revert "Reuse %rax after calling __chkstk on win64" This reverts commit 01f8d579f7672872324208ac5bc4ac311e81b22e. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185781 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 13 +++++-------- lib/Target/X86/X86ISelLowering.cpp | 3 ++- 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 5db431b..9d66bfd 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -914,14 +914,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) .setMIFlag(MachineInstr::FrameSetup); - // MSVC x64's __chkstk does not adjust %rsp itself. - // It also does not clobber %rax so we can reuse it when adjusting %rsp. - if (isSPUpdateNeeded) { - BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr) - .addReg(StackPtr) - .addReg(X86::RAX) - .setMIFlag(MachineInstr::FrameSetup); - } + // MSVC x64's __chkstk needs to adjust %rsp. + // FIXME: %rax preserves the offset and should be available. + if (isSPUpdateNeeded) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, + UseLEA, TII, *RegInfo); if (isEAXAlive) { // Restore EAX diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index aaeaa5d..a6e894b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14434,11 +14434,12 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, } else { // __chkstk(MSVCRT): does not update stack pointer. // Clobbers R10, R11 and EFLAGS. + // FIXME: RAX(allocated size) might be reused and not killed. BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) .addExternalSymbol("__chkstk") .addReg(X86::RAX, RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - // RAX has the offset to be subtracted from RSP. + // RAX has the offset to subtracted from RSP. BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP) .addReg(X86::RSP) .addReg(X86::RAX); -- cgit v1.1 From dff0009d0ced62b92cb5900bc2203ec40142ba15 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 8 Jul 2013 09:35:23 +0000 Subject: [SystemZ] Use MVC for memcpy Use MVC for memcpy in cases where a single MVC is enough. Using MVC is a win for longer copies too, but I'll leave that for later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185802 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/CMakeLists.txt | 1 + lib/Target/SystemZ/SystemZISelLowering.cpp | 29 ++++++++++++++++ lib/Target/SystemZ/SystemZISelLowering.h | 9 +++++ lib/Target/SystemZ/SystemZInstrInfo.td | 6 ++++ lib/Target/SystemZ/SystemZOperands.td | 5 +++ lib/Target/SystemZ/SystemZOperators.td | 7 ++++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 46 ++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 40 ++++++++++++++++++++++ lib/Target/SystemZ/SystemZTargetMachine.h | 4 +-- 9 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp create mode 100644 lib/Target/SystemZ/SystemZSelectionDAGInfo.h (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index edb679d..04bbec5 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_target(SystemZCodeGen SystemZLongBranch.cpp SystemZMCInstLower.cpp SystemZRegisterInfo.cpp + SystemZSelectionDAGInfo.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp ) diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 256c278..b49e6a0 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -241,6 +241,12 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); + + // We want to use MVC in preference to even a single load/store pair. + MaxStoresPerMemcpy = 0; + MaxStoresPerMemcpyOptSize = 0; + MaxStoresPerMemmove = 0; + MaxStoresPerMemmoveOptSize = 0; } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { @@ -1579,6 +1585,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SDIVREM64); OPCODE(UDIVREM32); OPCODE(UDIVREM64); + OPCODE(MVC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -2143,6 +2150,26 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, return MBB; } +MachineBasicBlock * +SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + MachineOperand DestBase = MI->getOperand(0); + uint64_t DestDisp = MI->getOperand(1).getImm(); + MachineOperand SrcBase = MI->getOperand(2); + uint64_t SrcDisp = MI->getOperand(3).getImm(); + uint64_t Length = MI->getOperand(4).getImm(); + + BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC)) + .addOperand(DestBase).addImm(DestDisp).addImm(Length) + .addOperand(SrcBase).addImm(SrcDisp); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -2376,6 +2403,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { MI->getOperand(1).getMBB())) MI->eraseFromParent(); return MBB; + case SystemZ::MVCWrapper: + return emitMVCWrapper(MI, MBB); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 21b4d72..4ddfcbb 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -73,6 +73,13 @@ namespace SystemZISD { UDIVREM32, UDIVREM64, + // Use MVC to copy bytes from one memory location to another. + // The first operand is the target address, the second operand is the + // source address, and the third operand is the constant length. + // This isn't a memory opcode because we'd need to attach two + // MachineMemOperands rather than one. + MVC, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_. // @@ -221,6 +228,8 @@ private: unsigned BitSize) const; MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitMVCWrapper(MachineInstr *MI, + MachineBasicBlock *BB) const; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 6b74220..b4e5c25 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -288,6 +288,12 @@ let mayLoad = 1, mayStore = 1 in bdaddr12only:$BD2), "mvc\t$BDL1, $BD2", []>; +let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in + def MVCWrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm32len8:$length), + [(z_mvc bdaddr12only:$dest, bdaddr12only:$src, + imm32len8:$length)]>; + //===----------------------------------------------------------------------===// // Sign extensions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 620876e..9d79439 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -219,6 +219,11 @@ def uimm8 : Immediate; // i32 immediates //===----------------------------------------------------------------------===// +// Immediates for 8-bit lengths. +def imm32len8 : Immediate(N->getZExtValue() - 1); +}], NOOP_SDNodeXForm, "U32Imm">; + // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being zero. def imm32ll16 : Immediate, SDTCisVT<5, i32>, SDTCisVT<6, i32>]>; +def SDT_ZCopy : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -103,6 +107,9 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; +def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZCopy, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; + //===----------------------------------------------------------------------===// // Pattern fragments //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp new file mode 100644 index 0000000..d2da9d2 --- /dev/null +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -0,0 +1,46 @@ +//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-selectiondag-info" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" + +using namespace llvm; + +SystemZSelectionDAGInfo:: +SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { +} + +SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { +} + +SDValue SystemZSelectionDAGInfo:: +EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + if (IsVolatile) + return SDValue(); + + if (ConstantSDNode *CSize = dyn_cast(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + if (Bytes >= 1 && Bytes <= 0x100) { + // A single MVC. + return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, + Chain, Dst, Src, Size); + } + } + return SDValue(); +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h new file mode 100644 index 0000000..39c1491 --- /dev/null +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -0,0 +1,40 @@ +//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SystemZ subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZSELECTIONDAGINFO_H +#define SYSTEMZSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SystemZTargetMachine; + +class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM); + ~SystemZSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool IsVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const + LLVM_OVERRIDE; +}; + +} + +#endif diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 98614e7..a99a98e 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -20,10 +20,10 @@ #include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSubtarget.h" +#include "SystemZSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" namespace llvm { @@ -32,7 +32,7 @@ class SystemZTargetMachine : public LLVMTargetMachine { const DataLayout DL; SystemZInstrInfo InstrInfo; SystemZTargetLowering TLInfo; - TargetSelectionDAGInfo TSInfo; + SystemZSelectionDAGInfo TSInfo; SystemZFrameLowering FrameLowering; public: -- cgit v1.1 From ce1c30ce39f6b1e3008376578d3cf593a4e90d87 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 8 Jul 2013 09:55:36 +0000 Subject: [SystemZ] Remove unwanted part from last commit I was originally going to use MVC for memmove too, but that's less of a clear win. Remove some accidental left-overs in the previous commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185804 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index b49e6a0..bf35946 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -245,8 +245,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; - MaxStoresPerMemmove = 0; - MaxStoresPerMemmoveOptSize = 0; } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { -- cgit v1.1 From 63e7778b53a1c8c190d5c67a9e8871c2acb00b26 Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Mon, 8 Jul 2013 11:19:44 +0000 Subject: Revert "Proper va_arg/va_copy lowering on win64" This reverts commit 2b52880592a525cfe04d8f9008a35da8c2ea94c3. Needs review. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185806 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a6e894b..174cc46 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -582,12 +582,10 @@ void X86TargetLowering::resetOperationActions() { // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); - if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) { - // TargetInfo::X86_64ABIBuiltinVaList + if (Subtarget->is64Bit()) { setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { - // TargetInfo::CharPtrBuiltinVaList setOperationAction(ISD::VAARG , MVT::Other, Expand); setOperationAction(ISD::VACOPY , MVT::Other, Expand); } -- cgit v1.1 From 4010110ccf21be0517034b6ccf9493628afaad77 Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Mon, 8 Jul 2013 11:20:11 +0000 Subject: Reuse %rax after calling __chkstk on win64 Reapply this as I reverted the wrong commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185807 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 13 ++++++++----- lib/Target/X86/X86ISelLowering.cpp | 3 +-- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 9d66bfd..5db431b 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -914,11 +914,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) .setMIFlag(MachineInstr::FrameSetup); - // MSVC x64's __chkstk needs to adjust %rsp. - // FIXME: %rax preserves the offset and should be available. - if (isSPUpdateNeeded) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, - UseLEA, TII, *RegInfo); + // MSVC x64's __chkstk does not adjust %rsp itself. + // It also does not clobber %rax so we can reuse it when adjusting %rsp. + if (isSPUpdateNeeded) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr) + .addReg(StackPtr) + .addReg(X86::RAX) + .setMIFlag(MachineInstr::FrameSetup); + } if (isEAXAlive) { // Restore EAX diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 174cc46..a680ac0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14432,12 +14432,11 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, } else { // __chkstk(MSVCRT): does not update stack pointer. // Clobbers R10, R11 and EFLAGS. - // FIXME: RAX(allocated size) might be reused and not killed. BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) .addExternalSymbol("__chkstk") .addReg(X86::RAX, RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - // RAX has the offset to subtracted from RSP. + // RAX has the offset to be subtracted from RSP. BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP) .addReg(X86::RSP) .addReg(X86::RAX); -- cgit v1.1 From 12ae7fd2da24e53c795c0cc17d06c91a0f09fb3d Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 8 Jul 2013 14:22:45 +0000 Subject: [PowerPC] Fix PR16556 (handle undef ppcf128 in LowerFP_TO_INT). PPCTargetLowering::LowerFP_TO_INT() expects its source operand to be either an f32 or f64, but this is not checked. A long double (ppcf128) operand will normally be custom-lowered to a conversion to f64 in this context. However, this isn't the case for an UNDEF node. This patch recognizes a ppcf128 as a legal source operand for FP_TO_INT only if it's an undef, in which case it creates an undef of the target type. At some point we might want to do a wholesale custom lowering of ISD::UNDEF when the type is ppcf128, but it's not really clear that's a great idea, and probably more work than it's worth for a situation that only arises in the case of a programming error. At this point I think simple is best. The test case comes from PR16556, and is a crash-test only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185821 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 0f79031..9c2856f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4685,6 +4685,15 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); + + // If we have a long double here, it must be that we have an undef of + // that type. In this case return an undef of the target type. + if (Src.getValueType() == MVT::ppcf128) { + assert(Src.getOpcode() == ISD::UNDEF && "Unhandled ppcf128!"); + return DAG.getNode(ISD::UNDEF, dl, + Op.getValueType().getSimpleVT().SimpleTy); + } + if (Src.getValueType() == MVT::f32) Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); -- cgit v1.1 From 9e5bbeab1f6f79375c24bfab87c28f5f4c5afea1 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 8 Jul 2013 14:49:37 +0000 Subject: [PowerPC] Support basic compare mnemonics This adds support for the basic mnemoics (with the L operand) for the fixed-point compare instructions. These are defined as aliases for the already existing CMPW/CMPD patterns, depending on the value of L. This requires use of InstAlias patterns with immediate literal operands. To make this work, we need two further changes: - define a RegisterPrefix, because otherwise literals 0 and 1 would be parsed as literal register names - provide a PPCAsmParser::validateTargetOperandClass routine to recognize immediate literals (like ARM does) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185826 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 24 ++++++++++++++++++++++++ lib/Target/PowerPC/PPC.td | 10 ++++++++++ lib/Target/PowerPC/PPCInstrInfo.td | 9 +++++++++ 3 files changed, 43 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 237ecdc..790a98e 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -229,6 +229,8 @@ public: SmallVectorImpl &Operands); virtual bool ParseDirective(AsmToken DirectiveID); + + unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); }; /// PPCOperand - Instances of this class represent a parsed PowerPC machine @@ -1232,3 +1234,25 @@ extern "C" void LLVMInitializePowerPCAsmParser() { #define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION #include "PPCGenAsmMatcher.inc" + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, + unsigned Kind) { + // If the kind is a token for a literal immediate, check if our asm + // operand matches. This is for InstAliases which have a fixed-value + // immediate in the syntax. + int64_t ImmVal; + switch (Kind) { + case MCK_0: ImmVal = 0; break; + case MCK_1: ImmVal = 1; break; + default: return Match_InvalidOperand; + } + + PPCOperand *Op = static_cast(AsmOp); + if (Op->isImm() && Op->getImm() == ImmVal) + return Match_Success; + + return Match_InvalidOperand; +} + diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index eb73c67..d7e2cad 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -272,10 +272,20 @@ def PPCAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; } +def PPCAsmParserVariant : AsmParserVariant { + int Variant = 0; + + // We do not use hard coded registers in asm strings. However, some + // InstAlias definitions use immediate literals. Set RegisterPrefix + // so that those are not misinterpreted as registers. + string RegisterPrefix = "%"; +} + def PPC : Target { // Information about the instructions. let InstructionSet = PPCInstrInfo; let AssemblyWriters = [PPCAsmWriter]; let AssemblyParsers = [PPCAsmParser]; + let AssemblyParserVariants = [PPCAsmParserVariant]; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index fbf61f0..3aafb5c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -2578,6 +2578,15 @@ def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>; def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>; def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>; +def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>; +def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>; +def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>; + multiclass TrapExtendedMnemonic { def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>; def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>; -- cgit v1.1 From 19d2b78978905cfde0a0d7190c8480219fb2d1c6 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 8 Jul 2013 15:20:38 +0000 Subject: [PowerPC] Support time base instructions This adds support for the old-style time base instructions; while new programs are supposed to use mfspr, the mftb instructions are still supported and in use by existing assembler files. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185829 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.td | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 3aafb5c..d4969f6 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1853,6 +1853,9 @@ def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR), def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), "mtspr $SPR, $RT", SprMTSPR>; +def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), + "mftb $RT, $SPR", SprMFTB>; + let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), "mfctr $rT", SprMFSPR>, @@ -2335,6 +2338,9 @@ def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)> def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>; def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>; +def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>; +def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>; + def : InstAlias<"xnop", (XORI R0, R0, 0)>; def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; -- cgit v1.1 From ad3b34d1bc4eaa92a95c56fe32fd18a6f36f62f4 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 8 Jul 2013 17:30:25 +0000 Subject: PPC: Mark vector FREM as Expand by default Another bug found by llvm-stress! This fixes crashing with: LLVM ERROR: Cannot select: v4f32 = frem ... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185840 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9c2856f..685b082 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -392,6 +392,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); -- cgit v1.1 From dc2d418dd29ad9396aea06f2b72c9a7d29b30940 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 8 Jul 2013 18:18:52 +0000 Subject: ARM: Improve codegen for generic vselect. Fall back to by-element insert rather than building it up on the stack. rdar://14351991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185846 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 991a703..8c4a3f1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4734,6 +4734,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we + // know the default expansion would otherwise fall back on something even + // worse. For a vector with one or two non-undef values, that's + // scalar_to_vector for the elements followed by a shuffle (provided the + // shuffle is valid for the target) and materialization element by element + // on the stack followed by a load for everything else. + if (!isConstant && !usesOnlyOneValue) { + SDValue Vec = DAG.getUNDEF(VT); + for (unsigned i = 0 ; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + SDValue LaneIdx = DAG.getConstant(i, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); + } + return Vec; + } + return SDValue(); } -- cgit v1.1 From 881b0b5c77ec3f6849e32b7763b6c75057b81501 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Mon, 8 Jul 2013 19:52:51 +0000 Subject: Add a comment to this change, requested by Eric Christopher. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185853 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index da0fe2c..4eda5dc 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3491,6 +3491,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ else continue; + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue. if (Kind == InlineAsm::Kind_Imm) { SDValue op = N->getOperand(++i); AsmNodeOperands.push_back(op); -- cgit v1.1 From 947d447ee0ac927cc308e5e53062e0edb71e7d8e Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 8 Jul 2013 20:00:03 +0000 Subject: PPC: Mark vector CC action for SETO and SETONE as Expand Another bug found by llvm-stress! This fixes hitting llvm_unreachable("Invalid integer vector compare condition"); at the end of getVCmpInst in PPCISelDAGToDAG. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185855 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 685b082..1759c04 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -487,6 +487,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); + + setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); } if (Subtarget->has64BitSupport()) { -- cgit v1.1 From a68f58ab2bec6a024afae498e4082ddd8b01f178 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 8 Jul 2013 20:20:51 +0000 Subject: [PowerPC] Always use "assembler dialect" 1 A setting in MCAsmInfo defines the "assembler dialect" to use. This is used by common code to choose between alternatives in a multi-alternative GNU inline asm statement like the following: __asm__ ("{sfe|subfe} %0,%1,%2" : "=r" (out) : "r" (in1), "r" (in2)); The meaning of these dialects is platform specific, and GCC defines those for PowerPC to use dialect 0 for old-style (POWER) mnemonics and 1 for new-style (PowerPC) mnemonics, like in the example above. To be compatible with inline asm used with GCC, LLVM ought to do the same. Specifically, this means we should always use assembler dialect 1 since old-style mnemonics really aren't supported on any current platform. However, the current LLVM back-end uses: AssemblerDialect = 1; // New-Style mnemonics. in PPCMCAsmInfoDarwin, and AssemblerDialect = 0; // Old-Style mnemonics. in PPCLinuxMCAsmInfo. The Linux setting really isn't correct, we should be using new-style mnemonics everywhere. This is changed by this commit. Unfortunately, the setting of this variable is overloaded in the back-end to decide whether or not we are on a Darwin target. This is done in PPCInstPrinter (the "SyntaxVariant" is initialized from the MCAsmInfo AssemblerDialect setting), and also in PPCMCExpr. Setting AssemblerDialect to 1 for both Darwin and Linux no longer allows us to make this distinction. Instead, this patch uses the MCSubtargetInfo passed to createPPCMCInstPrinter to distinguish Darwin targets, and ignores the SyntaxVariant parameter. As to PPCMCExpr, this patch adds an explicit isDarwin argument that needs to be passed in by the caller when creating a target MCExpr. (To do so this patch implicitly also reverts commit 184441.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185858 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 2 +- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 9 ++++---- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 2 +- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 5 ++--- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 25 ++++++++++++---------- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 3 ++- lib/Target/PowerPC/PPC.h | 2 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 23 +++++++++++--------- lib/Target/PowerPC/PPCMCInstLower.cpp | 18 ++++++++-------- 9 files changed, 47 insertions(+), 42 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 790a98e..af91ffb 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1015,7 +1015,7 @@ ParseExpression(const MCExpr *&EVal) { PPCMCExpr::VariantKind Variant; const MCExpr *E = ExtractModifierFromExpr(EVal, Variant); if (E) - EVal = PPCMCExpr::Create(Variant, E, getParser().getContext()); + EVal = PPCMCExpr::Create(Variant, E, false, getParser().getContext()); return false; } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 270c241..8a4c03d 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -21,15 +21,14 @@ namespace llvm { class MCOperand; class PPCInstPrinter : public MCInstPrinter { - // 0 -> AIX, 1 -> Darwin. - unsigned SyntaxVariant; + bool IsDarwin; public: PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, unsigned syntaxVariant) - : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {} + const MCRegisterInfo &MRI, bool isDarwin) + : MCInstPrinter(MAI, MII, MRI), IsDarwin(isDarwin) {} bool isDarwinSyntax() const { - return SyntaxVariant == 1; + return IsDarwin; } virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index bb7ce6f..6822507 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -66,6 +66,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - AssemblerDialect = 0; // Old-Style mnemonics. + AssemblerDialect = 1; // New-Style mnemonics. } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index db0f57d..9529267 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -17,9 +17,8 @@ using namespace llvm; const PPCMCExpr* PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr, - MCContext &Ctx) { - int AssemblerDialect = Ctx.getAsmInfo()->getAssemblerDialect(); - return new (Ctx) PPCMCExpr(Kind, Expr, AssemblerDialect); + bool isDarwin, MCContext &Ctx) { + return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin); } void PPCMCExpr::PrintImpl(raw_ostream &OS) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 3cbb493..e44c7c1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -32,29 +32,32 @@ public: private: const VariantKind Kind; const MCExpr *Expr; - const int AssemblerDialect; + bool IsDarwin; explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr, - int _AssemblerDialect) - : Kind(_Kind), Expr(_Expr), AssemblerDialect(_AssemblerDialect) {} + bool _IsDarwin) + : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {} public: /// @name Construction /// @{ static const PPCMCExpr *Create(VariantKind Kind, const MCExpr *Expr, - MCContext &Ctx); + bool isDarwin, MCContext &Ctx); - static const PPCMCExpr *CreateLo(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_PPC_LO, Expr, Ctx); + static const PPCMCExpr *CreateLo(const MCExpr *Expr, + bool isDarwin, MCContext &Ctx) { + return Create(VK_PPC_LO, Expr, isDarwin, Ctx); } - static const PPCMCExpr *CreateHi(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_PPC_HI, Expr, Ctx); + static const PPCMCExpr *CreateHi(const MCExpr *Expr, + bool isDarwin, MCContext &Ctx) { + return Create(VK_PPC_HI, Expr, isDarwin, Ctx); } - static const PPCMCExpr *CreateHa(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_PPC_HA, Expr, Ctx); + static const PPCMCExpr *CreateHa(const MCExpr *Expr, + bool isDarwin, MCContext &Ctx) { + return Create(VK_PPC_HA, Expr, isDarwin, Ctx); } /// @} @@ -68,7 +71,7 @@ public: const MCExpr *getSubExpr() const { return Expr; } /// isDarwinSyntax - True if expression is to be printed using Darwin syntax. - bool isDarwinSyntax() const { return AssemblerDialect == 1; } + bool isDarwinSyntax() const { return IsDarwin; } /// @} diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2da30f9..29c49a5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -117,7 +117,8 @@ static MCInstPrinter *createPPCMCInstPrinter(const Target &T, const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant); + bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin(); + return new PPCInstPrinter(MAI, MII, MRI, isDarwin); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 96b882a..f0d5af2 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -40,7 +40,7 @@ namespace llvm { FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP); + AsmPrinter &AP, bool isDarwin); /// \brief Creates an PPC-specific Target Transformation Info pass. ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8f41b2e..8a6c514 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -352,7 +352,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDtocCPT: case PPC::LDtoc: { // Transform %X3 = LDtoc , %X2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. @@ -381,7 +381,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStocHA: { // Transform %Xd = ADDIStocHA %X2, - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to ADDIS8. If the global address is external, // has common linkage, is a function address, or is a jump table @@ -425,7 +425,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::LDtocL: { // Transform %Xd = LDtocL , %Xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to LD. If the global address is external, has // common linkage, or is a jump table address, then reference the @@ -462,7 +462,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDItocL: { // Transform %Xd = ADDItocL %Xs, - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to ADDI8. If the global address is external, then // generate a TOC entry and reference that. Otherwise reference the @@ -514,7 +514,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::LDgotTprelL: { // Transform %Xd = LDgotTprelL , %Xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to LD. TmpInst.setOpcode(PPC::LD); @@ -720,7 +720,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); OutStreamer.EmitInstruction(TmpInst); } @@ -891,6 +891,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isDarwin = Subtarget.isDarwin(); const TargetLoweringObjectFileMachO &TLOFMacho = static_cast(getObjFileLowering()); @@ -930,7 +931,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // mflr r11 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) - const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, OutContext); + const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) @@ -940,7 +941,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) - const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, OutContext); + const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(SubLo16).addExpr(SubLo16) @@ -985,14 +986,16 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); // lis r11, ha16(LazyPtr) - const MCExpr *LazyPtrHa16 = PPCMCExpr::CreateHa(LazyPtrExpr, OutContext); + const MCExpr *LazyPtrHa16 = + PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS) .addReg(PPC::R11) .addExpr(LazyPtrHa16)); // ldu r12, lo16(LazyPtr)(r11) // lwzu r12, lo16(LazyPtr)(r11) - const MCExpr *LazyPtrLo16 = PPCMCExpr::CreateLo(LazyPtrExpr, OutContext); + const MCExpr *LazyPtrLo16 = + PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index b7e88d4..d69aa4a 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -105,7 +105,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, - AsmPrinter &Printer) { + AsmPrinter &Printer, bool isDarwin) { MCContext &Ctx = Printer.OutContext; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; @@ -150,10 +150,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Add ha16() / lo16() markers if required. switch (access) { case PPCII::MO_LO: - Expr = PPCMCExpr::CreateLo(Expr, Ctx); + Expr = PPCMCExpr::CreateLo(Expr, isDarwin, Ctx); break; case PPCII::MO_HA: - Expr = PPCMCExpr::CreateHa(Expr, Ctx); + Expr = PPCMCExpr::CreateHa(Expr, isDarwin, Ctx); break; } @@ -161,7 +161,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, } void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP) { + AsmPrinter &AP, bool isDarwin) { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -185,17 +185,17 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, break; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP); + MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin); break; case MachineOperand::MO_JumpTableIndex: - MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); + MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); break; case MachineOperand::MO_ConstantPoolIndex: - MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); + MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); break; case MachineOperand::MO_BlockAddress: - MCOp = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), - AP); + MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP, + isDarwin); break; case MachineOperand::MO_RegisterMask: continue; -- cgit v1.1 From cc64dc66e740c0d78ecaca39c33c81b4062edd2e Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 9 Jul 2013 02:07:25 +0000 Subject: X86 fast-isel: Avoid explicit AH subreg reference for [SU]Rem. Explicit references to %AH for an i8 remainder instruction can lead to references to %AH in a REX prefixed instruction, which causes things to blow up. Do the same thing in FastISel as we do for DAG isel and instead shift %AX right by 8 bits and then extract the 8-bit subreg from that result. rdar://14203849 http://llvm.org/bugs/show_bug.cgi?id=16105 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185899 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 669108f..f8f06f6 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1376,10 +1376,37 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { // Generate the DIV/IDIV instruction. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpEntry.OpDivRem)).addReg(Op1Reg); - // Copy output register into result register. - unsigned ResultReg = createResultReg(TypeEntry.RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg); + // For i8 remainder, we can't reference AH directly, as we'll end + // up with bogus copies like %R9B = COPY %AH. Reference AX + // instead to prevent AH references in a REX instruction. + // + // The current assumption of the fast register allocator is that isel + // won't generate explicit references to the GPR8_NOREX registers. If + // the allocator and/or the backend get enhanced to be more robust in + // that regard, this can be, and should be, removed. + unsigned ResultReg = 0; + if ((I->getOpcode() == Instruction::SRem || + I->getOpcode() == Instruction::URem) && + OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) { + unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass); + unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Copy), SourceSuperReg).addReg(X86::AX); + + // Shift AX right by 8 bits instead of using AH. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SHR16ri), + ResultSuperReg).addReg(SourceSuperReg).addImm(8); + + // Now reference the 8-bit subreg of the result. + ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg, + /*Kill=*/true, X86::sub_8bit); + } + // Copy the result out of the physreg if we haven't already. + if (!ResultReg) { + ResultReg = createResultReg(TypeEntry.RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Copy), ResultReg) + .addReg(OpEntry.DivRemResultReg); + } UpdateValueMap(I, ResultReg); return true; -- cgit v1.1 From 842b1bdd940e365898581d6ff54794b8fa1a13c9 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 9 Jul 2013 02:07:28 +0000 Subject: X86: Add comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185900 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index b079281..9465420 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2531,6 +2531,11 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Prevent use of AH in a REX instruction by referencing AX instead. // Shift it down 8 bits. + // + // The current assumption of the register allocator is that isel + // won't generate explicit references to the GPR8_NOREX registers. If + // the allocator and/or the backend get enhanced to be more robust in + // that regard, this can be, and should be, removed. if (HiReg == X86::AH && Subtarget->is64Bit() && !SDValue(Node, 1).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, -- cgit v1.1 From fa55969acb64da32acf6305064c9f6e3c237b74e Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 9 Jul 2013 06:34:51 +0000 Subject: PPC: Allocate RS spill slot for unaligned i64 load/store This fixes another bug found by llvm-stress! If we happen to be doing an i64 load or store into a stack slot that has less than a 4-byte alignment, then the frame-index elimination may need to use an indexed load or store instruction (because the offset may not be a multiple of 4, a requirement of the STD/LD instructions). The extra register needed to hold the offset comes from the register scavenger, and it is possible that the scavenger will need to use an emergency spill slot. As a result, we need to make sure that a spill slot is allocated when doing an i64 load/store into a less-than-4-byte-aligned stack slot. Because test cases for things like this tend to be fairly fragile, I've concatenated a few small bugpoint-reduced test cases together to form the regression test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185907 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 35 ++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1759c04..cf41c02 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1030,6 +1030,35 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, return false; } +// If we happen to be doing an i64 load or store into a stack slot that has +// less than a 4-byte alignment, then the frame-index elimination may need to +// use an indexed load or store instruction (because the offset may not be a +// multiple of 4). The extra register needed to hold the offset comes from the +// register scavenger, and it is possible that the scavenger will need to use +// an emergency spill slot. As a result, we need to make sure that a spill slot +// is allocated when doing an i64 load/store into a less-than-4-byte-aligned +// stack slot. +static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { + // FIXME: This does not handle the LWA case. + if (VT != MVT::i64) + return; + + // This should not be needed for negative FIs, which come from argument + // lowering, because the ABI should guarentee the necessary alignment. + if (FrameIdx < 0) + return; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + if (Align >= 4) + return; + + PPCFunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setHasNonRISpills(); +} + /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better /// represented as reg+reg. If Aligned is true, only accept displacements @@ -1051,6 +1080,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, Disp = DAG.getTargetConstant(imm, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } @@ -1115,9 +1145,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } Disp = DAG.getTargetConstant(0, getPointerTy()); - if (FrameIndexSDNode *FI = dyn_cast(N)) + if (FrameIndexSDNode *FI = dyn_cast(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); + } else Base = N; return true; // [r+0] } -- cgit v1.1 From ff16df71f50231c79c379a146dc55b4d6867cbd9 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 9 Jul 2013 07:59:25 +0000 Subject: [PowerPC] Support .llong and fix .word This adds support for the .llong PowerPC-specifc assembler directive. In doing so, I notices that .word is currently incorrect: it is supposed to define a 2-byte data element, not a 4-byte one. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185911 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index af91ffb..32cf373d 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1177,7 +1177,9 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") - return ParseDirectiveWord(4, DirectiveID.getLoc()); + return ParseDirectiveWord(2, DirectiveID.getLoc()); + if (IDVal == ".llong") + return ParseDirectiveWord(8, DirectiveID.getLoc()); if (IDVal == ".tc") return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc()); return true; -- cgit v1.1 From f6ea5e0d8007234fc74c1ff6ac2c3ca316c41d92 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 9 Jul 2013 09:32:42 +0000 Subject: [SystemZ] Use "STC;MVC" for memset Use "STC;MVC" for memsets that are too big for two STCs or MV...Is yet small enough for a single MVC. As with memcpy, I'm leaving longer cases till later. The number of tests might seem excessive, but f33 & f34 from memset-04.ll failed the first cut because I'd not added the "?:" on the calculation of Size1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185918 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 8 +++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 81 ++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 6 ++ 3 files changed, 95 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index bf35946..b1abc2c 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -245,6 +245,14 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; + + // The main memset sequence is a byte store followed by an MVC. + // Two STC or MV..I stores win over that, but the kind of fused stores + // generated by target-independent code don't when the byte value is + // variable. E.g. "STC ;MHI ,257;STH " is not better + // than "STC;MVC". Handle the choice in target-specific code instead. + MaxStoresPerMemset = 0; + MaxStoresPerMemsetOptSize = 0; } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index d2da9d2..4ca9292 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -44,3 +44,84 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, } return SDValue(); } + +// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by +// Chain, Dst, ByteVal and Size. These cases are expected to use +// MVI, MVHHI, MVHI and MVGHI respectively. +static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, uint64_t ByteVal, uint64_t Size, + unsigned Align, + MachinePointerInfo DstPtrInfo) { + uint64_t StoreVal = ByteVal; + for (unsigned I = 1; I < Size; ++I) + StoreVal |= ByteVal << (I * 8); + return DAG.getStore(Chain, DL, + DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)), + Dst, DstPtrInfo, false, false, Align); +} + +SDValue SystemZSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Dst, SDValue Byte, SDValue Size, + unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const { + EVT DstVT = Dst.getValueType(); + + if (IsVolatile) + return SDValue(); + + if (ConstantSDNode *CSize = dyn_cast(Size)) { + uint64_t Bytes = CSize->getZExtValue(); + if (Bytes == 0) + return SDValue(); + if (ConstantSDNode *CByte = dyn_cast(Byte)) { + // Handle cases that can be done using at most two of + // MVI, MVHI, MVHHI and MVGHI. The latter two can only be + // used if ByteVal is all zeros or all ones; in other casees, + // we can move at most 2 halfwords. + uint64_t ByteVal = CByte->getZExtValue(); + if (ByteVal == 0 || ByteVal == 255 ? + Bytes <= 16 && CountPopulation_64(Bytes) <= 2 : + Bytes <= 4) { + unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); + unsigned Size2 = Bytes - Size1; + SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, + Align, DstPtrInfo); + if (Size2 == 0) + return Chain1; + Dst = DAG.getNode(ISD::ADD, DL, DstVT, Dst, + DAG.getConstant(Size1, DstVT)); + DstPtrInfo = DstPtrInfo.getWithOffset(Size1); + SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, + std::min(Align, Size1), DstPtrInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } else { + // Handle one and two bytes using STC. + if (Bytes <= 2) { + SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + if (Bytes == 1) + return Chain1; + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst, + DAG.getConstant(1, DstVT)); + SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, + DstPtrInfo.getWithOffset(1), + false, false, 1); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); + } + } + assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); + if (Bytes <= 0x101) { + // Copy the byte to the first location and then use MVC to copy + // it to the rest. + Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, + false, false, Align); + SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst, + DAG.getConstant(1, DstVT)); + return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst2, Dst, + DAG.getConstant(Bytes - 1, MVT::i32)); + } + } + return SDValue(); +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 39c1491..9138a9c 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -33,6 +33,12 @@ public: MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE; + + virtual SDValue + EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, + SDValue Chain, SDValue Dst, SDValue Byte, + SDValue Size, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const; }; } -- cgit v1.1 From 2e015ef9bb40e5d9f98db9a9509b9986873089ea Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 9 Jul 2013 09:46:39 +0000 Subject: [SystemZ] Use MVC for simple load/store pairs Look for patterns of the form (store (load ...), ...) in which the two locations are known not to partially overlap. (Identical locations are OK.) These sequences are better implemented by MVC unless either the load or the store could use RELATIVE LONG instructions. The testcase showed that we weren't using LHRL and LGHRL for extload16, only sextloadi16. The patch fixes that too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185919 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 46 ++++++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZInstrFP.td | 4 +++ lib/Target/SystemZ/SystemZInstrInfo.td | 22 ++++++++++++++ lib/Target/SystemZ/SystemZPatterns.td | 14 +++++++++ 4 files changed, 86 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f10ba23..0891adc 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -209,6 +210,8 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + bool storeLoadCanUseMVC(SDNode *N) const; + public: SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel), @@ -533,6 +536,49 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, return Or.getNode(); } +// N is a (store (load ...), ...) pattern. Return true if it can use MVC. +bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { + StoreSDNode *Store = cast(N); + LoadSDNode *Load = cast(Store->getValue().getNode()); + + // MVC is logically a bytewise copy, so can't be used for volatile accesses. + if (Load->isVolatile() || Store->isVolatile()) + return false; + + // Prefer not to use MVC if either address can use ... RELATIVE LONG + // instructions. + assert(Load->getMemoryVT() == Store->getMemoryVT() && + "Should already have checked that the types match"); + uint64_t Size = Load->getMemoryVT().getStoreSize(); + if (Size > 1 && Size <= 8) { + // Prefer LHRL, LRL and LGRL. + if (Load->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) + return false; + // Prefer STHRL, STRL and STGRL. + if (Store->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) + return false; + } + + // There's no chance of overlap if the load is invariant. + if (Load->isInvariant()) + return true; + + // If both operands are aligned, they must be equal or not overlap. + if (Load->getAlignment() >= Size && Store->getAlignment() >= Size) + return true; + + // Otherwise we need to check whether there's an alias. + const Value *V1 = Load->getSrcValue(); + const Value *V2 = Store->getSrcValue(); + if (!V1 || !V2) + return false; + + int64_t End1 = Load->getSrcValueOffset() + Size; + int64_t End2 = Store->getSrcValueOffset() + Size; + return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()), + AliasAnalysis::Location(V2, End2, Store->getTBAAInfo())); +} + SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 4317306..23a3790 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -75,6 +75,10 @@ def : CopySign128; +defm LoadStoreF32 : MVCLoadStore; +defm LoadStoreF64 : MVCLoadStore; +defm LoadStoreF128 : MVCLoadStore; + //===----------------------------------------------------------------------===// // Load instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index b4e5c25..5e13c7f 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -294,6 +294,20 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in [(z_mvc bdaddr12only:$dest, bdaddr12only:$src, imm32len8:$length)]>; +defm LoadStore8_32 : MVCLoadStore; +defm LoadStore16_32 : MVCLoadStore; +defm LoadStore32_32 : MVCLoadStore; + +defm LoadStore8 : MVCLoadStore; +defm LoadStore16 : MVCLoadStore; +defm LoadStore32 : MVCLoadStore; +defm LoadStore64 : MVCLoadStore; + //===----------------------------------------------------------------------===// // Sign extensions //===----------------------------------------------------------------------===// @@ -339,6 +353,14 @@ def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>; def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>; def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; +// We want PC-relative addresses to be tried ahead of BD and BDX addresses. +// However, BDXs have two extra operands and are therefore 6 units more +// complex. +let AddedComplexity = 7 in { + def : Pat<(i32 (extloadi16 pcrel32:$src)), (LHRL pcrel32:$src)>; + def : Pat<(i64 (extloadi16 pcrel32:$src)), (LGHRL pcrel32:$src)>; +} + //===----------------------------------------------------------------------===// // Zero extensions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index fb6c221..74cc5f0 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -65,3 +65,17 @@ multiclass InsertMem; } + +// Use MVC instruction INSN for a load of type LOAD followed by a store +// of type STORE. VT is the type of the intermediate register and LENGTH +// is the number of bytes to copy (which may be smaller than VT). +multiclass MVCLoadStore length> { + def Pat : PatFrag<(ops node:$dest, node:$src), + (store (vt (load node:$src)), node:$dest), + [{ return storeLoadCanUseMVC(N); }]>; + + def : Pat<(!cast(NAME##"Pat") bdaddr12only:$dest, + bdaddr12only:$src), + (insn bdaddr12only:$dest, bdaddr12only:$src, length)>; +} -- cgit v1.1 From 9fb5a6588becc92be1d7cf503d2947b170be3c31 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 9 Jul 2013 09:59:04 +0000 Subject: Add MC assembly/disassembly support for VCVT{A, N, P, M} to V8FP. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185922 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 33 ++++++++++++++++++-- lib/Target/ARM/ARMInstrVFP.td | 51 +++++++++++++++++++++++++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 8 +++-- 3 files changed, 86 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 16b7bc5..ed73d36 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1551,8 +1551,8 @@ class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, // FP, binary, not predicated class ADbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, InstrItinClass itin, string asm, list pattern> - : VFPXI + : VFPXI { // Instruction operands. bits<5> Dd; @@ -1577,7 +1577,7 @@ class ADbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, let Inst{4} = 0; } -// Single precision, unary +// Single precision, unary, predicated class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> @@ -1601,6 +1601,33 @@ class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, let Inst{4} = opcod5; } +// Single precision, unary, non-predicated +class ASuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : VFPXI { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + // Single precision unary, if no NEON. Same as ASuI except not available if // NEON is enabled. class ASuIn opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 27e2df4..27f1578 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -583,6 +583,57 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, let Inst{5} = Dm{4}; } +multiclass vcvt_inst rm> { + let PostEncoderMethod = "" in { + def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), + []>, Requires<[HasV8FP]> { + let Inst{17-16} = rm; + } + + def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), + []>, Requires<[HasV8FP]> { + let Inst{17-16} = rm; + } + + def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), + []>, Requires<[HasV8FP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + + def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), + []>, Requires<[HasV8FP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + } +} + +defm VCVTA : vcvt_inst<"a", 0b00>; +defm VCVTN : vcvt_inst<"n", 0b01>; +defm VCVTP : vcvt_inst<"p", 0b10>; +defm VCVTM : vcvt_inst<"m", 0b11>; + def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f7f1901..56557e3 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4906,7 +4906,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || - Mnemonic.startswith("vsel")) + Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || + Mnemonic == "vcvtm" || Mnemonic.startswith("vsel")) return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -5006,8 +5007,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || Mnemonic == "trap" || Mnemonic == "setend" || - Mnemonic.startswith("cps") || Mnemonic == "vmaxnm" || - Mnemonic == "vminnm" || Mnemonic.startswith("vsel")) { + Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || + Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || + Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { -- cgit v1.1 From b2713e018e1c99bb9a65d2d2e63dc7e3e2222c57 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 9 Jul 2013 10:00:34 +0000 Subject: [PowerPC] Support ".machine any" The PowerPC assembler is supposed to provide a directive .machine that allows switching the supported CPU instruction set on the fly. Since we do not yet check CPU feature sets at all and always accept any available instruction, this is not really useful at this point. However, it makes sense to accept (and ignore) ".machine any" to avoid spuriously rejecting existing assembler files that use this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185924 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 32cf373d..ab29ee7 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -196,6 +196,7 @@ class PPCAsmParser : public MCTargetAsmParser { bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveTC(unsigned Size, SMLoc L); + bool ParseDirectiveMachine(SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl &Operands, @@ -1182,6 +1183,8 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { return ParseDirectiveWord(8, DirectiveID.getLoc()); if (IDVal == ".tc") return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc()); + if (IDVal == ".machine") + return ParseDirectiveMachine(DirectiveID.getLoc()); return true; } @@ -1227,6 +1230,30 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { return ParseDirectiveWord(Size, L); } +/// ParseDirectiveMachine +/// ::= .machine [ cpu | "push" | "pop" ] +bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { + if (getLexer().isNot(AsmToken::Identifier) && + getLexer().isNot(AsmToken::String)) + return Error(L, "unexpected token in directive"); + + StringRef CPU = Parser.getTok().getIdentifier(); + Parser.Lex(); + + // FIXME: Right now, the parser always allows any available + // instruction, so the .machine directive is not useful. + // Implement ".machine any" (by doing nothing) for the benefit + // of existing assembler code. Likewise, we can then implement + // ".machine push" and ".machine pop" as no-op. + if (CPU != "any" && CPU != "push" && CPU != "pop") + return Error(L, "unrecognized machine type"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + + return false; +} + /// Force static initialization. extern "C" void LLVMInitializePowerPCAsmParser() { RegisterMCAsmParser A(ThePPC32Target); -- cgit v1.1 From 8dc741d29f9c9beff8a9f26ff23b307b9df4f8fd Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 9 Jul 2013 11:03:21 +0000 Subject: Add MC assembly/disassembly support for VRINT{Z, X, R} to V8FP. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185926 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrVFP.td | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 27f1578..4ee41e8 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -648,6 +648,27 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } +multiclass vrint_inst_zrx { + def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", + []>, Requires<[HasV8FP]> { + let Inst{7} = op2; + let Inst{16} = op; + } + def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", + []>, Requires<[HasV8FP]> { + let Inst{7} = op2; + let Inst{16} = op; + } +} + +defm VRINTZ : vrint_inst_zrx<"z", 0, 1>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0>; + def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", -- cgit v1.1 From 12f45c3782c0d01bcf9973bbc23dba2b17ce54cb Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 9 Jul 2013 11:26:18 +0000 Subject: Add MC assembly/disassembly support for VRINT{A, N, P, M} to V8FP. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185929 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 26 ++++++++++++++++++++++++++ lib/Target/ARM/ARMInstrVFP.td | 27 +++++++++++++++++++++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 9 ++++++--- 3 files changed, 59 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index ed73d36..9a542b9 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1522,6 +1522,32 @@ class ADuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, let Inst{4} = opcod5; } +// Double precision, unary, not-predicated +class ADuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : VFPXI { + // Instruction operands. + bits<5> Dd; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + // Double precision, binary class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 4ee41e8..cbfd25f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -669,6 +669,33 @@ defm VRINTZ : vrint_inst_zrx<"z", 0, 1>; defm VRINTR : vrint_inst_zrx<"r", 0, 0>; defm VRINTX : vrint_inst_zrx<"x", 1, 0>; +multiclass vrint_inst_anpm rm> { + let PostEncoderMethod = "" in { + def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), + []>, Requires<[HasV8FP]> { + let Inst{17-16} = rm; + } + def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), + []>, Requires<[HasV8FP]> { + let Inst{17-16} = rm; + } + } + + def : InstAlias(NAME#"S") SPR:$Sd, SPR:$Sm)>; + def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm)>; +} + +defm VRINTA : vrint_inst_anpm<"a", 0b00>; +defm VRINTN : vrint_inst_anpm<"n", 0b01>; +defm VRINTP : vrint_inst_anpm<"p", 0b10>; +defm VRINTM : vrint_inst_anpm<"m", 0b11>; + def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 56557e3..f114b7a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4906,8 +4906,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || - Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || - Mnemonic == "vcvtm" || Mnemonic.startswith("vsel")) + Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || + Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || + Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("vsel")) return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -5009,7 +5010,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "trap" || Mnemonic == "setend" || Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || - Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm") { + Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || + Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || + Mnemonic == "vrintm") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { -- cgit v1.1 From f79b9b859384fbbc065066e5978e39e09a1cc899 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 9 Jul 2013 15:03:03 +0000 Subject: R600: Print Export Swizzle git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185939 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600Instructions.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 15dcf14..cb887d1 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -522,9 +522,9 @@ let usesCustomInserter = 1, isNotDuplicable = 1 in { class ExportSwzInst : InstR600ISA<( outs), (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, - i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst, + RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst, i32imm:$eop), - !strconcat("EXPORT", " $gpr"), + !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"), []>, ExportWord0, ExportSwzWord1 { let elem_size = 3; let Inst{31-0} = Word0; -- cgit v1.1 From c6f13db656c7649f933c74c4f90c09ff74de52a8 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 9 Jul 2013 15:03:11 +0000 Subject: R600: Use DAG lowering pass to handle fcos/fsin NOTE: This is a candidate for the stable branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185940 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.h | 2 ++ lib/Target/R600/R600ISelLowering.cpp | 39 +++++++++++++++++++++++++++++++++++- lib/Target/R600/R600ISelLowering.h | 2 ++ lib/Target/R600/R600Instructions.td | 32 +++++++++-------------------- 4 files changed, 52 insertions(+), 23 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index d739a01..7f4468c 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -121,6 +121,8 @@ enum { // End AMDIL ISD Opcodes DWORDADDR, FRACT, + COS_HW, + SIN_HW, FMAX, SMAX, UMAX, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index ce2aa92..4413734 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -26,7 +26,8 @@ using namespace llvm; R600TargetLowering::R600TargetLowering(TargetMachine &TM) : - AMDGPUTargetLowering(TM) { + AMDGPUTargetLowering(TM), + Gen(TM.getSubtarget().getGeneration()) { addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); @@ -38,6 +39,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::FDIV, MVT::v4f32, Expand); setOperationAction(ISD::FSUB, MVT::v4f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Custom); + setOperationAction(ISD::FSIN, MVT::f32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); @@ -473,6 +477,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const R600MachineFunctionInfo *MFI = MF.getInfo(); switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::FCOS: + case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); @@ -723,6 +729,37 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, } } +SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { + // On hw >= R700, COS/SIN input must be between -1. and 1. + // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5) + EVT VT = Op.getValueType(); + SDValue Arg = Op.getOperand(0); + SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT, + DAG.getNode(ISD::FADD, SDLoc(Op), VT, + DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg, + DAG.getConstantFP(0.15915494309, MVT::f32)), + DAG.getConstantFP(0.5, MVT::f32))); + unsigned TrigNode; + switch (Op.getOpcode()) { + case ISD::FCOS: + TrigNode = AMDGPUISD::COS_HW; + break; + case ISD::FSIN: + TrigNode = AMDGPUISD::SIN_HW; + break; + default: + llvm_unreachable("Wrong trig opcode"); + } + SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT, + DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart, + DAG.getConstantFP(-0.5, MVT::f32))); + if (Gen >= AMDGPUSubtarget::R700) + return TrigVal; + // On R600 hw, COS/SIN input must be between -Pi and Pi. + return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal, + DAG.getConstantFP(3.14159265359, MVT::f32)); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::SETCC, diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index d4ba4c8..a033fcb 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -40,6 +40,7 @@ public: SmallVectorImpl &InVals) const; virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const; private: + unsigned Gen; /// Each OpenCL kernel has nine implicit parameters that are stored in the /// first nine dwords of a Vertex Buffer. These implicit parameters are /// lowered to load instructions which retreive the values from the Vertex @@ -60,6 +61,7 @@ private: SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index cb887d1..735dcfc 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -364,6 +364,14 @@ def DOT4 : SDNode<"AMDGPUISD::DOT4", [] >; +def COS_HW : SDNode<"AMDGPUISD::COS_HW", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]> +>; + +def SIN_HW : SDNode<"AMDGPUISD::SIN_HW", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]> +>; + def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>; def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>; @@ -1080,14 +1088,14 @@ class RECIPSQRT_IEEE_Common inst> : R600_1OP < } class SIN_Common inst> : R600_1OP < - inst, "SIN", []>{ + inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{ let Trig = 1; let TransOnly = 1; let Itinerary = TransALU; } class COS_Common inst> : R600_1OP < - inst, "COS", []> { + inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> { let Trig = 1; let TransOnly = 1; let Itinerary = TransALU; @@ -1228,18 +1236,6 @@ let Predicates = [isR600] in { } -// Helper pattern for normalizing inputs to triginomic instructions for R700+ -// cards. -class COS_PAT : Pat< - (fcos f32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) ->; - -class SIN_PAT : Pat< - (fsin f32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) ->; - //===----------------------------------------------------------------------===// // R700 Only instructions //===----------------------------------------------------------------------===// @@ -1247,10 +1243,6 @@ class SIN_PAT : Pat< let Predicates = [isR700] in { def SIN_r700 : SIN_Common<0x6E>; def COS_r700 : COS_Common<0x6F>; - - // R700 normalizes inputs to SIN/COS the same as EG - def : SIN_PAT ; - def : COS_PAT ; } //===----------------------------------------------------------------------===// @@ -1276,8 +1268,6 @@ def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def : POW_Common ; -def : SIN_PAT ; -def : COS_PAT ; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; //===----------------------------------------------------------------------===// @@ -1726,8 +1716,6 @@ def COS_cm : COS_Common<0x8E>; } // End isVector = 1 def : POW_Common ; -def : SIN_PAT ; -def : COS_PAT ; defm DIV_cm : DIV_Common; -- cgit v1.1 From 07bb3f1d0a6638dd96566eeca1b38f315576216c Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 9 Jul 2013 15:03:19 +0000 Subject: R600: Fix wrong export reswizzling git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185941 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600OptimizeVectorRegisters.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp index 4636426..c47bc39 100644 --- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp @@ -183,10 +183,6 @@ MachineInstr *R600VectorRegMerger::RebuildVector( std::vector UpdatedUndef = BaseRSI->UndefReg; for (DenseMap::iterator It = RSI->RegToChan.begin(), E = RSI->RegToChan.end(); It != E; ++It) { - if (BaseRSI->RegToChan.find((*It).first) != BaseRSI->RegToChan.end()) { - UpdatedRegToChan[(*It).first] = (*It).second; - continue; - } unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); unsigned SubReg = (*It).first; unsigned Swizzle = (*It).second; -- cgit v1.1 From f4bdec2ebeb1306a77e9377583c5799199775f88 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 9 Jul 2013 15:03:25 +0000 Subject: R600: Fix a rare bug where swizzle optimization returns wrong values git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185942 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ISelLowering.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 4413734..ad4fd87 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1296,6 +1296,8 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, VectorEntry.getOperand(3) }; bool isUnmovable[4] = { false, false, false, false }; + for (unsigned i = 0; i < 4; i++) + RemapSwizzle[i] = i; for (unsigned i = 0; i < 4; i++) { if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { @@ -1304,8 +1306,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, if (!isUnmovable[Idx]) { // Swap i and Idx std::swap(NewBldVec[Idx], NewBldVec[i]); - RemapSwizzle[Idx] = i; - RemapSwizzle[i] = Idx; + std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]); } isUnmovable[Idx] = true; } -- cgit v1.1 From f2cfef8172fd2eceb036b8caff50623a189ba2ff Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Tue, 9 Jul 2013 15:03:33 +0000 Subject: R600: Do not predicated basic block with multiple alu clause Test is not included as it is several 1000 lines long. To test this functionnality, a test case must generate at least 2 ALU clauses, where an ALU clause is ~110 instructions long. NOTE: This is a candidate for the stable branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185943 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUTargetMachine.cpp | 5 +++- lib/Target/R600/R600ControlFlowFinalizer.cpp | 2 ++ lib/Target/R600/R600EmitClauseMarkers.cpp | 14 ++++++--- lib/Target/R600/R600InstrInfo.cpp | 45 ++++++++++++++++++++++++++++ lib/Target/R600/R600Instructions.td | 2 +- lib/Target/R600/R600Packetizer.cpp | 3 +- 6 files changed, 64 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 90f72de..7a14e50 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -148,7 +148,11 @@ bool AMDGPUPassConfig::addPostRegAlloc() { } bool AMDGPUPassConfig::addPreSched2() { + const AMDGPUSubtarget &ST = TM->getSubtarget(); + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { + addPass(createR600EmitClauseMarkers(*TM)); + } addPass(&IfConverterID); return false; } @@ -158,7 +162,6 @@ bool AMDGPUPassConfig::addPreEmitPass() { if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); - addPass(createR600EmitClauseMarkers(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); addPass(&FinalizeMachineBundlesID); addPass(createR600Packetizer(*TM)); diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 887c808..932a6a7 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -256,6 +256,7 @@ private: ClauseContent.push_back(MILit); } } + assert(ClauseContent.size() < 128 && "ALU clause is too big"); ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1); return ClauseFile(ClauseHead, ClauseContent); } @@ -276,6 +277,7 @@ private: void EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, unsigned &CfCount) { + Clause.first->getOperand(0).setImm(0); CounterPropagateAddr(Clause.first, CfCount); MachineBasicBlock *BB = Clause.first->getParent(); BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE)) diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp index 0aea2d7..c1da64c 100644 --- a/lib/Target/R600/R600EmitClauseMarkers.cpp +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -32,6 +32,7 @@ class R600EmitClauseMarkersPass : public MachineFunctionPass { private: static char ID; const R600InstrInfo *TII; + int Address; unsigned OccupiedDwords(MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -159,7 +160,7 @@ private: } MachineBasicBlock::iterator - MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { MachineBasicBlock::iterator ClauseHead = I; std::vector > KCacheBanks; bool PushBeforeModifier = false; @@ -199,20 +200,25 @@ private: unsigned Opcode = PushBeforeModifier ? AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) - .addImm(0) // ADDR + // We don't use the ADDR field until R600ControlFlowFinalizer pass, where + // it is safe to assume it is 0. However if we always put 0 here, the ifcvt + // pass may assume that identical ALU clause starter at the beginning of a + // true and false branch can be factorized which is not the case. + .addImm(Address++) // ADDR .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 .addImm(KCacheBanks.empty()?0:2) // KM0 .addImm((KCacheBanks.size() < 2)?0:2) // KM1 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 - .addImm(AluInstCount); // COUNT + .addImm(AluInstCount) // COUNT + .addImm(1); // Enabled return I; } public: R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID), - TII(0) { } + TII(0), Address(0) { } virtual bool runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getTarget().getInstrInfo()); diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 969a7ce..d0935fa 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -651,6 +651,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { }; } +static +MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { + for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); + It != E; ++It) { + if (It->getOpcode() == AMDGPU::CF_ALU || + It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) + return llvm::prior(It.base()); + } + return MBB.end(); +} + unsigned R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, @@ -672,6 +683,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) .addMBB(TBB) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); + if (CfAlu == MBB.end()) + return 1; + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); + CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); return 1; } } else { @@ -683,6 +699,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, .addMBB(TBB) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); + if (CfAlu == MBB.end()) + return 2; + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); + CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); return 2; } } @@ -706,6 +727,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); clearFlag(predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); + if (CfAlu == MBB.end()) + break; + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); + CfAlu->setDesc(get(AMDGPU::CF_ALU)); break; } case AMDGPU::JUMP: @@ -726,6 +752,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); clearFlag(predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); + if (CfAlu == MBB.end()) + break; + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); + CfAlu->setDesc(get(AMDGPU::CF_ALU)); break; } case AMDGPU::JUMP: @@ -760,6 +791,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const { if (MI->getOpcode() == AMDGPU::KILLGT) { return false; + } else if (MI->getOpcode() == AMDGPU::CF_ALU) { + // If the clause start in the middle of MBB then the MBB has more + // than a single clause, unable to predicate several clauses. + if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI)) + return false; + // TODO: We don't support KC merging atm + if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0) + return false; + return true; } else if (isVector(*MI)) { return false; } else { @@ -855,6 +895,11 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl &Pred) const { int PIdx = MI->findFirstPredOperandIdx(); + if (MI->getOpcode() == AMDGPU::CF_ALU) { + MI->getOperand(8).setImm(0); + return true; + } + if (PIdx != -1) { MachineOperand &PMO = MI->getOperand(PIdx); PMO.setReg(Pred[2].getReg()); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 735dcfc..df5c438 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -563,7 +563,7 @@ class ALU_CLAUSE inst, string OpName> : AMDGPUInst <(outs), (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1, i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, -i32imm:$COUNT), +i32imm:$COUNT, i32imm:$Enabled), !strconcat(OpName, " $COUNT, @$ADDR, " "KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"), [] >, CF_ALU_WORD0, CF_ALU_WORD1 { diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 5ee51fa..f4219bd 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -304,7 +304,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator End = MBB->end(); MachineBasicBlock::iterator MI = MBB->begin(); while (MI != End) { - if (MI->isKill()) { + if (MI->isKill() || + (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) { MachineBasicBlock::iterator DeleteMI = MI; ++MI; MBB->erase(DeleteMI); -- cgit v1.1 From 7a34599db017a5486cf7cd11eb124984acec8286 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 9 Jul 2013 16:41:09 +0000 Subject: [PowerPC] Revert r185476 and fix up TLS variant kinds In the commit message to r185476 I wrote: >The PowerPC-specific modifiers VK_PPC_TLSGD and VK_PPC_TLSLD >correspond exactly to the generic modifiers VK_TLSGD and VK_TLSLD. >This causes some confusion with the asm parser, since VK_PPC_TLSGD >is output as @tlsgd, which is then read back in as VK_TLSGD. > >To avoid this confusion, this patch removes the PowerPC-specific >modifiers and uses the generic modifiers throughout. (The only >drawback is that the generic modifiers are printed in upper case >while the usual convention on PowerPC is to use lower-case modifiers. >But this is just a cosmetic issue.) This was unfortunately incorrect, there is is fact another, serious drawback to using the default VK_TLSLD/VK_TLSGD variant kinds: using these causes ELFObjectWriter::RelocNeedsGOT to return true, which in turn causes the ELFObjectWriter to emit an undefined reference to _GLOBAL_OFFSET_TABLE_. This is a problem on powerpc64, because it uses the TOC instead of the GOT, and the linker does not provide _GLOBAL_OFFSET_TABLE_, so the symbol remains undefined. This means shared libraries using TLS built with the integrated assembler are currently broken. While the whole RelocNeedsGOT / _GLOBAL_OFFSET_TABLE_ situation probably ought to be properly fixed at some point, for now I'm simply reverting the r185476 commit. Now this in turn exposes the breakage of handling @tlsgd/@tlsld in the asm parser that this check-in was originally intended to fix. To avoid this regression, I'm also adding a different fix for this problem: while common code now parses @tlsgd as VK_TLSGD, a special hack in the asm parser translates this code to the platform-specific VK_PPC_TLSGD that the back-end now expects. While this is not really pretty, it's self-contained and shouldn't hurt anything else for now. One the underlying problem is fixed, this hack can be reverted again. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185945 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 54 ++++++++++++++++++++++ .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 4 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 6 ++- 3 files changed, 60 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index ab29ee7..3c677cc 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -190,6 +190,7 @@ class PPCAsmParser : public MCTargetAsmParser { const MCExpr *ExtractModifierFromExpr(const MCExpr *E, PPCMCExpr::VariantKind &Variant); + const MCExpr *FixupVariantKind(const MCExpr *E); bool ParseExpression(const MCExpr *&EVal); bool ParseOperand(SmallVectorImpl &Operands); @@ -1006,6 +1007,57 @@ ExtractModifierFromExpr(const MCExpr *E, llvm_unreachable("Invalid expression kind!"); } +/// Find all VK_TLSGD/VK_TLSLD symbol references in expression and replace +/// them by VK_PPC_TLSGD/VK_PPC_TLSLD. This is necessary to avoid having +/// _GLOBAL_OFFSET_TABLE_ created via ELFObjectWriter::RelocNeedsGOT. +/// FIXME: This is a hack. +const MCExpr *PPCAsmParser:: +FixupVariantKind(const MCExpr *E) { + MCContext &Context = getParser().getContext(); + + switch (E->getKind()) { + case MCExpr::Target: + case MCExpr::Constant: + return E; + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr *SRE = cast(E); + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + + switch (SRE->getKind()) { + case MCSymbolRefExpr::VK_TLSGD: + Variant = MCSymbolRefExpr::VK_PPC_TLSGD; + break; + case MCSymbolRefExpr::VK_TLSLD: + Variant = MCSymbolRefExpr::VK_PPC_TLSLD; + break; + default: + return E; + } + return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, Context); + } + + case MCExpr::Unary: { + const MCUnaryExpr *UE = cast(E); + const MCExpr *Sub = FixupVariantKind(UE->getSubExpr()); + if (Sub == UE->getSubExpr()) + return E; + return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context); + } + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(E); + const MCExpr *LHS = FixupVariantKind(BE->getLHS()); + const MCExpr *RHS = FixupVariantKind(BE->getRHS()); + if (LHS == BE->getLHS() && RHS == BE->getRHS()) + return E; + return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context); + } + } + + llvm_unreachable("Invalid expression kind!"); +} + /// Parse an expression. This differs from the default "parseExpression" /// in that it handles complex \code @l/@ha \endcode modifiers. bool PPCAsmParser:: @@ -1013,6 +1065,8 @@ ParseExpression(const MCExpr *&EVal) { if (getParser().parseExpression(EVal)) return true; + EVal = FixupVariantKind(EVal); + PPCMCExpr::VariantKind Variant; const MCExpr *E = ExtractModifierFromExpr(EVal, Variant); if (E) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 0833b4e..54de70e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -312,10 +312,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_nofixup: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_TLSGD: + case MCSymbolRefExpr::VK_PPC_TLSGD: Type = ELF::R_PPC64_TLSGD; break; - case MCSymbolRefExpr::VK_TLSLD: + case MCSymbolRefExpr::VK_PPC_TLSLD: Type = ELF::R_PPC64_TLSLD; break; case MCSymbolRefExpr::VK_PPC_TLS: diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8a6c514..66d9466 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -573,7 +573,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_TLSGD, OutContext); + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, + OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS) .addExpr(TlsRef) .addExpr(SymVar)); @@ -624,7 +625,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_TLSLD, OutContext); + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, + OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS) .addExpr(TlsRef) .addExpr(SymVar)); -- cgit v1.1 From e54885af9b54bfc7436a928a48d3db1ef88a2a70 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Tue, 9 Jul 2013 18:16:56 +0000 Subject: AArch64/PowerPC/SystemZ/X86: This patch fixes the interface, usage, and all in-tree implementations of TargetLoweringBase::isFMAFasterThanMulAndAdd in order to resolve the following issues with fmuladd (i.e. optional FMA) intrinsics: 1. On X86(-64) targets, ISD::FMA nodes are formed when lowering fmuladd intrinsics even if the subtarget does not support FMA instructions, leading to laughably bad code generation in some situations. 2. On AArch64 targets, ISD::FMA nodes are formed for operations on fp128, resulting in a call to a software fp128 FMA implementation. 3. On PowerPC targets, FMAs are not generated from fmuladd intrinsics on types like v2f32, v8f32, v4f64, etc., even though they promote, split, scalarize, etc. to types that support hardware FMAs. The function has also been slightly renamed for consistency and to force a merge/build conflict for any out-of-tree target implementing it. To resolve, see comments and fixed in-tree examples. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185956 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 21 +++++++++++++++++++++ lib/Target/AArch64/AArch64ISelLowering.h | 10 +++++----- lib/Target/PowerPC/PPCISelLowering.cpp | 9 +++------ lib/Target/PowerPC/PPCISelLowering.h | 10 +++++----- lib/Target/SystemZ/SystemZISelLowering.cpp | 20 ++++++++++++++++++++ lib/Target/SystemZ/SystemZISelLowering.h | 4 +--- lib/Target/X86/X86ISelLowering.cpp | 21 +++++++++++++++++++++ lib/Target/X86/X86ISelLowering.h | 10 +++++----- 8 files changed, 81 insertions(+), 24 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 84051d4..1fa1edb 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2798,6 +2798,27 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } +bool +AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f16: + case MVT::f32: + case MVT::f64: + return true; + case MVT::f128: + return false; + default: + break; + } + + return false; +} + AArch64TargetLowering::ConstraintType AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { if (Constraint.size() == 1) { diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 901a9be..320346e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -229,11 +229,11 @@ public: virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than - /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to - /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd - /// is expanded to mul + add. - virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; ConstraintType getConstraintType(const std::string &Constraint) const; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index cf41c02..812f096 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7809,18 +7809,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, return true; } -/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than -/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to -/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd -/// is expanded to mul + add. -bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { +bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: - case MVT::v4f32: return true; default: break; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 4801a41..776ad2a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -459,11 +459,11 @@ namespace llvm { /// relative to software emulation. virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; - /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than - /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to - /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd - /// is expanded to mul + add. - virtual bool isFMAFasterThanMulAndAdd(EVT VT) const; + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index b1abc2c..d344134 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -255,6 +255,26 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) MaxStoresPerMemsetOptSize = 0; } +bool +SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + case MVT::f128: + return false; + default: + break; + } + + return false; +} + bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. return Imm.isZero() || Imm.isNegZero(); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 4ddfcbb..88e1fa7 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -129,9 +129,7 @@ public: virtual EVT getSetCCResultType(LLVMContext &, EVT) const { return MVT::i32; } - virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE { - return true; - } + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE; virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a680ac0..f00df35 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12966,6 +12966,27 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; } +bool +X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + if (!(Subtarget->hasFMA() || Subtarget->hasFMA4())) + return false; + + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + default: + break; + } + + return false; +} + bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { // i16 instructions are longer (0x66 prefix) and potentially slower. return !(VT1 == MVT::i32 && VT2 == MVT::i16); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0e5e822..8317824 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -646,11 +646,11 @@ namespace llvm { virtual bool isZExtFree(EVT VT1, EVT VT2) const; virtual bool isZExtFree(SDValue Val, EVT VT2) const; - /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than - /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to - /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd - /// is expanded to mul + add. - virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow -- cgit v1.1 From 7c2d8f7b5ea1d0abaed1176f87ea2509e65e82be Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 9 Jul 2013 18:50:20 +0000 Subject: [PowerPC] Better fix for PR16556. A more complete example of the bug in PR16556 was recently provided, showing that the previous fix was not sufficient. The previous fix is reverted herein. The real problem is that ReplaceNodeResults() uses LowerFP_TO_INT as custom lowering for FP_TO_SINT during type legalization, without checking whether the input type is handled by that routine. LowerFP_TO_INT requires the input to be f32 or f64, so we fail when the input is ppcf128. I'm leaving the test case from the initial fix (r185821) in place, and adding the new test as another crash-only check. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185959 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 812f096..791d334 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4720,15 +4720,6 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); - - // If we have a long double here, it must be that we have an undef of - // that type. In this case return an undef of the target type. - if (Src.getValueType() == MVT::ppcf128) { - assert(Src.getOpcode() == ISD::UNDEF && "Unhandled ppcf128!"); - return DAG.getNode(ISD::UNDEF, dl, - Op.getValueType().getSimpleVT().SimpleTy); - } - if (Src.getValueType() == MVT::f32) Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); @@ -5808,6 +5799,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + // LowerFP_TO_INT() can only handle f32 and f64. + if (N->getOperand(0).getValueType() == MVT::ppcf128) + return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; } -- cgit v1.1 From 97c37bb4d4ae5e505350091e520a1354069941e0 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 9 Jul 2013 22:59:22 +0000 Subject: ARM: Fix incorrect pack pattern for thumb2 Propagate the fix from r185712 to Thumb2 codegen as well. Original commit message applies here as well: A "pkhtb x, x, y asr #num" uses the lower 16 bits of "y asr #num" and packs them in the bottom half of "x". An arithmetic and logic shift are only equivalent in this context if the shift amount is 16. We would be shifting in ones into the bottom 16bits instead of zeros if "y" is negative. rdar://14338767 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185982 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index d71824e..ee9eaaa 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2951,7 +2951,12 @@ def t2PKHTB : T2ThreeReg< // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), +// We also can not replace a srl (17..31) by an arithmetic shift we would use in +// pkhtb src1, src2, asr (17..31). +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)), + (t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), -- cgit v1.1 From 9ddfe5ea6f46448cf01e114c971e6bd7ac6ad06c Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Wed, 10 Jul 2013 01:54:24 +0000 Subject: Explicitly define ARMISelLowering::isFMAFasterThanFMulAndFAdd. No functionality change. Currently ARM is the only backend that supports FMA instructions (for at least some subtargets) but does not implement this virtual, so FMAs are never generated except from explicit fma intrinsic calls. Apparently this is due to the fact that it supports both fused (one rounding step) and unfused (two rounding step) multiply + add instructions. This patch clarifies that this the case without changing behavior by implementing the virtual function to simply return false, as the default TargetLoweringBase version does. It is possible that some cpus perform the fused version faster than the unfused version and vice-versa, so the function implementation should be revisited if hard data is found. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185994 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index cb5b680..c7331bd 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -458,6 +458,17 @@ namespace llvm { SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. + /// + /// ARM supports both fused and unfused multiply-add operations; we already + /// lower a pair of fmul and fmadd to the latter so it's not clear that there + /// would be a gain or that the gain would be worthwhile enough to risk + /// correctness bugs. + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } + SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, -- cgit v1.1 From ea870a53a5a0c644e5b15af5ae59d8a4378a4d2a Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Wed, 10 Jul 2013 01:57:39 +0000 Subject: Fix typo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185995 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index c7331bd..6593777 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -464,7 +464,7 @@ namespace llvm { /// expanded to fmul + fadd. /// /// ARM supports both fused and unfused multiply-add operations; we already - /// lower a pair of fmul and fmadd to the latter so it's not clear that there + /// lower a pair of fmul and fadd to the latter so it's not clear that there /// would be a gain or that the gain would be worthwhile enough to risk /// correctness bugs. virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } -- cgit v1.1 From 2ec5933eae2e889225d33bd2f93a35926e958c95 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Wed, 10 Jul 2013 10:18:10 +0000 Subject: Add support for Mips break and syscall insructions. The corresponding test cases are added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185999 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFormats.td | 28 ++++++++++++++++++++++++++++ lib/Target/Mips/MipsInstrInfo.td | 21 +++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 14cfcf9..6073476 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -491,6 +491,34 @@ class TEQ_FM funct> { } //===----------------------------------------------------------------------===// +// System calls format +//===----------------------------------------------------------------------===// + +class SYS_FM funct> +{ + bits<20> code_; + bits<32> Inst; + let Inst{31-26} = 0x0; + let Inst{25-6} = code_; + let Inst{5-0} = funct; +} + +//===----------------------------------------------------------------------===// +// Break instruction format +//===----------------------------------------------------------------------===// + +class BRK_FM funct> +{ + bits<10> code_1; + bits<10> code_2; + bits<32> Inst; + let Inst{31-26} = 0x0; + let Inst{25-16} = code_1; + let Inst{15-6} = code_2; + let Inst{5-0} = funct; +} + +//===----------------------------------------------------------------------===// // // FLOATING POINT INSTRUCTION FORMATS // diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index d2164f7..712e204 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -250,6 +250,12 @@ def simm16 : Operand { def simm20 : Operand { } +def uimm20 : Operand { +} + +def uimm10 : Operand { +} + def simm16_64 : Operand; def shamt : Operand; @@ -637,6 +643,14 @@ class BAL_FT : let hasDelaySlot = 1; let Defs = [RA]; } +// Syscall +class SYS_FT : + InstSE<(outs), (ins uimm20:$code_), + !strconcat(opstr, "\t$code_"), [], NoItinerary, FrmI>; +// Break +class BRK_FT : + InstSE<(outs), (ins uimm10:$code_1, uimm10:$code_2), + !strconcat(opstr, "\t$code_1, $code_2"), [], NoItinerary, FrmOther>; // Sync let hasSideEffects = 1 in @@ -941,6 +955,9 @@ defm SWR : StoreLeftRightM<"swr", MipsSWR, CPURegs>, LW_FM<0x2e>; def SYNC : SYNC_FT, SYNC_FM; def TEQ : TEQ_FT<"teq", CPURegsOpnd>, TEQ_FM<0x34>; +def BREAK : BRK_FT<"break">, BRK_FM<0xd>; +def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>; + /// Load-linked, Store-conditional let Predicates = [NotN64, HasStdEnc] in { def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>; @@ -1119,6 +1136,10 @@ def : InstAlias<"bnez $rs,$offset", def : InstAlias<"beqz $rs,$offset", (BEQ CPURegsOpnd:$rs, ZERO, brtarget:$offset), 1>, Requires<[NotMips64]>; +def : InstAlias<"syscall", (SYSCALL 0), 1>; + +def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; +def : InstAlias<"break", (BREAK 0, 0), 1>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -- cgit v1.1 From 296c1534b4ad835c6d9280145b63ca2b25831228 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Wed, 10 Jul 2013 12:26:26 +0000 Subject: Reverting commit r185999 due to buildboot failure. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186000 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFormats.td | 28 ---------------------------- lib/Target/Mips/MipsInstrInfo.td | 21 --------------------- 2 files changed, 49 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 6073476..14cfcf9 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -491,34 +491,6 @@ class TEQ_FM funct> { } //===----------------------------------------------------------------------===// -// System calls format -//===----------------------------------------------------------------------===// - -class SYS_FM funct> -{ - bits<20> code_; - bits<32> Inst; - let Inst{31-26} = 0x0; - let Inst{25-6} = code_; - let Inst{5-0} = funct; -} - -//===----------------------------------------------------------------------===// -// Break instruction format -//===----------------------------------------------------------------------===// - -class BRK_FM funct> -{ - bits<10> code_1; - bits<10> code_2; - bits<32> Inst; - let Inst{31-26} = 0x0; - let Inst{25-16} = code_1; - let Inst{15-6} = code_2; - let Inst{5-0} = funct; -} - -//===----------------------------------------------------------------------===// // // FLOATING POINT INSTRUCTION FORMATS // diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 712e204..d2164f7 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -250,12 +250,6 @@ def simm16 : Operand { def simm20 : Operand { } -def uimm20 : Operand { -} - -def uimm10 : Operand { -} - def simm16_64 : Operand; def shamt : Operand; @@ -643,14 +637,6 @@ class BAL_FT : let hasDelaySlot = 1; let Defs = [RA]; } -// Syscall -class SYS_FT : - InstSE<(outs), (ins uimm20:$code_), - !strconcat(opstr, "\t$code_"), [], NoItinerary, FrmI>; -// Break -class BRK_FT : - InstSE<(outs), (ins uimm10:$code_1, uimm10:$code_2), - !strconcat(opstr, "\t$code_1, $code_2"), [], NoItinerary, FrmOther>; // Sync let hasSideEffects = 1 in @@ -955,9 +941,6 @@ defm SWR : StoreLeftRightM<"swr", MipsSWR, CPURegs>, LW_FM<0x2e>; def SYNC : SYNC_FT, SYNC_FM; def TEQ : TEQ_FT<"teq", CPURegsOpnd>, TEQ_FM<0x34>; -def BREAK : BRK_FT<"break">, BRK_FM<0xd>; -def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>; - /// Load-linked, Store-conditional let Predicates = [NotN64, HasStdEnc] in { def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>; @@ -1136,10 +1119,6 @@ def : InstAlias<"bnez $rs,$offset", def : InstAlias<"beqz $rs,$offset", (BEQ CPURegsOpnd:$rs, ZERO, brtarget:$offset), 1>, Requires<[NotMips64]>; -def : InstAlias<"syscall", (SYSCALL 0), 1>; - -def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; -def : InstAlias<"break", (BREAK 0, 0), 1>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -- cgit v1.1 From e355d850d6b3f595ce58a7b612502b57ca7f3271 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 10 Jul 2013 15:29:01 +0000 Subject: PPC: Add a better comment about the i64 FI fixup In discussing this change with Bill Schmidt, it was decided that the original comment about negative FIs was incorrect. We'll still exclude them for now, but now with a more-accurate explanation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186005 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 791d334..b39f0d5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1043,8 +1043,19 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { if (VT != MVT::i64) return; - // This should not be needed for negative FIs, which come from argument - // lowering, because the ABI should guarentee the necessary alignment. + // NOTE: We'll exclude negative FIs here, which come from argument + // lowering, because there are no known test cases triggering this problem + // using packed structures (or similar). We can remove this exclusion if + // we find such a test case. The reason why this is so test-case driven is + // because this entire 'fixup' is only to prevent crashes (from the + // register scavenger) on not-really-valid inputs. For example, if we have: + // %a = alloca i1 + // %b = bitcast i1* %a to i64* + // store i64* a, i64 b + // then the store should really be marked as 'align 1', but is not. If it + // were marked as 'align 1' then the indexed form would have been + // instruction-selected initially, and the problem this 'fixup' is preventing + // won't happen regardless. if (FrameIdx < 0) return; -- cgit v1.1 From 0a9aaacd7298e4108eeecef7bad7933ae7cf36c4 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Wed, 10 Jul 2013 16:36:36 +0000 Subject: R600/SI: Add intrinsics for texture sampling with user derivatives Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186008 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 7 ++++++- lib/Target/R600/SIIntrinsics.td | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9c96c08..c9eac7d 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -535,7 +535,7 @@ def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">; //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; -//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; +def IMAGE_SAMPLE_C_D : MIMG_Sampler_Helper <0x0000002a, "IMAGE_SAMPLE_C_D">; //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; @@ -1296,6 +1296,11 @@ multiclass SamplePatterns { def : SampleArrayPattern ; def : SampleShadowPattern ; def : SampleShadowArrayPattern ; + + def : SamplePattern ; + def : SampleArrayPattern ; + def : SampleShadowPattern ; + def : SampleShadowArrayPattern ; } defm : SamplePatterns; diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 224cd2f..d2643e0 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -23,6 +23,7 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_sample : Sample; def int_SI_sampleb : Sample; + def int_SI_sampled : Sample; def int_SI_samplel : Sample; def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; -- cgit v1.1 From 7740daa8ba053294b7448556c049cf6778711d66 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Wed, 10 Jul 2013 16:36:43 +0000 Subject: R600/SI: Initial support for LDS/GDS instructions Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186009 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInsertWaits.cpp | 2 ++ lib/Target/R600/SIInstrFormats.td | 24 ++++++++++++++++++++++++ lib/Target/R600/SIInstrInfo.td | 23 +++++++++++++++++++++++ lib/Target/R600/SIInstructions.td | 3 +++ lib/Target/R600/SILowerControlFlow.cpp | 16 ++++++++++++++++ 5 files changed, 68 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index c36e1dc..d31da45 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -134,6 +134,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { if (TSFlags & SIInstrFlags::LGKM_CNT) { MachineOperand &Op = MI.getOperand(0); + if (!Op.isReg()) + Op = MI.getOperand(1); assert(Op.isReg() && "First LGKM operand must be a register!"); unsigned Reg = Op.getReg(); diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 51f323d..434aa7e 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -281,6 +281,30 @@ class VINTRP op, dag outs, dag ins, string asm, list pattern> : let Uses = [EXEC] in { +class DS op, dag outs, dag ins, string asm, list pattern> : + Enc64 { + + bits<8> vdst; + bits<1> gds; + bits<8> addr; + bits<8> data0; + bits<8> data1; + bits<8> offset0; + bits<8> offset1; + + let Inst{7-0} = offset0; + let Inst{15-8} = offset1; + let Inst{17} = gds; + let Inst{25-18} = op; + let Inst{31-26} = 0x36; //encoding + let Inst{39-32} = addr; + let Inst{47-40} = data0; + let Inst{55-48} = data1; + let Inst{63-56} = vdst; + + let LGKM_CNT = 1; +} + class MUBUF op, dag outs, dag ins, string asm, list pattern> : Enc64 { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 36812ca..655a8b1 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -287,6 +287,29 @@ class VOP3_64 op, string opName, list pattern> : VOP3 < // Vector I/O classes //===----------------------------------------------------------------------===// +class DS_Load_Helper op, string asm, RegisterClass regClass> : DS < + op, + (outs regClass:$vdst), + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, + i8imm:$offset0, i8imm:$offset1), + asm#" $vdst, $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]", + []> { + let mayLoad = 1; + let mayStore = 0; +} + +class DS_Store_Helper op, string asm, RegisterClass regClass> : DS < + op, + (outs), + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1, + i8imm:$offset0, i8imm:$offset1), + asm#" $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]", + []> { + let mayStore = 1; + let mayLoad = 0; + let vdst = 0; +} + class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < op, (outs), diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index c9eac7d..09460d8 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -391,6 +391,9 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1 +def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; +def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; + //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; //def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 5b434fb..c2e8f02 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -411,6 +411,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); bool HaveKill = false; + bool NeedM0 = false; bool NeedWQM = false; unsigned Depth = 0; @@ -482,6 +483,13 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { IndirectDst(MI); break; + case AMDGPU::DS_READ_B32: + NeedWQM = true; + // Fall through + case AMDGPU::DS_WRITE_B32: + NeedM0 = true; + break; + case AMDGPU::V_INTERP_P1_F32: case AMDGPU::V_INTERP_P2_F32: case AMDGPU::V_INTERP_MOV_F32: @@ -492,6 +500,14 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { } } + if (NeedM0) { + MachineBasicBlock &MBB = MF.front(); + // Initialize M0 to a value that won't cause LDS access to be discarded + // due to offset clamping + BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32), + AMDGPU::M0).addImm(0xffffffff); + } + if (NeedWQM) { MachineBasicBlock &MBB = MF.front(); BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), -- cgit v1.1 From 0a9e22b86366caee81c32dbbd942a32634993955 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Wed, 10 Jul 2013 16:36:52 +0000 Subject: R600/SI: Add intrinsic for retrieving the current thread ID Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 10 ++++++++-- lib/Target/R600/SIIntrinsics.td | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 09460d8..61755b4 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -903,8 +903,8 @@ defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; -//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; -//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; +defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; +defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", @@ -1575,6 +1575,12 @@ def : Pat < (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0)))) >; +def : Pat < + (int_SI_tid), + (V_MBCNT_HI_U32_B32_e32 0xffffffff, + (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0, 0, 0)) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index d2643e0..2fa073e 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -14,6 +14,7 @@ let TargetPrefix = "SI", isTarget = 1 in { + def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; -- cgit v1.1 From 5b388cac7fed74cd318ec150e30984ca71db5999 Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Wed, 10 Jul 2013 16:36:57 +0000 Subject: R600/SI: Add pattern for the AMDGPU.barrier.local intrinsic lit test coverage to follow in the next commit. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186011 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 61755b4..30f2a4a 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -774,8 +774,17 @@ def S_CBRANCH_EXECNZ : SOPP < } // End isBranch = 1 } // End isTerminator = 1 -//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>; let hasSideEffects = 1 in { +def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER", + [(int_AMDGPU_barrier_local)] +> { + let SIMM16 = 0; + let isBarrier = 1; + let hasCtrlDep = 1; + let mayLoad = 1; + let mayStore = 1; +} + def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", [] >; -- cgit v1.1 From a3e39dc7055486cbf514ccd868cfabc69d7f6f4e Mon Sep 17 00:00:00 2001 From: Michel Danzer Date: Wed, 10 Jul 2013 16:37:07 +0000 Subject: R600/SI: Initial local memory support Enough for the radeonsi driver to use it for calculating derivatives. Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186012 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUAsmPrinter.cpp | 7 +++++++ lib/Target/R600/AMDGPUISelLowering.cpp | 4 +--- lib/Target/R600/R600ISelLowering.cpp | 2 ++ lib/Target/R600/SIDefines.h | 4 ++++ lib/Target/R600/SIISelLowering.cpp | 5 +++++ lib/Target/R600/SIInstructions.td | 15 +++++++++++++++ 6 files changed, 34 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 996d2a6..e039b77 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -233,7 +233,14 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { OutStreamer.EmitIntValue(RsrcReg, 4); OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4); + + if (MFI->ShaderType == ShaderType::COMPUTE) { + OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4); + OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4); + } if (MFI->ShaderType == ShaderType::PIXEL) { + OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); + OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4); OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 4019a1f..7fad3bb 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -72,8 +72,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - int types[] = { (int)MVT::v2i32, (int)MVT::v4i32 @@ -158,7 +156,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, // XXX: Account for alignment? MFI->LDSSize += Size; - return DAG.getConstant(Offset, MVT::i32); + return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32); } SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index ad4fd87..7aef08a 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -92,6 +92,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::SELECT_CC); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setBooleanContents(ZeroOrNegativeOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); setSchedulingPreference(Sched::VLIW); diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 716b093..147578c 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -12,11 +12,15 @@ #define SIDEFINES_H_ #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 +#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C +#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8) #define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128 #define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0) #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6) +#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C +#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15) #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC #endif // SIDEFINES_H_ diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9d4cfef..a314bc4 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -75,6 +75,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); @@ -310,11 +312,14 @@ MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { //===----------------------------------------------------------------------===// SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SIMachineFunctionInfo *MFI = MF.getInfo(); switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::INTRINSIC_WO_CHAIN: { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 30f2a4a..5a1bf30 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1599,6 +1599,21 @@ def : Pat < (V_MAD_F32 $src0, $src1, $src2) >; +/********** ======================= **********/ +/********** Load/Store Patterns **********/ +/********** ======================= **********/ + +def : Pat < + (local_load i64:$src0), + (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0), + (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0)) +>; + +def : Pat < + (local_store i32:$src1, i64:$src0), + (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0) +>; + /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ -- cgit v1.1 From 5fa2ba2769d31815f54ebf1af42f868f1486e4d0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 10 Jul 2013 16:38:35 +0000 Subject: Simplify code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186013 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 6d05ad2..0f83cdc 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1858,9 +1858,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!StopHere) BI->second.push_back(MI); } else { - SmallVector MIs; - MIs.push_back(MI); - Base2LdsMap[Base] = MIs; + Base2LdsMap[Base].push_back(MI); LdBases.push_back(Base); } } else { @@ -1876,9 +1874,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!StopHere) BI->second.push_back(MI); } else { - SmallVector MIs; - MIs.push_back(MI); - Base2StsMap[Base] = MIs; + Base2StsMap[Base].push_back(MI); StBases.push_back(Base); } } -- cgit v1.1 From 80095e5f8e433b7a68fbad4ff460503fd06f84ef Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 10 Jul 2013 17:19:22 +0000 Subject: Replacing an empty switch with its moral equivalent. No functional changes intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp | 6 +----- lib/Target/R600/R600InstrInfo.cpp | 4 +--- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index 5bc0668..052868e 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -54,11 +54,7 @@ static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, } static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { - Triple TheTriple(TT); - switch (TheTriple.getOS()) { - default: - return new MBlazeMCAsmInfo(); - } + return new MBlazeMCAsmInfo(); } static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM, diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index d0935fa..1a07b05 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -114,9 +114,7 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const { } bool R600InstrInfo::isReductionOp(unsigned Opcode) const { - switch(Opcode) { - default: return false; - } + return false; } bool R600InstrInfo::isCubeOp(unsigned Opcode) const { -- cgit v1.1 From 3ee0673e4f5f0324ecd0a65507009b0748ed072c Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 11 Jul 2013 08:37:13 +0000 Subject: [SystemZ] Allow 8-bit operands to RISBG RISBG has three 8-bit operands (I3, I4 and I5). I'd originally restricted all three to 6 bits, since that's the only range we intended to use at the time. However, the top bit of I4 acts as a "zero" flag for RISBG, while the top bit of I3 acts as a "test" flag for RNSBG & co. This patch therefore allows them to have the full 8-bit range. I've left the fifth operand as a 6-bit value for now since the upper 2 bits have no defined meaning. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186070 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFormats.td | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index fb530cc..7300b90 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1020,8 +1020,7 @@ multiclass CmpSwapRSPair rsOpcode, bits<16> rsyOpcode, class RotateSelectRIEf opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRIEf { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; -- cgit v1.1 From b3cabb44c32b5a3aba9b4d23aae9723d498ea7a9 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 11 Jul 2013 08:59:12 +0000 Subject: [SystemZ] Use zeroing form of RISBG for some AND sequences RISBG can handle some ANDs for which no AND IMMEDIATE exists. It also acts as a three-operand AND for some cases where an AND IMMEDIATE could be used instead. It might be worth adding a pass to replace RISBG with AND IMMEDIATE in cases where the register operands end up being the same and where AND IMMEDIATE is smaller. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186072 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 119 ++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 0891adc..5b1b77b 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -200,6 +200,16 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { Addr, Base, Disp, Index); } + // Return an undefined i64 value. + SDValue getUNDEF64(SDLoc DL); + + // Convert N to VT, if it isn't already. + SDValue convertTo(SDLoc DL, EVT VT, SDValue N); + + // Try to use RISBG to implement ISD::AND node N. Return the selected + // node on success, otherwise return null. + SDNode *tryRISBGForAND(SDNode *N); + // If Op0 is null, then Node is a constant that can be loaded using: // // (Opcode UpperVal LowerVal) @@ -521,6 +531,107 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, return true; } +// Return true if Mask matches the regexp 0*1+0*, given that zero masks +// have already been filtered out. Store the first set bit in LSB and +// the number of set bits in Length if so. +static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) { + unsigned First = findFirstSet(Mask); + uint64_t Top = (Mask >> First) + 1; + if ((Top & -Top) == Top) + { + LSB = First; + Length = findFirstSet(Top); + return true; + } + return false; +} + +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; +} + +// Return true if RISBG can be used to extract the bits in Mask from +// a value that has BitSize bits. Store the start and end operands +// (I3 and I4) in Start and End if so. +static bool isRISBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, + unsigned &End) { + // Reject trivial all-zero and all-one masks. + uint64_t Used = allOnes(BitSize); + if (Mask == 0 || Mask == Used) + return false; + + // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of + // the msb and End specifies the index of the lsb. + unsigned LSB, Length; + if (isStringOfOnes(Mask, LSB, Length)) + { + Start = 63 - (LSB + Length - 1); + End = 63 - LSB; + return true; + } + + // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb + // of the low 1s and End specifies the lsb of the high 1s. + if (isStringOfOnes(Mask ^ Used, LSB, Length)) + { + assert(LSB > 0 && "Bottom bit must be set"); + assert(LSB + Length < BitSize && "Top bit must be set"); + Start = 63 - (LSB - 1); + End = 63 - (LSB + Length); + return true; + } + + return false; +} + +SDValue SystemZDAGToDAGISel::getUNDEF64(SDLoc DL) { + SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64); + return SDValue(N, 0); +} + +SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) { + if (N.getValueType() == MVT::i32 && VT == MVT::i64) { + SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64); + SDNode *Insert = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, + DL, VT, getUNDEF64(DL), N, Index); + return SDValue(Insert, 0); + } + if (N.getValueType() == MVT::i64 && VT == MVT::i32) { + SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64); + SDNode *Extract = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, VT, N, Index); + return SDValue(Extract, 0); + } + assert(N.getValueType() == VT && "Unexpected value types"); + return N; +} + +SDNode *SystemZDAGToDAGISel::tryRISBGForAND(SDNode *N) { + EVT VT = N->getValueType(0); + unsigned BitSize = VT.getSizeInBits(); + unsigned Start, End; + ConstantSDNode *MaskNode = + dyn_cast(N->getOperand(1).getNode()); + if (!MaskNode + || !isRISBGMask(MaskNode->getZExtValue(), BitSize, Start, End)) + return 0; + + // Prefer register extensions like LLC over RSIBG. + if ((Start == 32 || Start == 48 || Start == 56) && End == 63) + return 0; + + SDValue Ops[5] = { + getUNDEF64(SDLoc(N)), + convertTo(SDLoc(N), MVT::i64, N->getOperand(0)), + CurDAG->getTargetConstant(Start, MVT::i32), + CurDAG->getTargetConstant(End | 128, MVT::i32), + CurDAG->getTargetConstant(0, MVT::i32) + }; + N = CurDAG->getMachineNode(SystemZ::RISBG, SDLoc(N), MVT::i64, Ops); + return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); +} + SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal) { @@ -590,6 +701,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } unsigned Opcode = Node->getOpcode(); + SDNode *ResNode = 0; switch (Opcode) { case ISD::OR: case ISD::XOR: @@ -604,6 +716,10 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } break; + case ISD::AND: + ResNode = tryRISBGForAND(Node); + break; + case ISD::Constant: // If this is a 64-bit constant that is out of the range of LLILF, // LLIHF and LGFI, split it into two 32-bit pieces. @@ -631,7 +747,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } // Select the default instruction - SDNode *ResNode = SelectCode(Node); + if (!ResNode) + ResNode = SelectCode(Node); DEBUG(errs() << "=> "; if (ResNode == NULL || ResNode == Node) -- cgit v1.1 From 261e2877ebcb3c6139ddcc67992662494232b096 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 11 Jul 2013 09:10:09 +0000 Subject: [SystemZ] Use zeroing form of RISBG for shift-and-AND sequences Extend r186072 to handle shifts and ANDs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186073 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 71 +++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 5b1b77b..39589f6 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -613,20 +613,81 @@ SDNode *SystemZDAGToDAGISel::tryRISBGForAND(SDNode *N) { unsigned Start, End; ConstantSDNode *MaskNode = dyn_cast(N->getOperand(1).getNode()); - if (!MaskNode - || !isRISBGMask(MaskNode->getZExtValue(), BitSize, Start, End)) + if (!MaskNode) return 0; + SDValue Input = N->getOperand(0); + uint64_t Mask = MaskNode->getZExtValue(); + if (!isRISBGMask(Mask, BitSize, Start, End)) { + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); + Mask |= KnownZero.getZExtValue(); + if (!isRISBGMask(Mask, BitSize, Start, End)) + return 0; + } + + unsigned Rotate = 0; + if (Input->getOpcode() == ISD::ROTL && BitSize == 64) { + // Any 64-bit rotate left can be merged into the RISBG. + if (ConstantSDNode *CountNode = + dyn_cast(Input.getOperand(1).getNode())) { + Rotate = CountNode->getZExtValue() & (BitSize - 1); + Input = Input->getOperand(0); + } + } else if (Input->getOpcode() == ISD::SHL) { + // Try to convert (and (shl X, count), mask) into + // (and (rotl X, count), mask&(~0<(Input.getOperand(1).getNode())) { + uint64_t Count = CountNode->getZExtValue(); + if (Count > 0 && + Count < BitSize && + isRISBGMask(Mask & (allOnes(BitSize - Count) << Count), + BitSize, Start, End)) { + Rotate = Count; + Input = Input->getOperand(0); + } + } + } else if (Input->getOpcode() == ISD::SRL) { + // Try to convert (and (srl X, count), mask) into + // (and (rotl X, size-count), mask&(~0>>count)), which is similar + // to SLL above. + if (ConstantSDNode *CountNode = + dyn_cast(Input.getOperand(1).getNode())) { + uint64_t Count = CountNode->getZExtValue(); + if (Count > 0 && + Count < BitSize && + isRISBGMask(Mask & allOnes(BitSize - Count), BitSize, Start, End)) { + Rotate = 64 - Count; + Input = Input->getOperand(0); + } + } + } else if (Start <= End && Input->getOpcode() == ISD::SRA) { + // Try to convert (and (sra X, count), mask) into + // (and (rotl X, size-count), mask). The mask must not include + // any sign bits. + if (ConstantSDNode *CountNode = + dyn_cast(Input.getOperand(1).getNode())) { + uint64_t Count = CountNode->getZExtValue(); + if (Count > 0 && Count < BitSize && Start >= 64 - (BitSize - Count)) { + Rotate = 64 - Count; + Input = Input->getOperand(0); + } + } + } + // Prefer register extensions like LLC over RSIBG. - if ((Start == 32 || Start == 48 || Start == 56) && End == 63) + if (Rotate == 0 && (Start == 32 || Start == 48 || Start == 56) && End == 63) return 0; SDValue Ops[5] = { getUNDEF64(SDLoc(N)), - convertTo(SDLoc(N), MVT::i64, N->getOperand(0)), + convertTo(SDLoc(N), MVT::i64, Input), CurDAG->getTargetConstant(Start, MVT::i32), CurDAG->getTargetConstant(End | 128, MVT::i32), - CurDAG->getTargetConstant(0, MVT::i32) + CurDAG->getTargetConstant(Rotate, MVT::i32) }; N = CurDAG->getMachineNode(SystemZ::RISBG, SDLoc(N), MVT::i64, Ops); return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); -- cgit v1.1 From 53c86db25b5b4e163c68dc91c8ce1bc8180e6ff3 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 11 Jul 2013 16:31:51 +0000 Subject: PPCDAGToDAGISel::isRunOfOnes should return false on zero This fixes a bug (found by csmith) at -O0 where we attempt to create a RLWIMI with an out-of-range operand. Most uses of the isRunOfOnes function are guarded by a condition that the value is not zero. This was not true in two places, and in both places a zero input would result in an out-of-rage MB value (= 32). To fix this, isRunOfOnes returns false on a zero input (and I've remove one now-redundant guard). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186101 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 901d369..475bde1 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -330,6 +330,9 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { } bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { + if (!Val) + return false; + if (isShiftedMask_32(Val)) { // look for the first non-zero bit MB = countLeadingZeros(Val); @@ -435,7 +438,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { } unsigned MB, ME; - if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) { + if (isRunOfOnes(InsertMask, MB, ME)) { SDValue Tmp1, Tmp2; if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && -- cgit v1.1 From e2ff00e117ba9b758b298e671f65c0b002f8a52d Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 11 Jul 2013 17:43:32 +0000 Subject: PPC: Add some missing V_SET0 patterns We had patterns to match v4i32 immAllZerosV -> V_SET0, but not patterns for v8i16 (which occurs in the test case) or v16i8. The same was true for V_SETALLONES (so I added the associated patterns for those as well). Another bug found by llvm-stress. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186108 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrAltivec.td | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 0976559..fdea51d 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -665,11 +665,24 @@ def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; let isCodeGenOnly = 1 in { -def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), +def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), + "vxor $vD, $vD, $vD", VecFP, + [(set v16i8:$vD, (v16i8 immAllZerosV))]>; +def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins), + "vxor $vD, $vD, $vD", VecFP, + [(set v8i16:$vD, (v8i16 immAllZerosV))]>; +def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), "vxor $vD, $vD, $vD", VecFP, [(set v4i32:$vD, (v4i32 immAllZerosV))]>; + let IMM=-1 in { -def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), +def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins), + "vspltisw $vD, -1", VecFP, + [(set v16i8:$vD, (v16i8 immAllOnesV))]>; +def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins), + "vspltisw $vD, -1", VecFP, + [(set v8i16:$vD, (v8i16 immAllOnesV))]>; +def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), "vspltisw $vD, -1", VecFP, [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } -- cgit v1.1 From ac226bbf457f6b5e5210a4a82b1ce678298b2d89 Mon Sep 17 00:00:00 2001 From: Charles Davis Date: Fri, 12 Jul 2013 06:02:35 +0000 Subject: Target/X86: Add explicit Win64 and System V/x86-64 calling conventions. Summary: This patch adds explicit calling convention types for the Win64 and System V/x86-64 ABIs. This allows code to override the default, and use the Win64 convention on a target that wants to use SysV (and vice-versa). This is needed to implement the `ms_abi` and `sysv_abi` GNU attributes. Reviewers: CC: git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186144 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86CallingConv.td | 7 +++++++ lib/Target/X86/X86FastISel.cpp | 25 ++++++++++++++----------- lib/Target/X86/X86ISelLowering.cpp | 30 +++++++++++++++++------------- lib/Target/X86/X86Subtarget.h | 8 +++++++- 4 files changed, 45 insertions(+), 25 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 9eafbd5..40c5d91 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -156,6 +156,11 @@ def RetCC_X86_32 : CallingConv<[ def RetCC_X86_64 : CallingConv<[ // HiPE uses RetCC_X86_64_HiPE CCIfCC<"CallingConv::HiPE", CCDelegateTo>, + + // Handle explicit CC selection + CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, + CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, + // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, @@ -489,6 +494,8 @@ def CC_X86_32 : CallingConv<[ def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, + CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, + CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index f8f06f6..9c91e93 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -712,10 +712,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && - CC != CallingConv::X86_FastCall) + CC != CallingConv::X86_FastCall && + CC != CallingConv::X86_64_SysV) return false; - if (Subtarget->isTargetWin64()) + if (Subtarget->isCallingConvWin64(CC)) return false; // Don't handle popping bytes on return for now. @@ -1705,9 +1706,6 @@ bool X86FastISel::FastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; - if (Subtarget->isTargetWin64()) - return false; - const Function *F = FuncInfo.Fn; if (F->isVarArg()) return false; @@ -1715,7 +1713,10 @@ bool X86FastISel::FastLowerArguments() { CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::C) return false; - + + if (Subtarget->isCallingConvWin64(CC)) + return false; + if (!Subtarget->is64Bit()) return false; @@ -1817,8 +1818,10 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Handle only C and fastcc calling conventions for now. ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); + bool isWin64 = Subtarget->isCallingConvWin64(CC); if (CC != CallingConv::C && CC != CallingConv::Fast && - CC != CallingConv::X86_FastCall) + CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 && + CC != CallingConv::X86_64_SysV) return false; // fastcc with -tailcallopt is intended to provide a guaranteed @@ -1832,7 +1835,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Don't know how to handle Win64 varargs yet. Nothing special needed for // x86-32. Special handling for x86-64 is implemented. - if (isVarArg && Subtarget->isTargetWin64()) + if (isVarArg && isWin64) return false; // Fast-isel doesn't know about callee-pop yet. @@ -1962,7 +1965,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { I->getParent()->getContext()); // Allocate shadow area for Win64 - if (Subtarget->isTargetWin64()) + if (isWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86); @@ -2078,7 +2081,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { X86::EBX).addReg(Base); } - if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) { + if (Subtarget->is64Bit() && isVarArg && !isWin64) { // Count the number of XMM registers allocated. static const uint16_t XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, @@ -2147,7 +2150,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (Subtarget->isPICStyleGOT()) MIB.addReg(X86::EBX, RegState::Implicit); - if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) + if (Subtarget->is64Bit() && isVarArg && !isWin64) MIB.addReg(X86::AL, RegState::Implicit); // Add implicit physical register uses to the call. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f00df35..6284dd7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1880,13 +1880,19 @@ static bool IsTailCallConvention(CallingConv::ID CC) { CC == CallingConv::HiPE); } +/// \brief Return true if the calling convention is a C calling convention. +static bool IsCCallConvention(CallingConv::ID CC) { + return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 || + CC == CallingConv::X86_64_SysV); +} + bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls) return false; CallSite CS(CI); CallingConv::ID CalleeCC = CS.getCallingConv(); - if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) + if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC)) return false; return true; @@ -1961,7 +1967,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWindows = Subtarget->isTargetWindows(); - bool IsWin64 = Subtarget->isTargetWin64(); + bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); assert(!(isVarArg && IsTailCallConvention(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); @@ -1972,9 +1978,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 - if (IsWin64) { + if (IsWin64) CCInfo.AllocateStack(32, 8); - } CCInfo.AnalyzeFormalArguments(Ins, CC_X86); @@ -2287,7 +2292,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); - bool IsWin64 = Subtarget->isTargetWin64(); + bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); bool IsWindows = Subtarget->isTargetWindows(); StructReturnType SR = callIsStructReturn(Outs); bool IsSibcall = false; @@ -2320,9 +2325,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 - if (IsWin64) { + if (IsWin64) CCInfo.AllocateStack(32, 8); - } CCInfo.AnalyzeCallOperands(Outs, CC_X86); @@ -2833,13 +2837,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG &DAG) const { - if (!IsTailCallConvention(CalleeCC) && - CalleeCC != CallingConv::C) + if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC)) return false; // If -tailcallopt is specified, make fastcc functions tail-callable. const MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = DAG.getMachineFunction().getFunction(); + const Function *CallerF = MF.getFunction(); // If the function return type is x86_fp80 and the callee return type is not, // then the FP_EXTEND of the call result is not a nop. It's not safe to @@ -2849,6 +2852,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; + bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC); + bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC); if (getTargetMachine().Options.GuaranteedTailCallOpt) { if (IsTailCallConvention(CalleeCC) && CCMatch) @@ -2882,7 +2887,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Optimizing for varargs on Win64 is unlikely to be safe without // additional testing. - if (Subtarget->isTargetWin64()) + if (IsCalleeWin64 || IsCallerWin64) return false; SmallVector ArgLocs; @@ -2957,9 +2962,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, getTargetMachine(), ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 - if (Subtarget->isTargetWin64()) { + if (IsCalleeWin64) CCInfo.AllocateStack(32, 8); - } CCInfo.AnalyzeCallOperands(Outs, CC_X86); if (CCInfo.getNextStackOffset()) { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 66832b9..01a28d0 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -338,7 +338,13 @@ public: } bool isPICStyleStubAny() const { return PICStyle == PICStyles::StubDynamicNoPIC || - PICStyle == PICStyles::StubPIC; } + PICStyle == PICStyles::StubPIC; + } + + bool isCallingConvWin64(CallingConv::ID CC) const { + return (isTargetWin64() && CC != CallingConv::X86_64_SysV) || + CC == CallingConv::X86_64_Win64; + } /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel -- cgit v1.1 From 6cf3cfa0ab1da0c52730fec103bbc69eb0370081 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 12 Jul 2013 08:37:17 +0000 Subject: [SystemZ] Improve spilling of LGDR and LDGR If the source of these instructions is spilled we should load the destination. If the destination is spilled we should store the source. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186147 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index cfd270a..bbac73f 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -368,6 +368,28 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, .getRegClass(MI->getOperand(OpNum).getReg())->getSize() && "Invalid size combination"); + unsigned Opcode = MI->getOpcode(); + if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { + bool Op0IsGPR = (Opcode == SystemZ::LGDR); + bool Op1IsGPR = (Opcode == SystemZ::LDGR); + // If we're spilling the destination of an LDGR or LGDR, store the + // source register instead. + if (OpNum == 0) { + unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; + return BuildMI(MF, MI->getDebugLoc(), get(StoreOpcode)) + .addOperand(MI->getOperand(1)).addFrameIndex(FrameIndex) + .addImm(0).addReg(0); + } + // If we're spilling the source of an LDGR or LGDR, load the + // destination register instead. + if (OpNum == 1) { + unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD; + unsigned Dest = MI->getOperand(0).getReg(); + return BuildMI(MF, MI->getDebugLoc(), get(LoadOpcode), Dest) + .addFrameIndex(FrameIndex).addImm(0).addReg(0); + } + } + // Look for cases where the source of a simple store or the destination // of a simple load is being spilled. Try to use MVC instead. // @@ -399,7 +421,7 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // If the spilled operand is the final one, try to change R // into . - int MemOpcode = SystemZ::getMemOpcode(MI->getOpcode()); + int MemOpcode = SystemZ::getMemOpcode(Opcode); if (MemOpcode >= 0) { unsigned NumOps = MI->getNumExplicitOperands(); if (OpNum == NumOps - 1) { -- cgit v1.1 From 5e009541973b7935386055066689902aa7134e2d Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 12 Jul 2013 09:08:12 +0000 Subject: [SystemZ] Fix parsing of inline asm registers GPR and FPR constraints like "{r2}" and "{f2}" weren't handled correctly because the name-to-regno mapping depends on the value type and (because of that) the internal names in RegStrings are not the same as the AsmName. CC constraints like "{cc}" didn't work either because there was no associated register class. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186148 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 42 ++++++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZMCInstLower.cpp | 9 +++---- lib/Target/SystemZ/SystemZMCInstLower.h | 2 +- lib/Target/SystemZ/SystemZRegisterInfo.td | 4 ++- 4 files changed, 49 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index d344134..a317f0c 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -386,6 +386,22 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, return weight; } +// Parse a "{tNNN}" register constraint for which the register type "t" +// has already been verified. MC is the class associated with "t" and +// Map maps 0-based register numbers to LLVM register numbers. +static std::pair +parseRegisterNumber(const std::string &Constraint, + const TargetRegisterClass *RC, const unsigned *Map) { + assert(*(Constraint.end()-1) == '}' && "Missing '}'"); + if (isdigit(Constraint[2])) { + std::string Suffix(Constraint.data() + 2, Constraint.size() - 2); + unsigned Index = atoi(Suffix.c_str()); + if (Index < 16 && Map[Index]) + return std::make_pair(Map[Index], RC); + } + return std::make_pair(0u, static_cast(0)); +} + std::pair SystemZTargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { @@ -415,6 +431,32 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { return std::make_pair(0U, &SystemZ::FP32BitRegClass); } } + if (Constraint[0] == '{') { + // We need to override the default register parsing for GPRs and FPRs + // because the interpretation depends on VT. The internal names of + // the registers are also different from the external names + // (F0D and F0S instead of F0, etc.). + if (Constraint[1] == 'r') { + if (VT == MVT::i32) + return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, + SystemZMC::GR32Regs); + if (VT == MVT::i128) + return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, + SystemZMC::GR128Regs); + return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, + SystemZMC::GR64Regs); + } + if (Constraint[1] == 'f') { + if (VT == MVT::f32) + return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, + SystemZMC::FP32Regs); + if (VT == MVT::f128) + return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, + SystemZMC::FP128Regs); + return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, + SystemZMC::FP64Regs); + } + } return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp index fd3f867..432a0d3 100644 --- a/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -57,9 +57,6 @@ MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: - // Ignore all implicit register operands. - if (MO.isImplicit()) - return MCOperand(); return MCOperand::CreateReg(MO.getReg()); case MachineOperand::MO_Immediate: @@ -104,8 +101,8 @@ void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(Opcode); for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); - MCOperand MCOp = lowerOperand(MO); - if (MCOp.isValid()) - OutMI.addOperand(MCOp); + // Ignore all implicit register operands. + if (!MO.isReg() || !MO.isImplicit()) + OutMI.addOperand(lowerOperand(MO)); } } diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h index afa72f3..db5bdb0 100644 --- a/lib/Target/SystemZ/SystemZMCInstLower.h +++ b/lib/Target/SystemZ/SystemZMCInstLower.h @@ -35,7 +35,7 @@ public: // Lower MachineInstr MI to MCInst OutMI. void lower(const MachineInstr *MI, MCInst &OutMI) const; - // Return an MCOperand for MO. Return an empty operand if MO is implicit. + // Return an MCOperand for MO. MCOperand lowerOperand(const MachineOperand& MO) const; // Return an MCOperand for MO, given that it equals Symbol + Offset. diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index d65553e..ffffe72 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -147,5 +147,7 @@ defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q, // Other registers //===----------------------------------------------------------------------===// -// The 2-bit condition code field of the PSW. +// The 2-bit condition code field of the PSW. Every register named in an +// inline asm needs a class associated with it. def CC : SystemZReg<"cc">; +def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>; -- cgit v1.1 From 9bcad42c3aadab118b6ed5f30f2ea0d87228fd3f Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 12 Jul 2013 09:17:10 +0000 Subject: [SystemZ] Optimize sign-extends of vector setccs Normal (sext (setcc ...)) sequences are optimised into (select_cc ..., -1, 0) by DAGCombiner::visitSIGN_EXTEND. However, this is deliberately not done for vectors, and after vector type legalization we have (sext_inreg (setcc ...)) instead. I wondered about trying to extend DAGCombiner to handle this case too, but it seemed to be a loss on some other targets I tried, even those for which SETCC isn't "legal" and SELECT_CC is. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186149 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.td | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 5e13c7f..c47e04b 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1073,3 +1073,12 @@ def : Pat<(add GR64:$src1, imm64zx32n:$src2), (SLGFI GR64:$src1, imm64zx32n:$src2)>; def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), (SLGF GR64:$src1, bdxaddr20only:$addr)>; + +// Optimize sign-extended 1/0 selects to -1/0 selects. This is important +// for vector legalization. +def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm:$cc)), (i32 31)), (i32 31)), + (Select32 (LHI -1), (LHI 0), imm:$cc)>; +def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm:$cc)))), + (i32 63)), + (i32 63)), + (Select64 (LGHI -1), (LGHI 0), imm:$cc)>; -- cgit v1.1 From dd51a0c1e0b3cce8093244533b3505668d16f218 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Fri, 12 Jul 2013 09:25:35 +0000 Subject: Add support for Mips break and syscall insructions. The corresponding test cases are added. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186151 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFormats.td | 28 ++++++++++++++++++++++++++++ lib/Target/Mips/MipsInstrInfo.td | 21 +++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 14cfcf9..6073476 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -491,6 +491,34 @@ class TEQ_FM funct> { } //===----------------------------------------------------------------------===// +// System calls format +//===----------------------------------------------------------------------===// + +class SYS_FM funct> +{ + bits<20> code_; + bits<32> Inst; + let Inst{31-26} = 0x0; + let Inst{25-6} = code_; + let Inst{5-0} = funct; +} + +//===----------------------------------------------------------------------===// +// Break instruction format +//===----------------------------------------------------------------------===// + +class BRK_FM funct> +{ + bits<10> code_1; + bits<10> code_2; + bits<32> Inst; + let Inst{31-26} = 0x0; + let Inst{25-16} = code_1; + let Inst{15-6} = code_2; + let Inst{5-0} = funct; +} + +//===----------------------------------------------------------------------===// // // FLOATING POINT INSTRUCTION FORMATS // diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index d2164f7..712e204 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -250,6 +250,12 @@ def simm16 : Operand { def simm20 : Operand { } +def uimm20 : Operand { +} + +def uimm10 : Operand { +} + def simm16_64 : Operand; def shamt : Operand; @@ -637,6 +643,14 @@ class BAL_FT : let hasDelaySlot = 1; let Defs = [RA]; } +// Syscall +class SYS_FT : + InstSE<(outs), (ins uimm20:$code_), + !strconcat(opstr, "\t$code_"), [], NoItinerary, FrmI>; +// Break +class BRK_FT : + InstSE<(outs), (ins uimm10:$code_1, uimm10:$code_2), + !strconcat(opstr, "\t$code_1, $code_2"), [], NoItinerary, FrmOther>; // Sync let hasSideEffects = 1 in @@ -941,6 +955,9 @@ defm SWR : StoreLeftRightM<"swr", MipsSWR, CPURegs>, LW_FM<0x2e>; def SYNC : SYNC_FT, SYNC_FM; def TEQ : TEQ_FT<"teq", CPURegsOpnd>, TEQ_FM<0x34>; +def BREAK : BRK_FT<"break">, BRK_FM<0xd>; +def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>; + /// Load-linked, Store-conditional let Predicates = [NotN64, HasStdEnc] in { def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>; @@ -1119,6 +1136,10 @@ def : InstAlias<"bnez $rs,$offset", def : InstAlias<"beqz $rs,$offset", (BEQ CPURegsOpnd:$rs, ZERO, brtarget:$offset), 1>, Requires<[NotMips64]>; +def : InstAlias<"syscall", (SYSCALL 0), 1>; + +def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; +def : InstAlias<"break", (BREAK 0, 0), 1>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -- cgit v1.1 From fff967358b56c4e191089f668b75ae415b5bd992 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Fri, 12 Jul 2013 15:31:36 +0000 Subject: X86: fold SSE2/AVX2 logical shift by immediate amount into zero vector when possible Patch by Andrea Di Biagio git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186165 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6284dd7..95ca6c3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16321,6 +16321,38 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +/// \brief Returns a vector of 0s if the node in input is a vector logical +/// shift by a constant amount which is known to be bigger than or equal +/// to the vector element size in bits. +static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + + if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && + (!Subtarget->hasInt256() || + (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) + return SDValue(); + + SDValue Amt = N->getOperand(1); + SDLoc DL(N); + if (isSplatVector(Amt.getNode())) { + SDValue SclrAmt = Amt->getOperand(0); + if (ConstantSDNode *C = dyn_cast(SclrAmt)) { + APInt ShiftAmt = C->getAPIntValue(); + unsigned MaxAmount = VT.getVectorElementType().getSizeInBits(); + + // SSE2/AVX2 logical shifts always return a vector of 0s + // if the shift amount is bigger than or equal to + // the element size. The constant shift amount will be + // encoded as a 8-bit immediate. + if (ShiftAmt.trunc(8).uge(MaxAmount)) + return getZeroVector(VT, Subtarget, DAG, DL); + } + } + + return SDValue(); +} + /// PerformShiftCombine - Combine shifts. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -16330,6 +16362,12 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (V.getNode()) return V; } + if (N->getOpcode() != ISD::SRA) { + // Try to fold this logical shift into a zero vector. + SDValue V = performShiftToAllZeros(N, DAG, Subtarget); + if (V.getNode()) return V; + } + return SDValue(); } -- cgit v1.1 From b619dd5d5b69ba9f4571a96e1a96d09d8aed03a7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 12 Jul 2013 18:06:44 +0000 Subject: X86: Shrink certain forms of movsx. In particular: movsbw %al, %ax --> cbtw movswl %ax, %eax --> cwtl movslq %eax, %rax --> cltq According to Intel's manual those have the same performance characteristics but come with a smaller encoding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186174 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86MCInstLower.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3fea4cb..c7c00b5 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -254,6 +254,34 @@ static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { Inst.addOperand(Saved); } +/// \brief If a movsx instruction has a shorter encoding for the used register +/// simplify the instruction to use it instead. +static void SimplifyMOVSX(MCInst &Inst) { + unsigned NewOpcode = 0; + unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg(); + switch (Inst.getOpcode()) { + default: + llvm_unreachable("Unexpected instruction!"); + case X86::MOVSX16rr8: // movsbw %al, %ax --> cbtw + if (Op0 == X86::AX && Op1 == X86::AL) + NewOpcode = X86::CBW; + break; + case X86::MOVSX32rr16: // movswl %ax, %eax --> cwtl + if (Op0 == X86::EAX && Op1 == X86::AX) + NewOpcode = X86::CWDE; + break; + case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq + if (Op0 == X86::RAX && Op1 == X86::EAX) + NewOpcode = X86::CDQE; + break; + } + + if (NewOpcode != 0) { + Inst = MCInst(); + Inst.setOpcode(NewOpcode); + } +} + /// \brief Simplify things like MOV32rm to MOV32o32a. static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, unsigned Opcode) { @@ -557,6 +585,13 @@ ReSimplify: case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break; case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break; + // Try to shrink some forms of movsx. + case X86::MOVSX16rr8: + case X86::MOVSX32rr16: + case X86::MOVSX64rr32: + SimplifyMOVSX(OutMI); + break; + case X86::MORESTACK_RET: OutMI.setOpcode(X86::RET); break; -- cgit v1.1 From 68e132866236f5d59271d2c7ffb77a9c8e743752 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 12 Jul 2013 18:14:56 +0000 Subject: R600/SI: Add initial double precision support for SI Patch by: Niels Ole Salscheider Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186177 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 6 ++++++ lib/Target/R600/SIISelLowering.cpp | 1 + lib/Target/R600/SIInstructions.td | 30 +++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 7fad3bb..9891ad3 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -60,12 +60,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::STORE, MVT::f64, Promote); + AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + setOperationAction(ISD::LOAD, MVT::f32, Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::LOAD, MVT::f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); + setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index a314bc4..4d0fdf3 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -45,6 +45,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 5a1bf30..8436b67 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -663,7 +663,9 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 < [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; -defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; +defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", + [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] +>; defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; @@ -1008,10 +1010,25 @@ def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64", >; def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", []>; +let isCommutable = 1 in { + def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; + +} // isCommutable = 1 + +def : Pat < + (fadd f64:$src0, f64:$src1), + (V_ADD_F64 $src0, $src1, (i64 0)) +>; + +def : Pat < + (fmul f64:$src0, f64:$src1), + (V_MUL_F64 $src0, $src1, (i64 0)) +>; + def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; let isCommutable = 1 in { @@ -1434,6 +1451,10 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; +def : BitConvert ; + +def : BitConvert ; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ @@ -1522,6 +1543,11 @@ def : Pat< (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1)) >; +def : Pat< + (fdiv f64:$src0, f64:$src1), + (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0)) +>; + def : Pat < (fcos f32:$src0), (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) @@ -1672,6 +1698,8 @@ multiclass MUBUFLoad_Pattern ; } +defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern Date: Fri, 12 Jul 2013 18:15:02 +0000 Subject: R600/SI: SI support for 64bit ConstantFP Patch by: Niels Ole Salscheider Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186178 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 12 ++++++++++++ lib/Target/R600/SIInstructions.td | 7 +++++++ 2 files changed, 19 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 655a8b1..067a34b 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -21,11 +21,23 @@ def LO32 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32); }]>; +def LO32f : SDNodeXFormgetValueAPF().bitcastToAPInt().getZExtValue() & 0xffffffff; + float *fval = reinterpret_cast(&val); + return CurDAG->getTargetConstantFP(*fval, MVT::f32); +}]>; + // Transformation function, extract the upper 32bit of a 64bit immediate def HI32 : SDNodeXFormgetTargetConstant(N->getZExtValue() >> 32, MVT::i32); }]>; +def HI32f : SDNodeXFormgetValueAPF().bitcastToAPInt().getZExtValue() >> 32; + float *fval = reinterpret_cast(&val); + return CurDAG->getTargetConstantFP(*fval, MVT::f32); +}]>; + def IMM8bitDWORD : ImmLeaf < i32, [{ return (Imm & ~0x3FC) == 0; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 8436b67..3deaa2e 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1509,6 +1509,13 @@ def : Pat < (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1) >; +def : Pat < + (f64 fpimm:$imm), + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (V_MOV_B32_e32 (f32 (LO32f fpimm:$imm))), sub0), + (V_MOV_B32_e32 (f32 (HI32f fpimm:$imm))), sub1) +>; + /********** ===================== **********/ /********** Interpolation Paterns **********/ /********** ===================== **********/ -- cgit v1.1 From d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 12 Jul 2013 18:15:08 +0000 Subject: R600/SI: Add double precision fsub pattern for SI Patch by: Niels Ole Salscheider Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186179 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 15 +++++++++++++++ lib/Target/R600/SIInstructions.td | 17 ++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 4d0fdf3..336bfbf 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -296,6 +296,21 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } + case AMDGPU::V_SUB_F64: { + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), + MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(MI->getOperand(2).getReg()) + .addImm(0) /* src2 */ + .addImm(0) /* ABS */ + .addImm(0) /* CLAMP */ + .addImm(0) /* OMOD */ + .addImm(2); /* NEG */ + MI->eraseFromParent(); + break; + } } return BB; } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 3deaa2e..eed4f7f 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1232,17 +1232,23 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST; } // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] -// This psuedo instruction takes a pointer as input and outputs a resource -// constant that can be used with the ADDR64 MUBUF instructions. - let usesCustomInserter = 1 in { +// This psuedo instruction takes a pointer as input and outputs a resource +// constant that can be used with the ADDR64 MUBUF instructions. def SI_ADDR64_RSRC : InstSI < (outs SReg_128:$srsrc), (ins SReg_64:$ptr), "", [] >; +def V_SUB_F64 : InstSI < + (outs VReg_64:$dst), + (ins VReg_64:$src0, VReg_64:$src1), + "V_SUB_F64 $dst, $src0, $src1", + [] +>; + } // end usesCustomInserter } // end IsCodeGenOnly, isPseudo @@ -1271,6 +1277,11 @@ def : Pat < $src0, $src1, $src2, $src3) >; +def : Pat < + (f64 (fsub f64:$src0, f64:$src1)), + (V_SUB_F64 $src0, $src1) +>; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ -- cgit v1.1 From 513fc45629bbd32f7c22544312d17eaec75505ea Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 12 Jul 2013 18:15:13 +0000 Subject: R600/SI: Add fsqrt pattern for SI Patch by: Niels Ole Salscheider Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186180 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index eed4f7f..0f94164 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -669,8 +669,12 @@ defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; -defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>; -defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>; +defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", + [(set f32:$dst, (fsqrt f32:$src0))] +>; +defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", + [(set f64:$dst, (fsqrt f64:$src0))] +>; defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>; defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>; defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>; -- cgit v1.1 From 97781281ca824b4084632b23dbb1e71da9638876 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 12 Jul 2013 18:15:19 +0000 Subject: R600/SI: Implement select and compares for SI Patch by: Niels Ole Salscheider Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186181 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0f94164..ffa45c5 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -176,19 +176,19 @@ defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">; } // End hasSideEffects = 1, Defs = [EXEC] defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; -defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">; -defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">; -defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">; -defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">; +defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_LT>; +defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_EQ>; +defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_LE>; +defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_GT>; defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">; -defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">; +defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_GE>; defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">; defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">; defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">; defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">; defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">; defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">; -defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">; +defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_NE>; defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; @@ -827,6 +827,18 @@ def : Pat < (V_CNDMASK_B32_e64 $src0, $src1, $src2) >; +//use two V_CNDMASK_B32_e64 instructions for f64 +def : Pat < + (f64 (select i1:$src2, f64:$src1, f64:$src0)), + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub0), + (EXTRACT_SUBREG $src1, sub0), + $src2), sub0), + (V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub1), + (EXTRACT_SUBREG $src1, sub1), + $src2), sub1) +>; + defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; -- cgit v1.1 From c9c322cc39aab82be8c031a57ebcc6e1206246bd Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 12 Jul 2013 18:15:26 +0000 Subject: R600/SI: Add support for f64 kernel arguments Patch by: Niels Ole Salscheider Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186182 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUCallingConv.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 826932b..29a0326 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -39,7 +39,7 @@ def CC_SI : CallingConv<[ // Calling convention for SI compute kernels def CC_SI_Kernel : CallingConv<[ CCIfType<[v4i32, v4f32], CCAssignToStack <16, 4>>, - CCIfType<[i64], CCAssignToStack < 8, 4>>, + CCIfType<[i64, f64], CCAssignToStack < 8, 4>>, CCIfType<[i32, f32], CCAssignToStack < 4, 4>>, CCIfType<[i16], CCAssignToStack < 2, 4>>, CCIfType<[i8], CCAssignToStack < 1, 4>> -- cgit v1.1 From c0a11edba6ea46c782672ab3fb4e4ab3dc267a22 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 12 Jul 2013 19:16:02 +0000 Subject: TargetTransformInfo: address calculation parameter for gather/scather Address calculation for gather/scather in vectorized code can incur a significant cost making vectorization unbeneficial. Add infrastructure to add cost. Tests and cost model for targets will be in follow-up commits. radar://14351991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186187 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 53ece66..79f56a4 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -124,7 +124,7 @@ public: unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; - unsigned getAddressComputationCost(Type *Val) const; + unsigned getAddressComputationCost(Type *Val, bool IsComplex) const; unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue, @@ -425,7 +425,7 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned ARMTTI::getAddressComputationCost(Type *Ty) const { +unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { // In many cases the address computation is not merged into the instruction // addressing mode. return 1; -- cgit v1.1 From 4a1c764264a8908aa041acf12f68cd8bcc2037b1 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 12 Jul 2013 19:16:04 +0000 Subject: ARM cost model: Add cost for gather/scather Fixes a 35% degradation compared to unvectorized code in MiBench/automotive-susan and an equally serious regression on a private image processing benchmark. radar://14351991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186188 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMTargetTransformInfo.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 79f56a4..5cc64de 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -426,6 +426,15 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, } unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { + // Address computations in vectorized code with non-consecutive addresses will + // likely result in more instructions compared to scalar code where the + // computation can more often be merged into the index mode. The resulting + // extra micro-ops can significantly decrease throughput. + unsigned NumVectorInstToHideOverhead = 10; + + if (Ty->isVectorTy() && IsComplex) + return NumVectorInstToHideOverhead; + // In many cases the address computation is not merged into the instruction // addressing mode. return 1; -- cgit v1.1 From 7251a75f6ee9ce38263be6580a235187475458ed Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 12 Jul 2013 19:16:07 +0000 Subject: X86 cost model: Add cost for vectorized gather/scather radar://14351991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetTransformInfo.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 68e1a67..3bbddad 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -100,6 +100,8 @@ public: unsigned Alignment, unsigned AddressSpace) const; + virtual unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex) const; + /// @} }; @@ -598,3 +600,16 @@ unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, return Cost; } + +unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { + // Address computations in vectorized code with non-consecutive addresses will + // likely result in more instructions compared to scalar code where the + // computation can more often be merged into the index mode. The resulting + // extra micro-ops can significantly decrease throughput. + unsigned NumVectorInstToHideOverhead = 10; + + if (Ty->isVectorTy() && IsComplex) + return NumVectorInstToHideOverhead; + + return TargetTransformInfo::getAddressComputationCost(Ty, IsComplex); +} -- cgit v1.1 From 795740b591331a0caf342edc4d572ec348415b40 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 12 Jul 2013 20:18:05 +0000 Subject: R600: Remove unsafe type punning. No intended functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186196 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 067a34b..52af79c 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -22,9 +22,8 @@ def LO32 : SDNodeXForm; def LO32f : SDNodeXFormgetValueAPF().bitcastToAPInt().getZExtValue() & 0xffffffff; - float *fval = reinterpret_cast(&val); - return CurDAG->getTargetConstantFP(*fval, MVT::f32); + APInt V = N->getValueAPF().bitcastToAPInt().trunc(32); + return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), MVT::f32); }]>; // Transformation function, extract the upper 32bit of a 64bit immediate @@ -33,9 +32,8 @@ def HI32 : SDNodeXForm; def HI32f : SDNodeXFormgetValueAPF().bitcastToAPInt().getZExtValue() >> 32; - float *fval = reinterpret_cast(&val); - return CurDAG->getTargetConstantFP(*fval, MVT::f32); + APInt V = N->getValueAPF().bitcastToAPInt().lshr(32).trunc(32); + return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), MVT::f32); }]>; def IMM8bitDWORD : ImmLeaf < -- cgit v1.1 From adde9da01c78dd837a4e32217c9445c2c1aadb27 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 12 Jul 2013 22:08:24 +0000 Subject: Remove extraneous braces. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186212 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 7f52749..6064813 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -821,22 +821,19 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { switch (Opcode) { default: break; - case Instruction::BitCast: { + case Instruction::BitCast: // Look through bitcasts. return ARMComputeAddress(U->getOperand(0), Addr); - } - case Instruction::IntToPtr: { + case Instruction::IntToPtr: // Look past no-op inttoptrs. if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) return ARMComputeAddress(U->getOperand(0), Addr); break; - } - case Instruction::PtrToInt: { + case Instruction::PtrToInt: // Look past no-op ptrtoints. if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) return ARMComputeAddress(U->getOperand(0), Addr); break; - } case Instruction::GetElementPtr: { Address SavedAddr = Addr; int TmpOffset = Addr.Offset; -- cgit v1.1 From ae24f7d3c6770fb32eb1f6215bab1fc92cbe2d94 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 12 Jul 2013 22:43:20 +0000 Subject: [mips] Add instruction itinerary classes for mult, seb and slt instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186222 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 8 ++++---- lib/Target/Mips/MipsInstrInfo.td | 18 +++++++++--------- lib/Target/Mips/MipsSchedule.td | 3 +++ 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index b55679e..2ab99a1 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -181,14 +181,14 @@ def TAILCALL64_R : JumpFR, MTLO_FM<8>, IsTailCall; let DecoderNamespace = "Mips64" in { /// Multiply and Divide Instructions. -def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, +def DMULT : Mult<"dmult", IIImult, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1c>; -def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, +def DMULTu : Mult<"dmultu", IIImult, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1d>; def PseudoDMULT : MultDivPseudo; + IIImult>; def PseudoDMULTu : MultDivPseudo; + IIImult>; def DSDIV : Div<"ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>; def DUDIV : Div<"ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>; def PseudoDSDIV : MultDivPseudo : InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt), - !strconcat(opstr, "\t$rs, $rt"), [], IIImul, FrmR> { + !strconcat(opstr, "\t$rs, $rt"), [], IIImult, FrmR> { let Defs = [HI, LO]; let Uses = [HI, LO]; let isCommutable = isComm; @@ -560,14 +560,14 @@ class SetCC_R : InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], - IIAlu, FrmR, opstr>; + IIslt, FrmR, opstr>; class SetCC_I: InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], - IIAlu, FrmI, opstr>; + IIslt, FrmI, opstr>; // Jump class JumpFJ (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin), [(set ACRegs:$ac, (OpNode CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin))], - IIImul>, + IIImult>, PseudoInstExpansion<(RealInst CPURegsOpnd:$rs, CPURegsOpnd:$rt)> { string Constraints = "$acin = $ac"; } @@ -741,7 +741,7 @@ class CountLeading1: // Sign Extend in Register. class SignExtInReg : InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"), - [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary, FrmR> { + [(set RC:$rd, (sext_inreg RC:$rt, vt))], IIseb, FrmR> { let Predicates = [HasSEInReg, HasStdEnc]; } @@ -1015,12 +1015,12 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { } /// Multiply and Divide Instructions. -def MULT : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, +def MULT : MMRel, Mult<"mult", IIImult, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; -def MULTu : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, +def MULTu : MMRel, Mult<"multu", IIImult, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; -def PseudoMULT : MultDivPseudo; -def PseudoMULTu : MultDivPseudo; +def PseudoMULT : MultDivPseudo; +def PseudoMULTu : MultDivPseudo; def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>; def UDIV : Div<"divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>; def PseudoSDIV : MultDivPseudo Date: Fri, 12 Jul 2013 23:33:03 +0000 Subject: Fix ARM paired GPR COPY lowering ARM paired GPR COPY was being lowered to two MOVr without CC. This patch puts the CC back. My test is a reduction of the case where I encountered the issue, 64-bit atomics use paired GPRs. The issue only occurs with selectionDAG, FastISel doesn't encounter it so I didn't bother calling it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186226 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5283d7b..d670178 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -745,6 +745,9 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Opc == ARM::VORRq) Mov.addReg(Src); Mov = AddDefaultPred(Mov); + // MOVr can set CC. + if (Opc == ARM::MOVr) + Mov = AddDefaultCC(Mov); } // Add implicit super-register defs and kills to the last instruction. Mov->addRegisterDefined(DestReg, TRI); -- cgit v1.1 From 434c0bd2a5c3c7ebaa8ca10dd7e4fdb1f25e92a0 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 12 Jul 2013 23:33:22 +0000 Subject: [mips] Implement MipsTargetMachine::getInstrItineraryData(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186227 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsSubtarget.h | 1 + lib/Target/Mips/MipsTargetMachine.cpp | 4 ++-- lib/Target/Mips/MipsTargetMachine.h | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ef7568a..bfb13bb 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -194,6 +194,7 @@ public: bool hasBitCount() const { return HasBitCount; } bool hasFPIdx() const { return HasFPIdx; } + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } bool allowMixed16_32() const { return inMips16ModeDefault() | AllowMixed16_32;} diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 9af2f1b..ced6a09 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -70,8 +70,8 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(MipsTargetLowering::create(*this)), - TSInfo(*this), JITInfo() { + TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()), JITInfo() { initAsmInfo(); } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index ee55708..5a9a11d 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -44,6 +44,7 @@ class MipsTargetMachine : public LLVMTargetMachine { OwningPtr FrameLoweringSE; OwningPtr TLInfoSE; MipsSelectionDAGInfo TSInfo; + const InstrItineraryData &InstrItins; MipsJITInfo JITInfo; public: @@ -65,6 +66,11 @@ public: { return &Subtarget; } virtual const DataLayout *getDataLayout() const { return &DL;} + + virtual const InstrItineraryData *getInstrItineraryData() const { + return Subtarget.inMips16Mode() ? 0 : &InstrItins; + } + virtual MipsJITInfo *getJITInfo() { return &JITInfo; } -- cgit v1.1 From 0a14e7123269ffc84b26d87676ddce1afc335f02 Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Sat, 13 Jul 2013 17:59:55 +0000 Subject: Reduce large list of macros to the primary platform macros. Distingiush between ELF (Linux, FreeBSD, NetBSD) and OSX as platform for the assembler dialect. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186252 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCJITInfo.cpp | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index cfcd749..9f4525b 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -71,8 +71,11 @@ static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){ extern "C" void PPC32CompilationCallback(); extern "C" void PPC64CompilationCallback(); -#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ - !(defined(__ppc64__) || defined(__FreeBSD__)) +#if !defined(__ppc__) || defined(__ppc64__) +void PPC32CompilationCallback() { + llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!"); +} +#elif !defined(__ELF__) // CompilationCallback stub - We can't use a C function with inline assembly in // it, because we the prolog/epilog inserted by GCC won't work for us. Instead, // write our own wrapper, which does things our way, so we have complete control @@ -137,8 +140,8 @@ asm( "bctr\n" ); -#elif defined(__PPC__) && !defined(__ppc64__) -// Linux & FreeBSD / PPC 32 support +#else +// ELF PPC 32 support // CompilationCallback stub - We can't use a C function with inline assembly in // it, because we the prolog/epilog inserted by GCC won't work for us. Instead, @@ -197,15 +200,14 @@ asm( "mtlr 0\n" "bctr\n" ); -#else -void PPC32CompilationCallback() { - llvm_unreachable("This is not a power pc, you can't execute this!"); -} #endif -#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ - defined(__ppc64__) -#ifdef __ELF__ +#ifndef __ppc64__ +void PPC64CompilationCallback() { + llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!"); +} +#else +# ifdef __ELF__ asm( ".text\n" ".align 2\n" @@ -219,13 +221,13 @@ asm( ".align 4\n" ".type PPC64CompilationCallback,@function\n" ".L.PPC64CompilationCallback:\n" -#else +# else asm( ".text\n" ".align 2\n" ".globl _PPC64CompilationCallback\n" "_PPC64CompilationCallback:\n" -#endif +# endif // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the // FIXME: need to save v[0-19] for altivec? // Set up a proper stack frame @@ -258,12 +260,12 @@ asm( "ld 5, 280(1)\n" // stub's frame "ld 4, 16(5)\n" // stub's lr "li 5, 1\n" // 1 == 64 bit -#ifdef __ELF__ +# ifdef __ELF__ "bl LLVMPPCCompilationCallback\n" "nop\n" -#else +# else "bl _LLVMPPCCompilationCallback\n" -#endif +# endif "mtctr 3\n" // Restore all int arg registers "ld 10, 272(1)\n" "ld 9, 264(1)\n" @@ -285,10 +287,6 @@ asm( // XXX: any special TOC handling in the ELF case for JIT? "bctr\n" ); -#else -void PPC64CompilationCallback() { - llvm_unreachable("This is not a power pc, you can't execute this!"); -} #endif extern "C" { -- cgit v1.1 From a0ec3f9b7b826b9b40b80199923b664bad808cce Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 14 Jul 2013 04:42:23 +0000 Subject: Use SmallVectorImpl& instead of SmallVector to avoid repeating small vector size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186274 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 6 +++--- lib/Target/ARM/ARMISelLowering.cpp | 10 +++++----- lib/Target/ARM/ARMISelLowering.h | 2 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 14 +++++++------- lib/Target/Hexagon/HexagonCallingConvLower.cpp | 2 +- lib/Target/Hexagon/HexagonCallingConvLower.h | 4 ++-- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 8 ++++---- lib/Target/Hexagon/HexagonISelLowering.cpp | 8 ++++---- lib/Target/MBlaze/MBlazeFrameLowering.cpp | 4 ++-- lib/Target/MBlaze/MBlazeISelLowering.cpp | 6 +++--- lib/Target/MBlaze/MBlazeMachineFunction.h | 2 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 6 +++--- lib/Target/Mips/MipsISelLowering.cpp | 8 ++++---- lib/Target/Mips/MipsISelLowering.h | 2 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 6 +++--- lib/Target/PowerPC/PPCFrameLowering.cpp | 4 ++-- lib/Target/PowerPC/PPCISelLowering.cpp | 24 ++++++++++++------------ lib/Target/PowerPC/PPCMachineFunctionInfo.h | 2 +- lib/Target/R600/AMDILCFGStructurizer.cpp | 8 ++++---- lib/Target/R600/R600EmitClauseMarkers.cpp | 2 +- lib/Target/R600/R600InstrInfo.cpp | 2 +- lib/Target/Sparc/SparcISelLowering.cpp | 6 +++--- lib/Target/SystemZ/SystemZISelLowering.cpp | 6 +++--- lib/Target/X86/X86ISelLowering.cpp | 8 ++++---- lib/Target/XCore/XCoreISelLowering.cpp | 8 ++++---- 25 files changed, 79 insertions(+), 79 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 1fa1edb..777f4dd 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1079,9 +1079,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8c4a3f1..cc323f68 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -74,7 +74,7 @@ namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - const TargetMachine &TM, SmallVector &locs, + const TargetMachine &TM, SmallVectorImpl &locs, LLVMContext &C, ParmContext PC) : CCState(CC, isVarArg, MF, TM, locs, C) { assert(((PC == Call) || (PC == Prologue)) && @@ -1330,7 +1330,7 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, - SmallVector &MemOpChains, + SmallVectorImpl &MemOpChains, ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, @@ -1358,9 +1358,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 6593777..ed6c405 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -417,7 +417,7 @@ namespace llvm { RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, - SmallVector &MemOpChains, + SmallVectorImpl &MemOpChains, ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 0f83cdc..1803a8a 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -109,12 +109,12 @@ namespace { unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector &Merges); + SmallVectorImpl &Merges); void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, int Opcode, unsigned Size, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, - SmallVector &Merges); + SmallVectorImpl &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, @@ -371,7 +371,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector &Merges) { + SmallVectorImpl &Merges) { // First calculate which of the registers should be killed by the merged // instruction. const unsigned insertPos = memOps[insertAfter].Position; @@ -444,10 +444,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, /// load / store multiple instructions. void ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, - unsigned Base, int Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVector &Merges) { + unsigned Base, int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps, + SmallVectorImpl &Merges) { bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); int Offset = MemOps[SIndex].Offset; int SOffset = Offset; diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp index fc5503a..f5f958c 100644 --- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -25,7 +25,7 @@ using namespace llvm; Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, - SmallVector &locs, + SmallVectorImpl &locs, LLVMContext &c) : CallingConv(CC), IsVarArg(isVarArg), TM(tm), Locs(locs), Context(c) { // No stack is used. diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h index eed99f4..33c8306 100644 --- a/lib/Target/Hexagon/HexagonCallingConvLower.h +++ b/lib/Target/Hexagon/HexagonCallingConvLower.h @@ -48,14 +48,14 @@ class Hexagon_CCState { CallingConv::ID CallingConv; bool IsVarArg; const TargetMachine &TM; - SmallVector &Locs; + SmallVectorImpl &Locs; LLVMContext &Context; unsigned StackOffset; SmallVector UsedRegs; public: Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM, - SmallVector &locs, LLVMContext &c); + SmallVectorImpl &locs, LLVMContext &c); void addLoc(const CCValAssign &V) { Locs.push_back(V); diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index d002788..3c4ca0f 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -134,7 +134,7 @@ namespace { /// has a computable trip count and, if so, return a value that represents /// the trip count expression. CountValue *getLoopTripCount(MachineLoop *L, - SmallVector &OldInsts); + SmallVectorImpl &OldInsts); /// \brief Return the expression that represents the number of times /// a loop iterates. The function takes the operands that represent the @@ -164,7 +164,7 @@ namespace { /// \brief Return true if the instruction is now dead. bool isDead(const MachineInstr *MI, - SmallVector &DeadPhis) const; + SmallVectorImpl &DeadPhis) const; /// \brief Remove the instruction if it is now dead. void removeIfDead(MachineInstr *MI); @@ -428,7 +428,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, /// induction variable patterns that are used in the calculation for /// the number of time the loop is executed. CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, - SmallVector &OldInsts) { + SmallVectorImpl &OldInsts) { MachineBasicBlock *TopMBB = L->getTopBlock(); MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); assert(PI != TopMBB->pred_end() && @@ -890,7 +890,7 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { /// for inline asm, physical registers and instructions with side effects /// removed. bool HexagonHardwareLoops::isDead(const MachineInstr *MI, - SmallVector &DeadPhis) const { + SmallVectorImpl &DeadPhis) const { // Examine each operand. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 85e1045..6cb126f 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -382,10 +382,10 @@ SDValue HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index 786eeee..f7a8831 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -38,8 +38,8 @@ static cl::opt MBDisableStackAdjust( cl::desc("Disable MBlaze stack layout adjustment."), cl::Hidden); -static void replaceFrameIndexes(MachineFunction &MF, - SmallVector, 16> &FR) { +static void replaceFrameIndexes(MachineFunction &MF, + SmallVectorImpl > &FR) { MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo(); const SmallVectorImpl >::iterator FRB = FR.begin(); diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 8046da2..54df965 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -687,9 +687,9 @@ LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h index 10d507f..43240eb 100644 --- a/lib/Target/MBlaze/MBlazeMachineFunction.h +++ b/lib/Target/MBlaze/MBlazeMachineFunction.h @@ -118,7 +118,7 @@ public: return false; } - const SmallVector& getLiveIn() const { return LiveInFI; } + const SmallVectorImpl &getLiveIn() const { return LiveInFI; } void recordReplacement(int OFI, int NFI) { FIReplacements.insert(std::make_pair(OFI,NFI)); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 168e3f1..b144164 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -279,9 +279,9 @@ MSP430TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 5fd50fd..ffa077f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2323,9 +2323,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc DL = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; @@ -3383,7 +3383,7 @@ copyByValRegs(SDValue Chain, SDLoc DL, std::vector &OutChains, void MipsTargetLowering:: passByValArg(SDValue Chain, SDLoc DL, std::deque< std::pair > &RegsToPass, - SmallVector &MemOpChains, SDValue StackPtr, + SmallVectorImpl &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const MipsCC &CC, const ByValArgInfo &ByVal, const ISD::ArgFlagsTy &Flags, bool isLittle) const { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 6103db5..123a2a6 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -386,7 +386,7 @@ namespace llvm { /// passByValArg - Pass a byval argument in registers or on stack. void passByValArg(SDValue Chain, SDLoc DL, std::deque< std::pair > &RegsToPass, - SmallVector &MemOpChains, SDValue StackPtr, + SmallVectorImpl &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const MipsCC &CC, const ByValArgInfo &ByVal, const ISD::ArgFlagsTy &Flags, bool isLittle) const; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index d4cc31b..c89c2fc 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -493,9 +493,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index a19ce23..8e33830 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -369,7 +369,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); - const SmallVector &MustSaveCRs = FI->getMustSaveCRs(); + const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF); @@ -642,7 +642,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo(); bool MustSaveLR = FI->mustSaveLR(); - const SmallVector &MustSaveCRs = FI->getMustSaveCRs(); + const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b39f0d5..a38201a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2957,8 +2957,8 @@ struct TailCallArgumentInfo { static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, - const SmallVector &TailCallArgs, - SmallVector &MemOpChains, + const SmallVectorImpl &TailCallArgs, + SmallVectorImpl &MemOpChains, SDLoc dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; @@ -3016,7 +3016,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, - SmallVector& TailCallArguments) { + SmallVectorImpl& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); @@ -3081,8 +3081,8 @@ static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, - bool isVector, SmallVector &MemOpChains, - SmallVector &TailCallArguments, + bool isVector, SmallVectorImpl &MemOpChains, + SmallVectorImpl &TailCallArguments, SDLoc dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); if (!isTailCall) { @@ -3106,7 +3106,7 @@ static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, bool isDarwinABI, - SmallVector &TailCallArguments) { + SmallVectorImpl &TailCallArguments) { MachineFunction &MF = DAG.getMachineFunction(); // Emit a sequence of copyto/copyfrom virtual registers for arguments that @@ -3133,8 +3133,8 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, - SmallVector, 8> &RegsToPass, - SmallVector &Ops, std::vector &NodeTys, + SmallVectorImpl > &RegsToPass, + SmallVectorImpl &Ops, std::vector &NodeTys, const PPCSubtarget &PPCSubTarget) { bool isPPC64 = PPCSubTarget.isPPC64(); @@ -3460,10 +3460,10 @@ SDValue PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 40d1f3a..3b2ac3b 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -160,7 +160,7 @@ public: int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } - const SmallVector & + const SmallVectorImpl & getMustSaveCRs() const { return MustSaveCRs; } void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); } }; diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index 437480c..20a94f1 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -92,7 +92,7 @@ void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { } template -void ReverseVector(SmallVector &Src) { +void ReverseVector(SmallVectorImpl &Src) { size_t sz = Src.size(); for (size_t i = 0; i < sz/2; ++i) { NodeT *t = Src[i]; @@ -258,7 +258,7 @@ private: BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); void removeUnconditionalBranch(BlockT *SrcBlock); void removeRedundantConditionalBranch(BlockT *SrcBlock); - void addDummyExitBlock(SmallVector &RetBlocks); + void addDummyExitBlock(SmallVectorImpl &RetBlocks); void removeSuccessor(BlockT *SrcBlock); BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); @@ -2076,8 +2076,8 @@ void CFGStructurizer::removeRedundantConditionalBranch(BlockT *srcBlk) { } //removeRedundantConditionalBranch template -void CFGStructurizer::addDummyExitBlock(SmallVector &retBlks) { +void CFGStructurizer::addDummyExitBlock(SmallVectorImpl + &retBlks) { BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); funcRep->push_back(dummyExitBlk); //insert to function CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp index c1da64c..fac2b47 100644 --- a/lib/Target/R600/R600EmitClauseMarkers.cpp +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -107,7 +107,7 @@ private: bool SubstituteKCacheBank(MachineInstr *MI, std::vector > &CachedConsts) const { std::vector > UsedKCache; - const SmallVector, 3> &Consts = + const SmallVectorImpl > &Consts = TII->getSrcs(MI); assert((TII->isALUInstr(MI->getOpcode()) || MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const"); diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 1a07b05..0c059aa 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -519,7 +519,7 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector &MIs) if (!isALUInstr(MI->getOpcode())) continue; - const SmallVector, 3> &Srcs = + const SmallVectorImpl > &Srcs = getSrcs(MI); for (unsigned j = 0, e = Srcs.size(); j < e; j++) { diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 414087a..6ddfa8c 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -654,9 +654,9 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index a317f0c..e6e6d02 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -679,9 +679,9 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 95ca6c3..3407a98 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2280,10 +2280,10 @@ SDValue X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; CallingConv::ID CallConv = CLI.CallConv; diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 5af2c9c..5f3d935 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -847,10 +847,10 @@ SDValue XCoreTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - SDLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; -- cgit v1.1 From a77f816c4c6e4c833ac9ab78e2e038dcfb861c73 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Sun, 14 Jul 2013 15:11:00 +0000 Subject: Properly lower jump tables on MSP430. Patch by Job Noorman! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186283 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MSP430/MSP430ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index b144164..682eb2c 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -987,8 +987,8 @@ SDValue MSP430TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast(Op); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); - Result.getNode()->setDebugLoc(JT->getDebugLoc()); - return Result; + return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT), + getPointerTy(), Result); } /// getPostIndexedAddressParts - returns true by value, base pointer and -- cgit v1.1 From c7c4a7867cae8fb0ad5c79c3e67d75e415287626 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Sun, 14 Jul 2013 18:19:44 +0000 Subject: Use conventional syntax for branches. Patch by Job! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186291 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MSP430/MSP430InstrInfo.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index e45780d..50e3fda 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -183,10 +183,10 @@ let isBarrier = 1 in { "br\t$brdst", [(brind tblockaddress:$brdst)]>; def Br : I16rr<0, (outs), (ins GR16:$brdst), - "mov.w\t{$brdst, pc}", + "br\t$brdst", [(brind GR16:$brdst)]>; def Bm : I16rm<0, (outs), (ins memsrc:$brdst), - "mov.w\t{$brdst, pc}", + "br\t$brdst", [(brind (load addr:$brdst))]>; } } -- cgit v1.1 From b9df53a40b22c74ce3f3a7b4a7c0676a38cf5e73 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 15 Jul 2013 04:27:47 +0000 Subject: Use llvm::array_lengthof to replace sizeof(array)/sizeof(array[0]). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186301 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 2 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 +- lib/Target/R600/AMDGPUISelLowering.cpp | 2 +- lib/Target/R600/AMDILISelLowering.cpp | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index cc323f68..fdc015b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2732,7 +2732,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, lastRegToSaveIndex = REnd - ARM::R0; } else { firstRegToSaveIndex = CCInfo.getFirstUnallocated - (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + (GPRArgRegs, array_lengthof(GPRArgRegs)); lastRegToSaveIndex = 4; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 66d9466..55de1dc 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -853,7 +853,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: This is a total hack, finish mc'izing the PPC backend. if (OutStreamer.hasRawTextSupport()) { - assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) && + assert(Directive < array_lengthof(CPUDirectives) && "CPUDirectives[] might not be up-to-date!"); OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive])); } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 9891ad3..3629d74 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -82,7 +82,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : (int)MVT::v2i32, (int)MVT::v4i32 }; - size_t NumTypes = sizeof(types) / sizeof(*types); + const size_t NumTypes = array_lengthof(types); for (unsigned int x = 0; x < NumTypes; ++x) { MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index d669966..95e785b 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -82,10 +82,10 @@ void AMDGPUTargetLowering::InitAMDILLowering() { (int)MVT::v2f64, (int)MVT::v2i64 }; - size_t NumTypes = sizeof(types) / sizeof(*types); - size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); - size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); - size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); + const size_t NumTypes = array_lengthof(types); + const size_t NumFloatTypes = array_lengthof(FloatTypes); + const size_t NumIntTypes = array_lengthof(IntTypes); + const size_t NumVectorTypes = array_lengthof(VectorTypes); const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget(); // These are the current register classes that are -- cgit v1.1 From 787e71df693e94cc512f3e439bf91609a8ec9bae Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 15 Jul 2013 06:39:13 +0000 Subject: Make some arrays 'static const' git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186307 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 2 +- lib/Target/R600/AMDILISelLowering.cpp | 8 ++++---- lib/Target/R600/R600ControlFlowFinalizer.cpp | 2 +- lib/Target/R600/SIInstrInfo.cpp | 10 +++++----- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 3629d74..c9df89c 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -78,7 +78,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); - int types[] = { + static const int types[] = { (int)MVT::v2i32, (int)MVT::v4i32 }; diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index 95e785b..970787e 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -39,7 +39,7 @@ using namespace llvm; // TargetLowering Class Implementation Begins //===----------------------------------------------------------------------===// void AMDGPUTargetLowering::InitAMDILLowering() { - int types[] = { + static const int types[] = { (int)MVT::i8, (int)MVT::i16, (int)MVT::i32, @@ -58,19 +58,19 @@ void AMDGPUTargetLowering::InitAMDILLowering() { (int)MVT::v2i64 }; - int IntTypes[] = { + static const int IntTypes[] = { (int)MVT::i8, (int)MVT::i16, (int)MVT::i32, (int)MVT::i64 }; - int FloatTypes[] = { + static const int FloatTypes[] = { (int)MVT::f32, (int)MVT::f64 }; - int VectorTypes[] = { + static const int VectorTypes[] = { (int)MVT::v2i8, (int)MVT::v4i8, (int)MVT::v2i16, diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 932a6a7..1cd0ac3 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -172,7 +172,7 @@ private: } void getLiteral(MachineInstr *MI, std::vector &Lits) const { - unsigned LiteralRegs[] = { + static const unsigned LiteralRegs[] = { AMDGPU::ALU_LITERAL_X, AMDGPU::ALU_LITERAL_Y, AMDGPU::ALU_LITERAL_Z, diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index cb582a6..551ae86 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -42,27 +42,27 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // never be necessary. assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); - const int16_t Sub0_15[] = { + static const int16_t Sub0_15[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0 }; - const int16_t Sub0_7[] = { + static const int16_t Sub0_7[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0 }; - const int16_t Sub0_3[] = { + static const int16_t Sub0_3[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 }; - const int16_t Sub0_2[] = { + static const int16_t Sub0_2[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0 }; - const int16_t Sub0_1[] = { + static const int16_t Sub0_1[] = { AMDGPU::sub0, AMDGPU::sub1, 0 }; -- cgit v1.1 From da129a2eb3c248384aa60eb2d59f14b2e877e098 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 15 Jul 2013 06:54:12 +0000 Subject: Add 'static' keyword to some const arrays for consistency. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186308 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3407a98..1a0c937 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8100,7 +8100,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, LLVMContext *Context = DAG.getContext(); // Build some magic constants. - const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; + static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); @@ -9399,8 +9399,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); // Create masks for only the low parts/high parts of the 64 bit integers. - const int MaskHi[] = { 1, 1, 3, 3 }; - const int MaskLo[] = { 0, 0, 2, 2 }; + static const int MaskHi[] = { 1, 1, 3, 3 }; + static const int MaskLo[] = { 0, 0, 2, 2 }; SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi); SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo); SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); @@ -9427,7 +9427,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); // Make sure the lower and upper halves are both all-ones. - const int Mask[] = { 1, 0, 3, 2 }; + static const int Mask[] = { 1, 0, 3, 2 }; SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask); Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf); @@ -11492,7 +11492,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, "Should not custom lower when pmuldq is available!"); // Extract the odd parts. - const int UnpackMask[] = { 1, -1, 3, -1 }; + static const int UnpackMask[] = { 1, -1, 3, -1 }; SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); @@ -11506,7 +11506,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Merge the two vectors back together with a shuffle. This expands into 2 // shuffles. - const int ShufMask[] = { 0, 4, 2, 6 }; + static const int ShufMask[] = { 0, 4, 2, 6 }; return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); } -- cgit v1.1 From 3698dc4d4e7f32fb2d189aaf83a505742c6858e6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 15 Jul 2013 07:02:45 +0000 Subject: Add const qualifier to some static arrays. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186309 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FrameLowering.cpp | 10 +++++----- lib/Target/AArch64/AArch64FrameLowering.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index d571765..7318230 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -425,7 +425,7 @@ AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const std::vector &CSI, const TargetRegisterInfo *TRI, - LoadStoreMethod PossClasses[], + const LoadStoreMethod PossClasses[], unsigned NumClasses) const { DebugLoc DL = MBB.findDebugLoc(MBBI); MachineFunction &MF = *MBB.getParent(); @@ -528,11 +528,11 @@ AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, if (CSI.empty()) return false; - static LoadStoreMethod PossibleClasses[] = { + static const LoadStoreMethod PossibleClasses[] = { {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR}, {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR}, }; - unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + const unsigned NumClasses = llvm::array_lengthof(PossibleClasses); emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI, PossibleClasses, NumClasses); @@ -549,11 +549,11 @@ AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (CSI.empty()) return false; - static LoadStoreMethod PossibleClasses[] = { + static const LoadStoreMethod PossibleClasses[] = { {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR}, {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR}, }; - unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + const unsigned NumClasses = llvm::array_lengthof(PossibleClasses); emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI, PossibleClasses, NumClasses); diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 45ea0ec..032dd90 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -90,7 +90,7 @@ public: MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI, - LoadStoreMethod PossibleClasses[], + const LoadStoreMethod PossibleClasses[], unsigned NumClasses) const; -- cgit v1.1 From f67c7d7e8c5949037e85dd233876989c1fea7099 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 15 Jul 2013 07:22:00 +0000 Subject: Make some arrays 'static const' git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186311 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 20 ++++----- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 26 ++++++------ .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 48 +++++++++++++--------- 3 files changed, 51 insertions(+), 43 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 777f4dd..4f4553d 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -264,16 +264,16 @@ EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, unsigned &LdrOpc, unsigned &StrOpc) { - static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, - AArch64::LDXR_word, AArch64::LDXR_dword}; - static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, - AArch64::LDAXR_word, AArch64::LDAXR_dword}; - static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, - AArch64::STXR_word, AArch64::STXR_dword}; - static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword, - AArch64::STLXR_word, AArch64::STLXR_dword}; - - unsigned *LoadOps, *StoreOps; + static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, + AArch64::LDXR_word, AArch64::LDXR_dword}; + static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, + AArch64::LDAXR_word, AArch64::LDAXR_dword}; + static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, + AArch64::STXR_word, AArch64::STXR_dword}; + static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword, + AArch64::STLXR_word, AArch64::STLXR_dword}; + + const unsigned *LoadOps, *StoreOps; if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) LoadOps = LoadAcqs; else diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 4a0237d..10a9a6a 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -454,7 +454,7 @@ public: } bool isMOVN32Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_SABS_G0, AArch64MCExpr::VK_AARCH64_SABS_G1, AArch64MCExpr::VK_AARCH64_DTPREL_G1, @@ -463,13 +463,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1, AArch64MCExpr::VK_AARCH64_TPREL_G0, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(32, PermittedModifiers, NumModifiers); } bool isMOVN64Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_SABS_G0, AArch64MCExpr::VK_AARCH64_SABS_G1, AArch64MCExpr::VK_AARCH64_SABS_G2, @@ -481,14 +481,14 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1, AArch64MCExpr::VK_AARCH64_TPREL_G0, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(64, PermittedModifiers, NumModifiers); } bool isMOVZ32Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_ABS_G0, AArch64MCExpr::VK_AARCH64_ABS_G1, AArch64MCExpr::VK_AARCH64_SABS_G0, @@ -499,13 +499,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1, AArch64MCExpr::VK_AARCH64_TPREL_G0, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(32, PermittedModifiers, NumModifiers); } bool isMOVZ64Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_ABS_G0, AArch64MCExpr::VK_AARCH64_ABS_G1, AArch64MCExpr::VK_AARCH64_ABS_G2, @@ -521,13 +521,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1, AArch64MCExpr::VK_AARCH64_TPREL_G0, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(64, PermittedModifiers, NumModifiers); } bool isMOVK32Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_ABS_G0_NC, AArch64MCExpr::VK_AARCH64_ABS_G1_NC, AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, @@ -536,13 +536,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(32, PermittedModifiers, NumModifiers); } bool isMOVK64Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_ABS_G0_NC, AArch64MCExpr::VK_AARCH64_ABS_G1_NC, AArch64MCExpr::VK_AARCH64_ABS_G2_NC, @@ -553,13 +553,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(64, PermittedModifiers, NumModifiers); } bool isMoveWideImm(unsigned RegWidth, - AArch64MCExpr::VariantKind *PermittedModifiers, + const AArch64MCExpr::VariantKind *PermittedModifiers, unsigned NumModifiers) const { if (!isImmWithLSL()) return false; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 8cf374f..b9770b3 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -152,10 +152,10 @@ getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, switch (Expr->getKind()) { default: llvm_unreachable("Unexpected operand modifier"); case AArch64MCExpr::VK_AARCH64_LO12: { - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, - AArch64::fixup_a64_ldst16_lo12, - AArch64::fixup_a64_ldst32_lo12, - AArch64::fixup_a64_ldst64_lo12, + static const unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, + AArch64::fixup_a64_ldst16_lo12, + AArch64::fixup_a64_ldst32_lo12, + AArch64::fixup_a64_ldst64_lo12, AArch64::fixup_a64_ldst128_lo12 }; assert(MemSize <= 16 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; @@ -166,19 +166,23 @@ getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; break; case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12, - AArch64::fixup_a64_ldst16_dtprel_lo12, - AArch64::fixup_a64_ldst32_dtprel_lo12, - AArch64::fixup_a64_ldst64_dtprel_lo12 }; + static const unsigned FixupsBySize[] = { + AArch64::fixup_a64_ldst8_dtprel_lo12, + AArch64::fixup_a64_ldst16_dtprel_lo12, + AArch64::fixup_a64_ldst32_dtprel_lo12, + AArch64::fixup_a64_ldst64_dtprel_lo12 + }; assert(MemSize <= 8 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; break; } case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc, - AArch64::fixup_a64_ldst16_dtprel_lo12_nc, - AArch64::fixup_a64_ldst32_dtprel_lo12_nc, - AArch64::fixup_a64_ldst64_dtprel_lo12_nc }; + static const unsigned FixupsBySize[] = { + AArch64::fixup_a64_ldst8_dtprel_lo12_nc, + AArch64::fixup_a64_ldst16_dtprel_lo12_nc, + AArch64::fixup_a64_ldst32_dtprel_lo12_nc, + AArch64::fixup_a64_ldst64_dtprel_lo12_nc + }; assert(MemSize <= 8 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; break; @@ -188,19 +192,23 @@ getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; break; case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12, - AArch64::fixup_a64_ldst16_tprel_lo12, - AArch64::fixup_a64_ldst32_tprel_lo12, - AArch64::fixup_a64_ldst64_tprel_lo12 }; + static const unsigned FixupsBySize[] = { + AArch64::fixup_a64_ldst8_tprel_lo12, + AArch64::fixup_a64_ldst16_tprel_lo12, + AArch64::fixup_a64_ldst32_tprel_lo12, + AArch64::fixup_a64_ldst64_tprel_lo12 + }; assert(MemSize <= 8 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; break; } case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc, - AArch64::fixup_a64_ldst16_tprel_lo12_nc, - AArch64::fixup_a64_ldst32_tprel_lo12_nc, - AArch64::fixup_a64_ldst64_tprel_lo12_nc }; + static const unsigned FixupsBySize[] = { + AArch64::fixup_a64_ldst8_tprel_lo12_nc, + AArch64::fixup_a64_ldst16_tprel_lo12_nc, + AArch64::fixup_a64_ldst32_tprel_lo12_nc, + AArch64::fixup_a64_ldst64_tprel_lo12_nc + }; assert(MemSize <= 8 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; break; -- cgit v1.1 From 0bf3c99886bed6796eada8f65942ee6023fc6e89 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 15 Jul 2013 14:25:26 +0000 Subject: Test commit to see if write access works. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186321 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MSP430/MSP430ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 682eb2c..803e899 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -456,7 +456,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, /// LowerCCCCallTo - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. -/// TODO: sret. +// TODO: sret. SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, -- cgit v1.1 From cd0f2458641ebebdb887da0381bd3acea6a1902f Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 15 Jul 2013 19:00:09 +0000 Subject: R600/SI: Add support for 64-bit loads https://bugs.freedesktop.org/show_bug.cgi?id=65873 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186339 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUInstructions.td | 20 ++++++++++++++++++++ lib/Target/R600/SIISelLowering.cpp | 2 ++ lib/Target/R600/SIInstructions.td | 8 +++++++- 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 234bb99..7197c61 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -86,6 +86,12 @@ def COND_NULL : PatLeaf < // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// +def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + LoadSDNode *L = cast(N); + return L->getExtensionType() == ISD::ZEXTLOAD || + L->getExtensionType() == ISD::EXTLOAD; +}]>; + def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast(N)); }]>; @@ -94,6 +100,20 @@ def zextloadi8_constant : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast(N)); }]>; +def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def az_extloadi32_global : PatFrag<(ops node:$ptr), + (az_extloadi32 node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + +def az_extloadi32_constant : PatFrag<(ops node:$ptr), + (az_extloadi32 node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; + def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return isLocalLoad(dyn_cast(N)); }]>; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 336bfbf..6cae978 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -76,6 +76,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setTargetDAGCombine(ISD::SELECT_CC); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index ffa45c5..95e86d7 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1024,7 +1024,9 @@ def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64", def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64", [(set i64:$dst, (srl i64:$src0, i32:$src1))] >; -def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", []>; +def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", + [(set i64:$dst, (sra i64:$src0, i32:$src1))] +>; let isCommutable = 1 in { @@ -1738,6 +1740,10 @@ defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern Date: Mon, 15 Jul 2013 20:22:58 +0000 Subject: Fix register subclass handling in PPCInstrInfo::insertSelect PPCInstrInfo::insertSelect and PPCInstrInfo::canInsertSelect were computing the common subclass of the true and false inputs, and then selecting either the 32-bit or the 64-bit isel variant based on the result of calling PPC::GPRCRegClass.hasSubClassEq(RC) and PPC::G8RCRegClass.hasSubClassEq(RC) (where RC is the common subclass). Unfortunately, this is not quite right: if we have something like this: %vreg8 = SELECT_CC_I8 %vreg4, %vreg7, %vreg6, 76; G8RC_and_G8RC_NOX0:%vreg8 CRRC:%vreg4 G8RC_NOX0:%vreg7,%vreg6 then the common subclass of G8RC_and_G8RC_NOX0 and G8RC_NOX0 is G8RC_NOX0, and G8RC_NOX0 is not a subclass of G8RC (because it also contains the ZERO8 pseudo-register). As a result, we also need to check the common subclass against GPRC_NOR0 and G8RC_NOX0 explicitly. This had not been a problem for clients of insertSelect that called canInsertSelect first (because it had a compensating mistake), but insertSelect is also used by the PPC pseudo-instruction expander, and this error was causing a problem in that context. This problem was found by csmith. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186343 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 1ad879d..375daee 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -448,7 +448,9 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, // isel is for regular integer GPRs only. if (!PPC::GPRCRegClass.hasSubClassEq(RC) && - !PPC::G8RCRegClass.hasSubClassEq(RC)) + !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) && + !PPC::G8RCRegClass.hasSubClassEq(RC) && + !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) return false; // FIXME: These numbers are for the A2, how well they work for other cores is @@ -478,12 +480,15 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, const TargetRegisterClass *RC = RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); assert(RC && "TrueReg and FalseReg must have overlapping register classes"); - assert((PPC::GPRCRegClass.hasSubClassEq(RC) || - PPC::G8RCRegClass.hasSubClassEq(RC)) && + + bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) || + PPC::G8RC_NOX0RegClass.hasSubClassEq(RC); + assert((Is64Bit || + PPC::GPRCRegClass.hasSubClassEq(RC) || + PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) && "isel is for regular integer GPRs only"); - unsigned OpCode = - PPC::GPRCRegClass.hasSubClassEq(RC) ? PPC::ISEL : PPC::ISEL8; + unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL; unsigned SelectPred = Cond[0].getImm(); unsigned SubIdx; -- cgit v1.1 From a44c37f880c8ca84b7388dd52fb2708495697a18 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 15 Jul 2013 22:29:40 +0000 Subject: PPC: Refactoring to support subtarget feature changing This change mirrors the changes that were made to the X86 and ARM targets to support subtarget feature changing. As indicated in r182899, the mechanism is still undergoing revision, and so as with the X86 and ARM targets, there is no test case yet (there is no effective functionality change). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186357 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCSubtarget.cpp | 99 +++++++++++++++++++++++-------------- lib/Target/PowerPC/PPCSubtarget.h | 7 +++ 2 files changed, 69 insertions(+), 37 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index a8f2b3f..51fbfda 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -14,7 +14,10 @@ #include "PPCSubtarget.h" #include "PPC.h" #include "PPCRegisterInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" @@ -29,32 +32,67 @@ using namespace llvm; PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) : PPCGenSubtargetInfo(TT, CPU, FS) - , StackAlignment(16) - , DarwinDirective(PPC::DIR_NONE) - , HasMFOCRF(false) - , Has64BitSupport(false) - , Use64BitRegs(false) , IsPPC64(is64Bit) - , HasAltivec(false) - , HasQPX(false) - , HasFSQRT(false) - , HasFRE(false) - , HasFRES(false) - , HasFRSQRTE(false) - , HasFRSQRTES(false) - , HasRecipPrec(false) - , HasSTFIWX(false) - , HasLFIWAX(false) - , HasFPRND(false) - , HasFPCVT(false) - , HasISEL(false) - , HasPOPCNTD(false) - , HasLDBRX(false) - , IsBookE(false) - , HasLazyResolverStubs(false) - , IsJITCodeModel(false) , TargetTriple(TT) { + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); +} + +/// SetJITMode - This is called to inform the subtarget info that we are +/// producing code for the JIT. +void PPCSubtarget::SetJITMode() { + // JIT mode doesn't want lazy resolver stubs, it knows exactly where + // everything is. This matters for PPC64, which codegens in PIC mode without + // stubs. + HasLazyResolverStubs = false; + + // Calls to external functions need to use indirect calls + IsJITCodeModel = true; +} + +void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-cpu"); + Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-features"); + std::string CPU = + !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : ""; + std::string FS = + !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; + if (!FS.empty()) { + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); + } +} +void PPCSubtarget::initializeEnvironment() { + StackAlignment = 16; + DarwinDirective = PPC::DIR_NONE; + HasMFOCRF = false; + Has64BitSupport = false; + Use64BitRegs = false; + HasAltivec = false; + HasQPX = false; + HasFSQRT = false; + HasFRE = false; + HasFRES = false; + HasFRSQRTE = false; + HasFRSQRTES = false; + HasRecipPrec = false; + HasSTFIWX = false; + HasLFIWAX = false; + HasFPRND = false; + HasFPCVT = false; + HasISEL = false; + HasPOPCNTD = false; + HasLDBRX = false; + IsBookE = false; + HasLazyResolverStubs = false; + IsJITCodeModel = false; +} + +void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine default and user specified characteristics std::string CPUName = CPU; if (CPUName.empty()) @@ -72,7 +110,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, std::string FullFS = FS; // If we are generating code for ppc64, verify that options make sense. - if (is64Bit) { + if (IsPPC64) { Has64BitSupport = true; // Silently force 64-bit register use on ppc64. Use64BitRegs = true; @@ -101,19 +139,6 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, StackAlignment = 32; } -/// SetJITMode - This is called to inform the subtarget info that we are -/// producing code for the JIT. -void PPCSubtarget::SetJITMode() { - // JIT mode doesn't want lazy resolver stubs, it knows exactly where - // everything is. This matters for PPC64, which codegens in PIC mode without - // stubs. - HasLazyResolverStubs = false; - - // Calls to external functions need to use indirect calls - IsJITCodeModel = true; -} - - /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 097f2bc..50af75d 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -137,6 +137,13 @@ public: : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; } + /// \brief Reset the features for the PowerPC target. + virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); + void resetSubtargetFeatures(StringRef CPU, StringRef FS); + +public: /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. /// bool isPPC64() const { return IsPPC64; } -- cgit v1.1 From 4172a8abbabea2359d91bb07101166565127d798 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 16 Jul 2013 01:17:10 +0000 Subject: Add 'const' qualifiers to static const char* variables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186371 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIAnnotateControlFlow.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index 9791ef4..6bbdf59 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -31,13 +31,13 @@ typedef std::pair StackEntry; typedef SmallVector StackVector; // Intrinsic names the control flow is annotated with -static const char *IfIntrinsic = "llvm.SI.if"; -static const char *ElseIntrinsic = "llvm.SI.else"; -static const char *BreakIntrinsic = "llvm.SI.break"; -static const char *IfBreakIntrinsic = "llvm.SI.if.break"; -static const char *ElseBreakIntrinsic = "llvm.SI.else.break"; -static const char *LoopIntrinsic = "llvm.SI.loop"; -static const char *EndCfIntrinsic = "llvm.SI.end.cf"; +static const char *const IfIntrinsic = "llvm.SI.if"; +static const char *const ElseIntrinsic = "llvm.SI.else"; +static const char *const BreakIntrinsic = "llvm.SI.break"; +static const char *const IfBreakIntrinsic = "llvm.SI.if.break"; +static const char *const ElseBreakIntrinsic = "llvm.SI.else.break"; +static const char *const LoopIntrinsic = "llvm.SI.loop"; +static const char *const EndCfIntrinsic = "llvm.SI.end.cf"; class SIAnnotateControlFlow : public FunctionPass { -- cgit v1.1 From 103ba845f09252d90a05109af7174f54bf412daf Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Tue, 16 Jul 2013 09:32:17 +0000 Subject: ARM EABI divmod support This patch enables calls to __aeabi_idivmod when in EABI mode, by using the remainder value returned on registers (R1), enabled by the ARM triple "none-eabi". Note that Darwin and GNUEABI triples will continue lowering on GNU style, that is, using the stack for the remainder. Still need to add SREM/UREM support fix for 64-bit lowering. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186390 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 80 +++++++++++++++++++++++++++++++++++++- lib/Target/ARM/ARMISelLowering.h | 1 + lib/Target/ARM/ARMSubtarget.h | 8 ++++ 3 files changed, 87 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index fdc015b..3648199 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -693,10 +693,36 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); } + + // FIXME: Also set divmod for SREM on EABI setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + // Register based DivRem for AEABI (RTABI 4.2) + if (Subtarget->isTargetAEABI()) { + setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod"); + setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod"); + + setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS); + + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + } else { + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + } setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -5863,6 +5889,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); + case ISD::SDIVREM: + case ISD::UDIVREM: return LowerDivRem(Op, DAG); } } @@ -10677,6 +10705,54 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } +SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); + unsigned Opcode = Op->getOpcode(); + assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && + "Invalid opcode for Div/Rem lowering"); + bool isSigned = (Opcode == ISD::SDIVREM); + EVT VT = Op->getValueType(0); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + + RTLIB::Libcall LC; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + } + + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy()); + + Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); + + SDLoc dl(Op); + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, + 0, getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair CallInfo = LowerCallTo(CLI); + + return CallInfo.first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index ed6c405..beba5ce 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -457,6 +457,7 @@ namespace llvm { const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; + SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 63ba6c5..ad7f1b3 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -280,6 +280,14 @@ public: bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; } bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetELF() const { return !isTargetDarwin(); } + // ARM EABI is the bare-metal EABI described in ARM ABI documents and + // can be accessed via -target arm-none-eabi. This is NOT GNUEABI. + // FIXME: Add a flag for bare-metal for that target and set Triple::EABI + // even for GNUEABI, so we can make a distinction here and still conform to + // the EABI on GNU (and Android) mode. This requires change in Clang, too. + bool isTargetAEABI() const { + return TargetTriple.getEnvironment() == Triple::EABI; + } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } -- cgit v1.1 From 2f438131f115a3860ee344a827a091790d6dc13d Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 16 Jul 2013 09:46:55 +0000 Subject: ARM: implement ldrex, strex and clrex intrinsics Intrinsics already existed for the 64-bit variants, so these support operations of size at most 32-bits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186392 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 29 ++++++++++++++++ lib/Target/ARM/ARMISelLowering.cpp | 24 ++++++++++++++ lib/Target/ARM/ARMInstrInfo.td | 57 ++++++++++++++++++++++++++++---- lib/Target/ARM/ARMInstrThumb2.td | 35 +++++++++++++++----- lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 2 +- 5 files changed, 131 insertions(+), 16 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 4eda5dc..31ce38e 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -175,6 +175,7 @@ public: SDValue &OffImm); bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); + bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); inline bool is_so_imm(unsigned Imm) const { return ARM_AM::getSOImmVal(Imm) != -1; @@ -1417,6 +1418,34 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, return true; } +bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, + SDValue &OffImm) { + // This *must* succeed since it's used for the irreplacable ldrex and strex + // instructions. + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + + if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) + return true; + + ConstantSDNode *RHS = dyn_cast(N.getOperand(1)); + if (!RHS) + return true; + + uint32_t RHSC = (int)RHS->getZExtValue(); + if (RHSC > 1020 || RHSC % 4 != 0) + return true; + + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); + } + + OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32); + return true; +} + //===--------------------------------------------------------------------===// /// getAL - Returns a ARMCC::AL immediate node. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3648199..83fb175 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -10838,6 +10838,30 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldrex: { + PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::arm_strex: { + PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = false; + Info.writeMem = true; + return true; + } case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 9eba553..84c210f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4383,14 +4383,44 @@ let usesCustomInserter = 1 in { [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>; } +def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def strex_1 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def strex_2 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def strex_4 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + let mayLoad = 1 in { def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, - "ldrexb", "\t$Rt, $addr", []>; + "ldrexb", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrexh", "\t$Rt, $addr", []>; + NoItinerary, "ldrexh", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrex", "\t$Rt, $addr", []>; + NoItinerary, "ldrex", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>; let hasExtraDefRegAllocReq = 1 in def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), NoItinerary, "ldrexd", "\t$Rt, $addr", []> { @@ -4400,11 +4430,14 @@ def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strexb", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_1 GPR:$Rt, addr_offset_none:$addr))]>; def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strexh", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_2 GPR:$Rt, addr_offset_none:$addr))]>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strex", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_4 GPR:$Rt, addr_offset_none:$addr))]>; let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPRPairOp:$Rt, addr_offset_none:$addr), @@ -4414,11 +4447,21 @@ def STREXD : AIstrex<0b01, (outs GPR:$Rd), } -def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [(int_arm_clrex)]>, Requires<[IsARM, HasV7]> { let Inst{31-0} = 0b11110101011111111111000000011111; } +def : ARMPat<(and (ldrex_1 addr_offset_none:$addr), 0xff), + (LDREXB addr_offset_none:$addr)>; +def : ARMPat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), + (LDREXH addr_offset_none:$addr)>; +def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (STREXB GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (STREXH GPR:$Rt, addr_offset_none:$addr)>; + // SWP/SWPB are deprecated in V6/V7. let mayLoad = 1, mayStore = 1 in { def SWP : AIswp<0, (outs GPRnopc:$Rt), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ee9eaaa..8d15630 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -251,7 +251,8 @@ def t2am_imm8s4_offset : Operand { def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass { let Name = "MemImm0_1020s4Offset"; } -def t2addrmode_imm0_1020s4 : Operand { +def t2addrmode_imm0_1020s4 : Operand, + ComplexPattern { let PrintMethod = "printT2AddrModeImm0_1020s4Operand"; let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm0_1020s4"; @@ -3201,13 +3202,16 @@ class T2I_strex opcod, dag oops, dag iops, AddrMode am, int sz, let mayLoad = 1 in { def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "ldrexb", "\t$Rt, $addr", "", []>; + "ldrexb", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "ldrexh", "\t$Rt, $addr", "", []>; + "ldrexh", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, - "ldrex", "\t$Rt, $addr", "", []> { + "ldrex", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]> { bits<4> Rt; bits<12> addr; let Inst{31-27} = 0b11101; @@ -3232,16 +3236,22 @@ let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "strexb", "\t$Rd, $Rt, $addr", "", []>; + "strexb", "\t$Rd, $Rt, $addr", "", + [(set rGPR:$Rd, (strex_1 rGPR:$Rt, + addr_offset_none:$addr))]>; def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "strexh", "\t$Rd, $Rt, $addr", "", []>; + "strexh", "\t$Rd, $Rt, $addr", "", + [(set rGPR:$Rd, (strex_2 rGPR:$Rt, + addr_offset_none:$addr))]>; + def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", - []> { + [(set rGPR:$Rd, (strex_4 rGPR:$Rt, + t2addrmode_imm0_1020s4:$addr))]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3263,7 +3273,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), } } -def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, +def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>, Requires<[IsThumb2, HasV7]> { let Inst{31-16} = 0xf3bf; let Inst{15-14} = 0b10; @@ -3274,6 +3284,15 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, let Inst{3-0} = 0b1111; } +def : T2Pat<(and (ldrex_1 addr_offset_none:$addr), 0xff), + (t2LDREXB addr_offset_none:$addr)>; +def : T2Pat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), + (t2LDREXH addr_offset_none:$addr)>; +def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (t2STREXB GPR:$Rt, addr_offset_none:$addr)>; +def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (t2STREXH GPR:$Rt, addr_offset_none:$addr)>; + //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp index 0f792ec..1f37696 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -37,7 +37,7 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) { } char NVPTXAllocaHoisting::ID = 1; -RegisterPass +static RegisterPass X("alloca-hoisting", "Hoisting alloca instructions in non-entry " "blocks to the entry block"); -- cgit v1.1 From 649c7fc4747b508d72031bce111902fe53932cca Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 16 Jul 2013 09:59:51 +0000 Subject: PPCJITInfo.cpp: Tweak r186252 with s/__ppc/__powerpc/ to work on powerpc-linux Fedora 12. g++ (GCC) 4.4.4 20100630 (Red Hat 4.4.4-10) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186396 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCJITInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 9f4525b..1745061 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -71,7 +71,7 @@ static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){ extern "C" void PPC32CompilationCallback(); extern "C" void PPC64CompilationCallback(); -#if !defined(__ppc__) || defined(__ppc64__) +#if !defined(__powerpc__) || defined(__powerpc64__) void PPC32CompilationCallback() { llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!"); } @@ -202,7 +202,7 @@ asm( ); #endif -#ifndef __ppc64__ +#ifndef __powerpc64__ void PPC64CompilationCallback() { llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!"); } -- cgit v1.1 From 088483627720acb58c96951b7b634f67312c7272 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Tue, 16 Jul 2013 10:07:14 +0000 Subject: This patch represents Mips utilization of r186388 code that alows asm matcher to emit mnemonics contain '.' characters. This makes asm parser code simpler and more efficient. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186397 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 137 +------------ lib/Target/Mips/MipsCondMov.td | 70 +++---- lib/Target/Mips/MipsInstrFPU.td | 300 ++++++++++++++++++---------- lib/Target/Mips/MipsInstrFormats.td | 5 + 4 files changed, 242 insertions(+), 270 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 4d805a7..a4f3721 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -78,15 +78,13 @@ class MipsAsmParser : public MCTargetAsmParser { SMLoc NameLoc, SmallVectorImpl &Operands); - bool parseMathOperation(StringRef Name, SMLoc NameLoc, - SmallVectorImpl &Operands); - bool ParseDirective(AsmToken DirectiveID); MipsAsmParser::OperandMatchResultTy parseRegs(SmallVectorImpl &Operands, int RegKind); - MipsAsmParser::OperandMatchResultTy + + MipsAsmParser::OperandMatchResultTy parseMemOperand(SmallVectorImpl &Operands); MipsAsmParser::OperandMatchResultTy @@ -1274,6 +1272,7 @@ MipsAsmParser::parseRegs(SmallVectorImpl &Operands, } return MatchOperand_NoMatch; } + MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCPU64Regs(SmallVectorImpl &Operands) { @@ -1335,9 +1334,9 @@ bool MipsAsmParser::searchSymbolAlias( APInt IntVal(32, -1); if (!DefSymbol.substr(1).getAsInteger(10, IntVal)) RegNum = matchRegisterByNumber(IntVal.getZExtValue(), - isMips64() - ? Mips::CPU64RegsRegClassID - : Mips::CPURegsRegClassID); + isMips64() + ? Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID); else { // Lookup for the register with the corresponding name. switch (Kind) { @@ -1368,7 +1367,7 @@ bool MipsAsmParser::searchSymbolAlias( Parser.Lex(); const MCConstantExpr *Const = static_cast(Expr); MipsOperand *op = MipsOperand::CreateImm(Const, S, - Parser.getTok().getLoc()); + Parser.getTok().getLoc()); Operands.push_back(op); return true; } @@ -1492,130 +1491,17 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { return VK; } -// Converts condition string to immediate operand value. -static int ConvertCcString(StringRef CondString) { - int CC = StringSwitch(CondString) - .Case(".f", 0) - .Case(".un", 1) - .Case(".eq", 2) - .Case(".ueq", 3) - .Case(".olt", 4) - .Case(".ult", 5) - .Case(".ole", 6) - .Case(".ule", 7) - .Case(".sf", 8) - .Case(".ngle", 9) - .Case(".seq", 10) - .Case(".ngl", 11) - .Case(".lt", 12) - .Case(".nge", 13) - .Case(".le", 14) - .Case(".ngt", 15) - .Default(-1); - - return CC; -} - -bool MipsAsmParser:: -parseMathOperation(StringRef Name, SMLoc NameLoc, - SmallVectorImpl &Operands) { - // Split the format. - size_t Start = Name.find('.'), Next = Name.rfind('.'); - StringRef Format1 = Name.slice(Start, Next); - // Add the first format to the operands. - Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc)); - // Now for the second format. - StringRef Format2 = Name.slice(Next, StringRef::npos); - Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc)); - - // Set the format for the first register. - setFpFormat(Format1); - - // Read the remaining operands. - if (getLexer().isNot(AsmToken::EndOfStatement)) { - // Read the first operand. - if (ParseOperand(Operands, Name)) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "unexpected token in argument list"); - } - - if (getLexer().isNot(AsmToken::Comma)) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "unexpected token in argument list"); - } - Parser.Lex(); // Eat the comma. - - // Set the format for the first register - setFpFormat(Format2); - - // Parse and remember the operand. - if (ParseOperand(Operands, Name)) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "unexpected token in argument list"); - } - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "unexpected token in argument list"); - } - - Parser.Lex(); // Consume the EndOfStatement. - return false; -} bool MipsAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands) { - StringRef Mnemonic; - - setDefaultFpFormat(); - // Create the leading tokens for the mnemonic, split by '.' characters. - size_t Start = 0, Next = Name.find('.'); - Mnemonic = Name.slice(Start, Next); - - Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); - - if (Next != StringRef::npos) { - // There is a format token in mnemonic. - size_t Dot = Name.find('.', Next + 1); - StringRef Format = Name.slice(Next, Dot); - if (Dot == StringRef::npos) // Only one '.' in a string, it's a format. - Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); - else { - if (Name.startswith("c.")) { - // Floating point compare, add '.' and immediate represent for cc. - Operands.push_back(MipsOperand::CreateToken(".", NameLoc)); - int Cc = ConvertCcString(Format); - if (Cc == -1) { - return Error(NameLoc, "Invalid conditional code"); - } - SMLoc E = SMLoc::getFromPointer( - Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back( - MipsOperand::CreateImm(MCConstantExpr::Create(Cc, getContext()), - NameLoc, E)); - } else { - // trunc, ceil, floor ... - return parseMathOperation(Name, NameLoc, Operands); - } - - // The rest is a format. - Format = Name.slice(Dot, StringRef::npos); - Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); - } - - setFpFormat(Format); - } + // First operand in MCInst is instruction mnemonic. + Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); // Read the remaining operands. if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. - if (ParseOperand(Operands, Mnemonic)) { + if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); @@ -1623,7 +1509,6 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, while (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. - // Parse and remember the operand. if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); @@ -1632,13 +1517,11 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, } } } - if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } - Parser.Lex(); // Consume the EndOfStatement. return false; } diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 42e4c99..cb0112a 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -16,7 +16,7 @@ // MipsISelLowering::EmitInstrWithCustomInserter if target does not have // conditional move instructions. // cond:int, data:int -class CMov_I_I_FT : InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F), !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> { @@ -24,7 +24,7 @@ class CMov_I_I_FT : InstSE<(outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F), !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR> { @@ -32,7 +32,7 @@ class CMov_I_F_FT : InstSE<(outs RC:$rd), (ins RC:$rs, RC:$F), !strconcat(opstr, "\t$rd, $rs, $$fcc0"), @@ -103,82 +103,84 @@ multiclass MovnPats, +def MOVZ_I_I : CMov_I_I_FT<"movz", CPURegsOpnd, CPURegsOpnd, NoItinerary>, ADD_FM<0, 0xa>; let Predicates = [HasStdEnc], DecoderNamespace = "Mips64" in { - def MOVZ_I_I64 : CMov_I_I_FT<"movz", CPURegs, CPU64Regs, NoItinerary>, - ADD_FM<0, 0xa>; - def MOVZ_I64_I : CMov_I_I_FT<"movz", CPU64Regs, CPURegs, NoItinerary>, - ADD_FM<0, 0xa> { + def MOVZ_I_I64 : CMov_I_I_FT<"movz", CPURegsOpnd, CPU64RegsOpnd, + NoItinerary>, ADD_FM<0, 0xa>; + def MOVZ_I64_I : CMov_I_I_FT<"movz", CPU64RegsOpnd, CPURegsOpnd, + NoItinerary>, ADD_FM<0, 0xa> { let isCodeGenOnly = 1; } - def MOVZ_I64_I64 : CMov_I_I_FT<"movz", CPU64Regs, CPU64Regs, NoItinerary>, - ADD_FM<0, 0xa> { + def MOVZ_I64_I64 : CMov_I_I_FT<"movz", CPU64RegsOpnd, CPU64RegsOpnd, + NoItinerary>, ADD_FM<0, 0xa> { let isCodeGenOnly = 1; } } -def MOVN_I_I : CMov_I_I_FT<"movn", CPURegs, CPURegs, NoItinerary>, - ADD_FM<0, 0xb>; +def MOVN_I_I : CMov_I_I_FT<"movn", CPURegsOpnd, CPURegsOpnd, + NoItinerary>, ADD_FM<0, 0xb>; let Predicates = [HasStdEnc], DecoderNamespace = "Mips64" in { - def MOVN_I_I64 : CMov_I_I_FT<"movn", CPURegs, CPU64Regs, NoItinerary>, - ADD_FM<0, 0xb>; - def MOVN_I64_I : CMov_I_I_FT<"movn", CPU64Regs, CPURegs, NoItinerary>, - ADD_FM<0, 0xb> { + def MOVN_I_I64 : CMov_I_I_FT<"movn", CPURegsOpnd, CPU64RegsOpnd, + NoItinerary>, ADD_FM<0, 0xb>; + def MOVN_I64_I : CMov_I_I_FT<"movn", CPU64RegsOpnd, CPURegsOpnd, + NoItinerary>, ADD_FM<0, 0xb> { let isCodeGenOnly = 1; } - def MOVN_I64_I64 : CMov_I_I_FT<"movn", CPU64Regs, CPU64Regs, NoItinerary>, - ADD_FM<0, 0xb> { + def MOVN_I64_I64 : CMov_I_I_FT<"movn", CPU64RegsOpnd, CPU64RegsOpnd, + NoItinerary>, ADD_FM<0, 0xb> { let isCodeGenOnly = 1; } } -def MOVZ_I_S : CMov_I_F_FT<"movz.s", CPURegs, FGR32, IIFmove>, +def MOVZ_I_S : CMov_I_F_FT<"movz.s", CPURegsOpnd, FGR32RegsOpnd, IIFmove>, CMov_I_F_FM<18, 16>; -def MOVZ_I64_S : CMov_I_F_FT<"movz.s", CPU64Regs, FGR32, IIFmove>, +def MOVZ_I64_S : CMov_I_F_FT<"movz.s", CPU64RegsOpnd, FGR32RegsOpnd, IIFmove>, CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } -def MOVN_I_S : CMov_I_F_FT<"movn.s", CPURegs, FGR32, IIFmove>, +def MOVN_I_S : CMov_I_F_FT<"movn.s", CPURegsOpnd, FGR32RegsOpnd, IIFmove>, CMov_I_F_FM<19, 16>; -def MOVN_I64_S : CMov_I_F_FT<"movn.s", CPU64Regs, FGR32, IIFmove>, +def MOVN_I64_S : CMov_I_F_FT<"movn.s", CPU64RegsOpnd, FGR32RegsOpnd, IIFmove>, CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", CPURegs, AFGR64, IIFmove>, + def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", CPURegsOpnd, AFGR64RegsOpnd, IIFmove>, CMov_I_F_FM<18, 17>; - def MOVN_I_D32 : CMov_I_F_FT<"movn.d", CPURegs, AFGR64, IIFmove>, + def MOVN_I_D32 : CMov_I_F_FT<"movn.d", CPURegsOpnd, AFGR64RegsOpnd, IIFmove>, CMov_I_F_FM<19, 17>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { - def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", CPURegs, FGR64, IIFmove>, + def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", CPURegsOpnd, FGR64RegsOpnd, IIFmove>, CMov_I_F_FM<18, 17>; - def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", CPU64Regs, FGR64, IIFmove>, - CMov_I_F_FM<18, 17> { + def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", CPU64RegsOpnd, FGR64RegsOpnd, + IIFmove>, CMov_I_F_FM<18, 17> { let isCodeGenOnly = 1; } - def MOVN_I_D64 : CMov_I_F_FT<"movn.d", CPURegs, FGR64, IIFmove>, + def MOVN_I_D64 : CMov_I_F_FT<"movn.d", CPURegsOpnd, FGR64RegsOpnd, IIFmove>, CMov_I_F_FM<19, 17>; - def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", CPU64Regs, FGR64, IIFmove>, - CMov_I_F_FM<19, 17> { + def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", CPU64RegsOpnd, FGR64RegsOpnd, + IIFmove>, CMov_I_F_FM<19, 17> { let isCodeGenOnly = 1; } } -def MOVT_I : CMov_F_I_FT<"movt", CPURegs, IIAlu, MipsCMovFP_T>, CMov_F_I_FM<1>; -def MOVT_I64 : CMov_F_I_FT<"movt", CPU64Regs, IIAlu, MipsCMovFP_T>, +def MOVT_I : CMov_F_I_FT<"movt", CPURegsOpnd, IIAlu, MipsCMovFP_T>, + CMov_F_I_FM<1>; +def MOVT_I64 : CMov_F_I_FT<"movt", CPU64RegsOpnd, IIAlu, MipsCMovFP_T>, CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } -def MOVF_I : CMov_F_I_FT<"movf", CPURegs, IIAlu, MipsCMovFP_F>, CMov_F_I_FM<0>; -def MOVF_I64 : CMov_F_I_FT<"movf", CPU64Regs, IIAlu, MipsCMovFP_F>, +def MOVF_I : CMov_F_I_FT<"movf", CPURegsOpnd, IIAlu, MipsCMovFP_F>, + CMov_F_I_FM<0>; +def MOVF_I64 : CMov_F_I_FT<"movf", CPU64RegsOpnd, IIAlu, MipsCMovFP_F>, CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index c2acec1..ec4c429 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -88,7 +88,7 @@ def fpimm0neg : PatLeaf<(fpimm), [{ // Only S32 and D32 are supported right now. //===----------------------------------------------------------------------===// -class ADDS_FT : InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fs, $ft"), @@ -98,15 +98,15 @@ class ADDS_FT { - def _D32 : ADDS_FT, + def _D32 : ADDS_FT, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : ADDS_FT, + def _D64 : ADDS_FT, Requires<[IsFP64bit, HasStdEnc]> { string DecoderNamespace = "Mips64"; } } -class ABSS_FT : InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"), [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>, @@ -114,39 +114,39 @@ class ABSS_FT { - def _D32 : ABSS_FT, + def _D32 : ABSS_FT, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : ABSS_FT, + def _D64 : ABSS_FT, Requires<[IsFP64bit, HasStdEnc]> { string DecoderNamespace = "Mips64"; } } multiclass ROUND_M { - def _D32 : ABSS_FT, + def _D32 : ABSS_FT, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : ABSS_FT, + def _D64 : ABSS_FT, Requires<[IsFP64bit, HasStdEnc]> { let DecoderNamespace = "Mips64"; } } -class MFC1_FT : InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>; -class MTC1_FT : InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; -class MFC1_FT_CCR : InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>; -class MTC1_FT_CCR : InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; @@ -167,13 +167,13 @@ class SW_FT : InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fr, $fs, $ft"), [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, FrmFR>; -class NMADDS_FT : InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fr, $fs, $ft"), @@ -213,16 +213,52 @@ class CEQS_FT { let Defs = [FCR31]; -} + let isCodeGenOnly = 1; +} + +class C_COND_FT : + InstSE<(outs), (ins RC:$fs, RC:$ft), + !strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], IIFcmp, + FrmFR>; + +multiclass C_COND_M fmt> { + def C_F_#NAME : C_COND_FT<"f", TypeStr, RC>, C_COND_FM; + def C_UN_#NAME : C_COND_FT<"un", TypeStr, RC>, C_COND_FM; + def C_EQ_#NAME : C_COND_FT<"eq", TypeStr, RC>, C_COND_FM; + def C_UEQ_#NAME : C_COND_FT<"ueq", TypeStr, RC>, C_COND_FM; + def C_OLT_#NAME : C_COND_FT<"olt", TypeStr, RC>, C_COND_FM; + def C_ULT_#NAME : C_COND_FT<"ult", TypeStr, RC>, C_COND_FM; + def C_OLE_#NAME : C_COND_FT<"ole", TypeStr, RC>, C_COND_FM; + def C_ULE_#NAME : C_COND_FT<"ule", TypeStr, RC>, C_COND_FM; + def C_SF_#NAME : C_COND_FT<"sf", TypeStr, RC>, C_COND_FM; + def C_NGLE_#NAME : C_COND_FT<"ngle", TypeStr, RC>, C_COND_FM; + def C_SEQ_#NAME : C_COND_FT<"seq", TypeStr, RC>, C_COND_FM; + def C_NGL_#NAME : C_COND_FT<"ngl", TypeStr, RC>, C_COND_FM; + def C_LT_#NAME : C_COND_FT<"lt", TypeStr, RC>, C_COND_FM; + def C_NGE_#NAME : C_COND_FT<"nge", TypeStr, RC>, C_COND_FM; + def C_LE_#NAME : C_COND_FT<"le", TypeStr, RC>, C_COND_FM; + def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC>, C_COND_FM; +} + +defm S : C_COND_M<"s", FGR32RegsOpnd, 16>; +defm D32 : C_COND_M<"d", AFGR64RegsOpnd, 17>, + Requires<[NotFP64bit, HasStdEnc]>; +let DecoderNamespace = "Mips64" in +defm D64 : C_COND_M<"d", FGR64RegsOpnd, 17>, Requires<[IsFP64bit, HasStdEnc]>; //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// -def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>; -def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>; -def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>; -def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>; -def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>; +def ROUND_W_S : ABSS_FT<"round.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0xc, 16>; +def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0xd, 16>; +def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0xe, 16>; +def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0xf, 16>; +def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x24, 16>; defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>; defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>; @@ -231,54 +267,72 @@ defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>; defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>; let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { - def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>; - def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64, FGR64, IIFcvt>, + def ROUND_L_S : ABSS_FT<"round.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x8, 16>; + def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, ABSS_FM<0x8, 17>; - def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x9, 16>; - def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64, FGR64, IIFcvt>, + def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x9, 16>; + def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, ABSS_FM<0x9, 17>; - def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xa, 16>; - def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0xa, 17>; - def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xb, 16>; - def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64, FGR64, IIFcvt>, + def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0xa, 16>; + def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, + ABSS_FM<0xa, 17>; + def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0xb, 16>; + def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, ABSS_FM<0xb, 17>; } -def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>; -def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>; -def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>; +def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x20, 20>; +def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x25, 16>; +def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, + ABSS_FM<0x25, 17>; let Predicates = [NotFP64bit, HasStdEnc] in { - def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>; - def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>; - def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>; + def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32RegsOpnd, AFGR64RegsOpnd, IIFcvt>, + ABSS_FM<0x20, 17>; + def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x21, 20>; + def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x21, 16>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { - def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>; - def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>; - def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>; - def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>; - def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64, FGR64, IIFcvt>, ABSS_FM<0x21, 21>; + def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32RegsOpnd, FGR64RegsOpnd, IIFcvt>, + ABSS_FM<0x20, 17>; + def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32RegsOpnd, FGR64RegsOpnd, IIFcvt>, + ABSS_FM<0x20, 21>; + def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x21, 20>; + def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>, + ABSS_FM<0x21, 16>; + def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>, + ABSS_FM<0x21, 21>; } let isPseudo = 1, isCodeGenOnly = 1 in { - def PseudoCVT_S_W : ABSS_FT<"", FGR32, CPURegs, IIFcvt>; - def PseudoCVT_D32_W : ABSS_FT<"", AFGR64, CPURegs, IIFcvt>; - def PseudoCVT_S_L : ABSS_FT<"", FGR64, CPU64Regs, IIFcvt>; - def PseudoCVT_D64_W : ABSS_FT<"", FGR64, CPURegs, IIFcvt>; - def PseudoCVT_D64_L : ABSS_FT<"", FGR64, CPU64Regs, IIFcvt>; + def PseudoCVT_S_W : ABSS_FT<"", FGR32RegsOpnd, CPURegsOpnd, IIFcvt>; + def PseudoCVT_D32_W : ABSS_FT<"", AFGR64RegsOpnd, CPURegsOpnd, IIFcvt>; + def PseudoCVT_S_L : ABSS_FT<"", FGR64RegsOpnd, CPU64RegsOpnd, IIFcvt>; + def PseudoCVT_D64_W : ABSS_FT<"", FGR64RegsOpnd, CPURegsOpnd, IIFcvt>; + def PseudoCVT_D64_L : ABSS_FT<"", FGR64RegsOpnd, CPU64RegsOpnd, IIFcvt>; } let Predicates = [NoNaNsFPMath, HasStdEnc] in { - def FABS_S : ABSS_FT<"abs.s", FGR32, FGR32, IIFcvt, fabs>, ABSS_FM<0x5, 16>; - def FNEG_S : ABSS_FT<"neg.s", FGR32, FGR32, IIFcvt, fneg>, ABSS_FM<0x7, 16>; + def FABS_S : ABSS_FT<"abs.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt, fabs>, + ABSS_FM<0x5, 16>; + def FNEG_S : ABSS_FT<"neg.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt, fneg>, + ABSS_FM<0x7, 16>; defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>; defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>; } -def FSQRT_S : ABSS_FT<"sqrt.s", FGR32, FGR32, IIFsqrtSingle, fsqrt>, - ABSS_FM<0x4, 16>; +def FSQRT_S : ABSS_FT<"sqrt.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFsqrtSingle, + fsqrt>, ABSS_FM<0x4, 16>; defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; // The odd-numbered registers are only referenced when doing loads, @@ -287,26 +341,30 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; // regardless of register aliasing. /// Move Control Registers From/To CPU Registers -def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>; -def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>; -def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmoveC1, bitconvert>, MFC1_FM<0>; -def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmoveC1, bitconvert>, MFC1_FM<4>; -def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmoveC1, bitconvert>, - MFC1_FM<1>; -def DMTC1 : MTC1_FT<"dmtc1", FGR64, CPU64Regs, IIFmoveC1, bitconvert>, - MFC1_FM<5>; - -def FMOV_S : ABSS_FT<"mov.s", FGR32, FGR32, IIFmove>, ABSS_FM<0x6, 16>; -def FMOV_D32 : ABSS_FT<"mov.d", AFGR64, AFGR64, IIFmove>, ABSS_FM<0x6, 17>, - Requires<[NotFP64bit, HasStdEnc]>; -def FMOV_D64 : ABSS_FT<"mov.d", FGR64, FGR64, IIFmove>, ABSS_FM<0x6, 17>, - Requires<[IsFP64bit, HasStdEnc]> { - let DecoderNamespace = "Mips64"; +def CFC1 : MFC1_FT_CCR<"cfc1", CPURegsOpnd, CCROpnd, IIFmove>, MFC1_FM<2>; +def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegsOpnd, IIFmove>, MFC1_FM<6>; +def MFC1 : MFC1_FT<"mfc1", CPURegsOpnd, FGR32RegsOpnd, IIFmoveC1, bitconvert>, + MFC1_FM<0>; +def MTC1 : MTC1_FT<"mtc1", FGR32RegsOpnd, CPURegsOpnd, IIFmoveC1, bitconvert>, + MFC1_FM<4>; +def DMFC1 : MFC1_FT<"dmfc1", CPU64RegsOpnd, FGR64RegsOpnd, IIFmoveC1, + bitconvert>, MFC1_FM<1>; +def DMTC1 : MTC1_FT<"dmtc1", FGR64RegsOpnd, CPU64RegsOpnd, IIFmoveC1, + bitconvert>, MFC1_FM<5>; + +def FMOV_S : ABSS_FT<"mov.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFmove>, + ABSS_FM<0x6, 16>; +def FMOV_D32 : ABSS_FT<"mov.d", AFGR64RegsOpnd, AFGR64RegsOpnd, IIFmove>, + ABSS_FM<0x6, 17>, Requires<[NotFP64bit, HasStdEnc]>; +def FMOV_D64 : ABSS_FT<"mov.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFmove>, + ABSS_FM<0x6, 17>, Requires<[IsFP64bit, HasStdEnc]> { + let DecoderNamespace = "Mips64"; } /// Floating Point Memory Instructions let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LWC1_P8 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem64, load>, LW_FM<0x31>; + def LWC1_P8 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem64, load>, + LW_FM<0x31>; def SWC1_P8 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem64, store>, LW_FM<0x39>; def LDC164_P8 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem64, load>, @@ -390,47 +448,59 @@ let Predicates = [HasMips64, HasStdEnc], } /// Floating-point Aritmetic -def FADD_S : ADDS_FT<"add.s", FGR32, IIFadd, 1, fadd>, ADDS_FM<0x00, 16>; -defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>; -def FDIV_S : ADDS_FT<"div.s", FGR32, IIFdivSingle, 0, fdiv>, ADDS_FM<0x03, 16>; -defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>; -def FMUL_S : ADDS_FT<"mul.s", FGR32, IIFmulSingle, 1, fmul>, ADDS_FM<0x02, 16>; -defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>; -def FSUB_S : ADDS_FT<"sub.s", FGR32, IIFadd, 0, fsub>, ADDS_FM<0x01, 16>; -defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>; +def FADD_S : ADDS_FT<"add.s", FGR32RegsOpnd, IIFadd, 1, fadd>, + ADDS_FM<0x00, 16>; +defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>; +def FDIV_S : ADDS_FT<"div.s", FGR32RegsOpnd, IIFdivSingle, 0, fdiv>, + ADDS_FM<0x03, 16>; +defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>; +def FMUL_S : ADDS_FT<"mul.s", FGR32RegsOpnd, IIFmulSingle, 1, fmul>, + ADDS_FM<0x02, 16>; +defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>; +def FSUB_S : ADDS_FT<"sub.s", FGR32RegsOpnd, IIFadd, 0, fsub>, + ADDS_FM<0x01, 16>; +defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>; let Predicates = [HasMips32r2, HasStdEnc] in { - def MADD_S : MADDS_FT<"madd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<4, 0>; - def MSUB_S : MADDS_FT<"msub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<5, 0>; + def MADD_S : MADDS_FT<"madd.s", FGR32RegsOpnd, IIFmulSingle, fadd>, + MADDS_FM<4, 0>; + def MSUB_S : MADDS_FT<"msub.s", FGR32RegsOpnd, IIFmulSingle, fsub>, + MADDS_FM<5, 0>; } let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in { - def NMADD_S : NMADDS_FT<"nmadd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<6, 0>; - def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<7, 0>; + def NMADD_S : NMADDS_FT<"nmadd.s", FGR32RegsOpnd, IIFmulSingle, fadd>, + MADDS_FM<6, 0>; + def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32RegsOpnd, IIFmulSingle, fsub>, + MADDS_FM<7, 0>; } let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in { - def MADD_D32 : MADDS_FT<"madd.d", AFGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>; - def MSUB_D32 : MADDS_FT<"msub.d", AFGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>; + def MADD_D32 : MADDS_FT<"madd.d", AFGR64RegsOpnd, IIFmulDouble, fadd>, + MADDS_FM<4, 1>; + def MSUB_D32 : MADDS_FT<"msub.d", AFGR64RegsOpnd, IIFmulDouble, fsub>, + MADDS_FM<5, 1>; } let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in { - def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64, IIFmulDouble, fadd>, + def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64RegsOpnd, IIFmulDouble, fadd>, MADDS_FM<6, 1>; - def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64, IIFmulDouble, fsub>, + def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64RegsOpnd, IIFmulDouble, fsub>, MADDS_FM<7, 1>; } let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in { - def MADD_D64 : MADDS_FT<"madd.d", FGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>; - def MSUB_D64 : MADDS_FT<"msub.d", FGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>; + def MADD_D64 : MADDS_FT<"madd.d", FGR64RegsOpnd, IIFmulDouble, fadd>, + MADDS_FM<4, 1>; + def MSUB_D64 : MADDS_FT<"msub.d", FGR64RegsOpnd, IIFmulDouble, fsub>, + MADDS_FM<5, 1>; } let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc], isCodeGenOnly=1 in { - def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64, IIFmulDouble, fadd>, + def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64RegsOpnd, IIFmulDouble, fadd>, MADDS_FM<6, 1>; - def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64, IIFmulDouble, fsub>, + def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64RegsOpnd, IIFmulDouble, fsub>, MADDS_FM<7, 1>; } @@ -484,17 +554,19 @@ def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. def BuildPairF64 : - PseudoSE<(outs AFGR64:$dst), - (ins CPURegs:$lo, CPURegs:$hi), - [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; + PseudoSE<(outs AFGR64RegsOpnd:$dst), + (ins CPURegsOpnd:$lo, CPURegsOpnd:$hi), + [(set AFGR64RegsOpnd:$dst, + (MipsBuildPairF64 CPURegsOpnd:$lo, CPURegsOpnd:$hi))]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register // allocation. // if n is 0, lower part of src is extracted. // if n is 1, higher part of src is extracted. def ExtractElementF64 : - PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), - [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; + PseudoSE<(outs CPURegsOpnd:$dst), (ins AFGR64RegsOpnd:$src, i32imm:$n), + [(set CPURegsOpnd:$dst, + (MipsExtractElementF64 AFGR64RegsOpnd:$src, imm:$n))]>; //===----------------------------------------------------------------------===// // Floating Point Patterns @@ -502,34 +574,44 @@ def ExtractElementF64 : def : MipsPat<(f32 fpimm0), (MTC1 ZERO)>; def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; -def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (PseudoCVT_S_W CPURegs:$src)>; -def : MipsPat<(MipsTruncIntFP FGR32:$src), (TRUNC_W_S FGR32:$src)>; +def : MipsPat<(f32 (sint_to_fp CPURegsOpnd:$src)), + (PseudoCVT_S_W CPURegsOpnd:$src)>; +def : MipsPat<(MipsTruncIntFP FGR32RegsOpnd:$src), + (TRUNC_W_S FGR32RegsOpnd:$src)>; let Predicates = [NotFP64bit, HasStdEnc] in { - def : MipsPat<(f64 (sint_to_fp CPURegs:$src)), - (PseudoCVT_D32_W CPURegs:$src)>; - def : MipsPat<(MipsTruncIntFP AFGR64:$src), (TRUNC_W_D32 AFGR64:$src)>; - def : MipsPat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; - def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; + def : MipsPat<(f64 (sint_to_fp CPURegsOpnd:$src)), + (PseudoCVT_D32_W CPURegsOpnd:$src)>; + def : MipsPat<(MipsTruncIntFP AFGR64RegsOpnd:$src), + (TRUNC_W_D32 AFGR64RegsOpnd:$src)>; + def : MipsPat<(f32 (fround AFGR64RegsOpnd:$src)), + (CVT_S_D32 AFGR64RegsOpnd:$src)>; + def : MipsPat<(f64 (fextend FGR32RegsOpnd:$src)), + (CVT_D32_S FGR32RegsOpnd:$src)>; } let Predicates = [IsFP64bit, HasStdEnc] in { def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>; def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; - def : MipsPat<(f64 (sint_to_fp CPURegs:$src)), - (PseudoCVT_D64_W CPURegs:$src)>; - def : MipsPat<(f32 (sint_to_fp CPU64Regs:$src)), - (EXTRACT_SUBREG (PseudoCVT_S_L CPU64Regs:$src), sub_32)>; - def : MipsPat<(f64 (sint_to_fp CPU64Regs:$src)), - (PseudoCVT_D64_L CPU64Regs:$src)>; - - def : MipsPat<(MipsTruncIntFP FGR64:$src), (TRUNC_W_D64 FGR64:$src)>; - def : MipsPat<(MipsTruncIntFP FGR32:$src), (TRUNC_L_S FGR32:$src)>; - def : MipsPat<(MipsTruncIntFP FGR64:$src), (TRUNC_L_D64 FGR64:$src)>; - - def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; - def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; + def : MipsPat<(f64 (sint_to_fp CPURegsOpnd:$src)), + (PseudoCVT_D64_W CPURegsOpnd:$src)>; + def : MipsPat<(f32 (sint_to_fp CPU64RegsOpnd:$src)), + (EXTRACT_SUBREG (PseudoCVT_S_L CPU64RegsOpnd:$src), sub_32)>; + def : MipsPat<(f64 (sint_to_fp CPU64RegsOpnd:$src)), + (PseudoCVT_D64_L CPU64RegsOpnd:$src)>; + + def : MipsPat<(MipsTruncIntFP FGR64RegsOpnd:$src), + (TRUNC_W_D64 FGR64RegsOpnd:$src)>; + def : MipsPat<(MipsTruncIntFP FGR32RegsOpnd:$src), + (TRUNC_L_S FGR32RegsOpnd:$src)>; + def : MipsPat<(MipsTruncIntFP FGR64RegsOpnd:$src), + (TRUNC_L_D64 FGR64RegsOpnd:$src)>; + + def : MipsPat<(f32 (fround FGR64RegsOpnd:$src)), + (CVT_S_D64 FGR64RegsOpnd:$src)>; + def : MipsPat<(f64 (fextend FGR32RegsOpnd:$src)), + (CVT_D64_S FGR32RegsOpnd:$src)>; } // Patterns for loads/stores with a reg+imm operand. diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 6073476..c2a5879 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -97,6 +97,7 @@ class InstSE pattern, let Predicates = [HasStdEnc]; string BaseOpcode = opstr; string Arch; + let MnemonicContainsDot = 1; } // Mips Pseudo Instructions Format @@ -679,6 +680,10 @@ class CEQS_FM fmt> { let Inst{3-0} = cond; } +class C_COND_FM fmt, bits<4> c> : CEQS_FM { + let cond = c; +} + class CMov_I_F_FM funct, bits<5> fmt> { bits<5> fd; bits<5> fs; -- cgit v1.1 From 376452165863fad987c890d9773e6eb87742a3e1 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 16 Jul 2013 11:02:24 +0000 Subject: [SystemZ] Use RISBG for (shift (and ...)) Another patch in the series to make more use of R.SBG. This one extends r186072 and r186073 to handle cases where the AND is inside the shift. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186399 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 265 ++++++++++++++++++----------- 1 file changed, 168 insertions(+), 97 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 39589f6..149001e 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -92,6 +92,28 @@ struct SystemZAddressingMode { } }; +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; +} + +// Represents operands 2 to 5 of a ROTATE AND ... SELECTED BITS operation. +// The operands are: Input (R2), Start (I3), End (I4) and Rotate (I5). +// The operand value is effectively (and (rotl Input Rotate) Mask) and +// has BitSize bits. +struct RISBGOperands { + RISBGOperands(SDValue N) + : BitSize(N.getValueType().getSizeInBits()), Mask(allOnes(BitSize)), + Input(N), Start(64 - BitSize), End(63), Rotate(0) {} + + unsigned BitSize; + uint64_t Mask; + SDValue Input; + unsigned Start; + unsigned End; + unsigned Rotate; +}; + class SystemZDAGToDAGISel : public SelectionDAGISel { const SystemZTargetLowering &Lowering; const SystemZSubtarget &Subtarget; @@ -200,15 +222,19 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { Addr, Base, Disp, Index); } + // Try to fold some of Ops.Input into other fields of Ops. Return true + // on success. + bool expandRISBG(RISBGOperands &Ops); + // Return an undefined i64 value. SDValue getUNDEF64(SDLoc DL); // Convert N to VT, if it isn't already. SDValue convertTo(SDLoc DL, EVT VT, SDValue N); - // Try to use RISBG to implement ISD::AND node N. Return the selected - // node on success, otherwise return null. - SDNode *tryRISBGForAND(SDNode *N); + // Try to implement AND or shift node N using RISBG with the zero flag set. + // Return the selected node on success, otherwise return null. + SDNode *tryRISBGZero(SDNode *N); // If Op0 is null, then Node is a constant that can be loaded using: // @@ -546,19 +572,15 @@ static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) { return false; } -// Return a mask with Count low bits set. -static uint64_t allOnes(unsigned int Count) { - return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; -} +// Try to update RISBG so that only the bits of Ops.Input in Mask are used. +// Return true on success. +static bool refineRISBGMask(RISBGOperands &RISBG, uint64_t Mask) { + if (RISBG.Rotate != 0) + Mask = (Mask << RISBG.Rotate) | (Mask >> (64 - RISBG.Rotate)); + Mask &= RISBG.Mask; -// Return true if RISBG can be used to extract the bits in Mask from -// a value that has BitSize bits. Store the start and end operands -// (I3 and I4) in Start and End if so. -static bool isRISBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, - unsigned &End) { - // Reject trivial all-zero and all-one masks. - uint64_t Used = allOnes(BitSize); - if (Mask == 0 || Mask == Used) + // Reject trivial all-zero masks. + if (Mask == 0) return false; // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of @@ -566,25 +588,127 @@ static bool isRISBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, unsigned LSB, Length; if (isStringOfOnes(Mask, LSB, Length)) { - Start = 63 - (LSB + Length - 1); - End = 63 - LSB; + RISBG.Mask = Mask; + RISBG.Start = 63 - (LSB + Length - 1); + RISBG.End = 63 - LSB; return true; } // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb // of the low 1s and End specifies the lsb of the high 1s. - if (isStringOfOnes(Mask ^ Used, LSB, Length)) + if (isStringOfOnes(Mask ^ allOnes(RISBG.BitSize), LSB, Length)) { assert(LSB > 0 && "Bottom bit must be set"); - assert(LSB + Length < BitSize && "Top bit must be set"); - Start = 63 - (LSB - 1); - End = 63 - (LSB + Length); + assert(LSB + Length < RISBG.BitSize && "Top bit must be set"); + RISBG.Mask = Mask; + RISBG.Start = 63 - (LSB - 1); + RISBG.End = 63 - (LSB + Length); return true; } return false; } +bool SystemZDAGToDAGISel::expandRISBG(RISBGOperands &RISBG) { + SDValue N = RISBG.Input; + switch (N.getOpcode()) { + case ISD::AND: { + ConstantSDNode *MaskNode = + dyn_cast(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = MaskNode->getZExtValue(); + if (!refineRISBGMask(RISBG, Mask)) { + // If some bits of Input are already known zeros, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); + Mask |= KnownZero.getZExtValue(); + if (!refineRISBGMask(RISBG, Mask)) + return false; + } + RISBG.Input = Input; + return true; + } + + case ISD::ROTL: { + // Any 64-bit rotate left can be merged into the RISBG. + if (RISBG.BitSize != 64) + return false; + ConstantSDNode *CountNode + = dyn_cast(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + RISBG.Rotate = (RISBG.Rotate + CountNode->getZExtValue()) & 63; + RISBG.Input = N.getOperand(0); + return true; + } + + case ISD::SHL: { + // Treat (shl X, count) as (and (rotl X, count), ~0<(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + if (Count < 1 || + Count >= RISBG.BitSize || + !refineRISBGMask(RISBG, allOnes(RISBG.BitSize - Count) << Count)) + return false; + + RISBG.Rotate = (RISBG.Rotate + Count) & 63; + RISBG.Input = N.getOperand(0); + return true; + } + + case ISD::SRL: { + // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count), + // which is similar to SLL above. + ConstantSDNode *CountNode = + dyn_cast(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + if (Count < 1 || + Count >= RISBG.BitSize || + !refineRISBGMask(RISBG, allOnes(RISBG.BitSize - Count))) + return false; + + RISBG.Rotate = (RISBG.Rotate - Count) & 63; + RISBG.Input = N.getOperand(0); + return true; + } + + case ISD::SRA: { + // Treat (sra X, count) as (rotl X, size-count) as long as the top + // count bits from Ops.Input are ignored. + ConstantSDNode *CountNode = + dyn_cast(N.getOperand(1).getNode()); + if (!CountNode) + return false; + + uint64_t Count = CountNode->getZExtValue(); + if (RISBG.Rotate != 0 || + Count < 1 || + Count >= RISBG.BitSize || + RISBG.Start < 64 - (RISBG.BitSize - Count)) + return false; + + RISBG.Rotate = -Count & 63; + RISBG.Input = N.getOperand(0); + return true; + } + default: + return false; + } +} + SDValue SystemZDAGToDAGISel::getUNDEF64(SDLoc DL) { SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64); return SDValue(N, 0); @@ -607,87 +731,31 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) { return N; } -SDNode *SystemZDAGToDAGISel::tryRISBGForAND(SDNode *N) { - EVT VT = N->getValueType(0); - unsigned BitSize = VT.getSizeInBits(); - unsigned Start, End; - ConstantSDNode *MaskNode = - dyn_cast(N->getOperand(1).getNode()); - if (!MaskNode) +SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { + RISBGOperands RISBG(SDValue(N, 0)); + unsigned Count = 0; + while (expandRISBG(RISBG)) + Count += 1; + // Prefer to use normal shift instructions over RISBG, since they can handle + // all cases and are sometimes shorter. Prefer to use RISBG for ANDs though, + // since it is effectively a three-operand instruction in this case, + // and since it can handle some masks that AND IMMEDIATE can't. + if (Count < (N->getOpcode() == ISD::AND ? 1 : 2)) return 0; - SDValue Input = N->getOperand(0); - uint64_t Mask = MaskNode->getZExtValue(); - if (!isRISBGMask(Mask, BitSize, Start, End)) { - APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); - Mask |= KnownZero.getZExtValue(); - if (!isRISBGMask(Mask, BitSize, Start, End)) - return 0; - } - - unsigned Rotate = 0; - if (Input->getOpcode() == ISD::ROTL && BitSize == 64) { - // Any 64-bit rotate left can be merged into the RISBG. - if (ConstantSDNode *CountNode = - dyn_cast(Input.getOperand(1).getNode())) { - Rotate = CountNode->getZExtValue() & (BitSize - 1); - Input = Input->getOperand(0); - } - } else if (Input->getOpcode() == ISD::SHL) { - // Try to convert (and (shl X, count), mask) into - // (and (rotl X, count), mask&(~0<(Input.getOperand(1).getNode())) { - uint64_t Count = CountNode->getZExtValue(); - if (Count > 0 && - Count < BitSize && - isRISBGMask(Mask & (allOnes(BitSize - Count) << Count), - BitSize, Start, End)) { - Rotate = Count; - Input = Input->getOperand(0); - } - } - } else if (Input->getOpcode() == ISD::SRL) { - // Try to convert (and (srl X, count), mask) into - // (and (rotl X, size-count), mask&(~0>>count)), which is similar - // to SLL above. - if (ConstantSDNode *CountNode = - dyn_cast(Input.getOperand(1).getNode())) { - uint64_t Count = CountNode->getZExtValue(); - if (Count > 0 && - Count < BitSize && - isRISBGMask(Mask & allOnes(BitSize - Count), BitSize, Start, End)) { - Rotate = 64 - Count; - Input = Input->getOperand(0); - } - } - } else if (Start <= End && Input->getOpcode() == ISD::SRA) { - // Try to convert (and (sra X, count), mask) into - // (and (rotl X, size-count), mask). The mask must not include - // any sign bits. - if (ConstantSDNode *CountNode = - dyn_cast(Input.getOperand(1).getNode())) { - uint64_t Count = CountNode->getZExtValue(); - if (Count > 0 && Count < BitSize && Start >= 64 - (BitSize - Count)) { - Rotate = 64 - Count; - Input = Input->getOperand(0); - } - } - } - - // Prefer register extensions like LLC over RSIBG. - if (Rotate == 0 && (Start == 32 || Start == 48 || Start == 56) && End == 63) + // Prefer register extensions like LLC over RISBG. + if (RISBG.Rotate == 0 && + (RISBG.Start == 32 || RISBG.Start == 48 || RISBG.Start == 56) && + RISBG.End == 63) return 0; + EVT VT = N->getValueType(0); SDValue Ops[5] = { getUNDEF64(SDLoc(N)), - convertTo(SDLoc(N), MVT::i64, Input), - CurDAG->getTargetConstant(Start, MVT::i32), - CurDAG->getTargetConstant(End | 128, MVT::i32), - CurDAG->getTargetConstant(Rotate, MVT::i32) + convertTo(SDLoc(N), MVT::i64, RISBG.Input), + CurDAG->getTargetConstant(RISBG.Start, MVT::i32), + CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32), + CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32) }; N = CurDAG->getMachineNode(SystemZ::RISBG, SDLoc(N), MVT::i64, Ops); return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); @@ -778,7 +846,10 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { break; case ISD::AND: - ResNode = tryRISBGForAND(Node); + case ISD::ROTL: + case ISD::SHL: + case ISD::SRL: + ResNode = tryRISBGZero(Node); break; case ISD::Constant: -- cgit v1.1 From c25d21e05b76e9c542e3bea6a9a12a77772beb14 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 16 Jul 2013 11:28:08 +0000 Subject: [SystemZ] Add MC support for R[NOX]SBG CodeGen support will come later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186401 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.td | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index c47e04b..c6839e8 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -827,6 +827,14 @@ let Defs = [CC] in { def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; } +// Rotate second operand left and perform a logical operation with selected +// bits of the first operand. +let Defs = [CC] in { + def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>; + def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>; + def RXSBG : RotateSelectRIEf<"rxsbg", 0xEC57, GR64, GR64>; +} + //===----------------------------------------------------------------------===// // Comparison //===----------------------------------------------------------------------===// -- cgit v1.1 From ab42fc66b1d4d7c57344de3cf266494ad8518787 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Tue, 16 Jul 2013 11:43:20 +0000 Subject: Fixing a buildbot failure:unused function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186403 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index b6b265c..8899aeb 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -183,11 +183,6 @@ static DecodeStatus DecodeSimm16(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCondCode(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - static DecodeStatus DecodeInsSize(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -464,15 +459,6 @@ static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCondCode(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - int CondCode = Insn & 0xf; - Inst.addOperand(MCOperand::CreateImm(CondCode)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, -- cgit v1.1 From de25544a73acbb1dd99c948ccbea81eedcd34bc9 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 16 Jul 2013 11:55:57 +0000 Subject: [SystemZ] Use ROSBG and non-zero form of RISBG for OR nodes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186405 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 87 +++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 149001e..f5d5e5a 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -222,6 +222,11 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { Addr, Base, Disp, Index); } + // Check whether (or Op (and X InsertMask)) is effectively an insertion + // of X into bits InsertMask of some Y != Op. Return true if so and + // set Op to that Y. + bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask); + // Try to fold some of Ops.Input into other fields of Ops. Return true // on success. bool expandRISBG(RISBGOperands &Ops); @@ -236,6 +241,10 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { // Return the selected node on success, otherwise return null. SDNode *tryRISBGZero(SDNode *N); + // Try to use RISBG or ROSBG to implement OR node N. Return the selected + // node on success, otherwise return null. + SDNode *tryRISBGOrROSBG(SDNode *N); + // If Op0 is null, then Node is a constant that can be loaded using: // // (Opcode UpperVal LowerVal) @@ -557,6 +566,38 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, return true; } +bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, + uint64_t InsertMask) { + // We're only interested in cases where the insertion is into some operand + // of Op, rather than into Op itself. The only useful case is an AND. + if (Op.getOpcode() != ISD::AND) + return false; + + // We need a constant mask. + ConstantSDNode *MaskNode = + dyn_cast(Op.getOperand(1).getNode()); + if (!MaskNode) + return false; + + // It's not an insertion of Op.getOperand(0) if the two masks overlap. + uint64_t AndMask = MaskNode->getZExtValue(); + if (InsertMask & AndMask) + return false; + + // It's only an insertion if all bits are covered or are known to be zero. + // The inner check covers all cases but is more expensive. + uint64_t Used = allOnes(Op.getValueType().getSizeInBits()); + if (Used != (AndMask | InsertMask)) { + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne); + if (Used != (AndMask | InsertMask | KnownZero.getZExtValue())) + return false; + } + + Op = Op.getOperand(0); + return true; +} + // Return true if Mask matches the regexp 0*1+0*, given that zero masks // have already been filtered out. Store the first set bit in LSB and // the number of set bits in Length if so. @@ -761,6 +802,47 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); } +SDNode *SystemZDAGToDAGISel::tryRISBGOrROSBG(SDNode *N) { + // Try treating each operand of N as the second operand of RISBG or ROSBG + // and see which goes deepest. + RISBGOperands RISBG[] = { N->getOperand(0), N->getOperand(1) }; + unsigned Count[] = { 0, 0 }; + for (unsigned I = 0; I < 2; ++I) + while (expandRISBG(RISBG[I])) + Count[I] += 1; + + // Do nothing if neither operand is suitable. + if (Count[0] == 0 && Count[1] == 0) + return 0; + + // Pick the deepest second operand. + unsigned I = Count[0] > Count[1] ? 0 : 1; + SDValue Op0 = N->getOperand(I ^ 1); + + // Prefer IC for character insertions from memory. + if ((RISBG[I].Mask & 0xff) == 0) + if (LoadSDNode *Load = dyn_cast(Op0.getNode())) + if (Load->getMemoryVT() == MVT::i8) + return 0; + + // See whether we can avoid an AND in the first operand by converting + // ROSBG to RISBG. + unsigned Opcode = SystemZ::ROSBG; + if (detectOrAndInsertion(Op0, RISBG[I].Mask)) + Opcode = SystemZ::RISBG; + + EVT VT = N->getValueType(0); + SDValue Ops[5] = { + convertTo(SDLoc(N), MVT::i64, Op0), + convertTo(SDLoc(N), MVT::i64, RISBG[I].Input), + CurDAG->getTargetConstant(RISBG[I].Start, MVT::i32), + CurDAG->getTargetConstant(RISBG[I].End, MVT::i32), + CurDAG->getTargetConstant(RISBG[I].Rotate, MVT::i32) + }; + N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops); + return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); +} + SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal) { @@ -833,10 +915,13 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = 0; switch (Opcode) { case ISD::OR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRISBGOrROSBG(Node); + // Fall through. case ISD::XOR: // If this is a 64-bit operation in which both 32-bit halves are nonzero, // split the operation into two. - if (Node->getValueType(0) == MVT::i64) + if (!ResNode && Node->getValueType(0) == MVT::i64) if (ConstantSDNode *Op1 = dyn_cast(Node->getOperand(1))) { uint64_t Val = Op1->getZExtValue(); if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) -- cgit v1.1 From ad9a0d27d36f18dff2b2d37dd13b11ed2d07688b Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 16 Jul 2013 12:15:36 +0000 Subject: ARM: allow printing of ARM atomic DAG nodes. We'd forgotten to provide string representations for the special ARMISD atomic nodes; this adds them in. No effect on CodeGen, just makes the output of "-view-whatever-dags" slightly more readable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186406 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 83fb175..569bead 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1092,6 +1092,19 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; + + case ARMISD::ATOMADD64_DAG: return "ATOMADD64_DAG"; + case ARMISD::ATOMSUB64_DAG: return "ATOMSUB64_DAG"; + case ARMISD::ATOMOR64_DAG: return "ATOMOR64_DAG"; + case ARMISD::ATOMXOR64_DAG: return "ATOMXOR64_DAG"; + case ARMISD::ATOMAND64_DAG: return "ATOMAND64_DAG"; + case ARMISD::ATOMNAND64_DAG: return "ATOMNAND64_DAG"; + case ARMISD::ATOMSWAP64_DAG: return "ATOMSWAP64_DAG"; + case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG"; + case ARMISD::ATOMMIN64_DAG: return "ATOMMIN64_DAG"; + case ARMISD::ATOMUMIN64_DAG: return "ATOMUMIN64_DAG"; + case ARMISD::ATOMMAX64_DAG: return "ATOMMAX64_DAG"; + case ARMISD::ATOMUMAX64_DAG: return "ATOMUMAX64_DAG"; } } -- cgit v1.1 From 6e2dc6d669dcbd2129bbc939088d30c7a751bd94 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Tue, 16 Jul 2013 12:48:34 +0000 Subject: [XCore] Fix printing of inline asm operands. Previously an asm operand with no operand modifier would give the error "invalid operand in inline asm". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186407 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreAsmPrinter.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index e802c1b..435062b 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -240,18 +240,14 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) - if (ExtraCode[1] != 0) return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: - // See if this is a generic print operand - return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); - } + // Print the operand if there is no operand modifier. + if (!ExtraCode || !ExtraCode[0]) { + printOperand(MI, OpNo, O); + return false; + } - printOperand(MI, OpNo, O); - return false; + // Otherwise fallback on the default implementation. + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); } void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { -- cgit v1.1 From 17c95a217d359a48a95b35730829e870fe8491eb Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Tue, 16 Jul 2013 17:44:23 +0000 Subject: Test commit to verify write access. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186429 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 834a998..6bca900 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -55,3 +55,4 @@ add_subdirectory(Disassembler) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) add_subdirectory(AsmParser) + -- cgit v1.1 From b95e0f6f2f43d2c9ae8dd9407f9216d02fa4c833 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Tue, 16 Jul 2013 18:20:45 +0000 Subject: [X86] Use min/max to optimze unsigend vector comparison on X86 Use PMIN/PMAX for UGE/ULE vector comparions to reduce the number of required instructions. This trick also works for UGT/ULT, but there is no advantage in doing so. It wouldn't reduce the number of instructions and it would actually reduce performance. Reviewer: Ben radar:5972691 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186432 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1a0c937..8f80243 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9351,8 +9351,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. unsigned Opc; - bool Swap = false, Invert = false, FlipSigns = false; - + bool Swap = false, Invert = false, FlipSigns = false, MinMax = false; + switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; @@ -9366,6 +9366,23 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, case ISD::SETUGE: Swap = true; case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break; } + + // Special case: Use min/max operations for SETULE/SETUGE + MVT VET = VT.getVectorElementType(); + bool hasMinMax = + (Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) + || (Subtarget->hasSSE2() && (VET == MVT::i8)); + + if (hasMinMax) { + switch (SetCCOpcode) { + default: break; + case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break; + case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break; + } + + if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } + } + if (Swap) std::swap(Op0, Op1); @@ -9452,6 +9469,9 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // If the logical-not of the result is required, perform that now. if (Invert) Result = DAG.getNOT(dl, Result, VT); + + if (MinMax) + Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result); return Result; } -- cgit v1.1 From c1b49b56d4132efa2e06deb8f23508d0de4c8800 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 16 Jul 2013 19:44:17 +0000 Subject: Add a wrapper for open. This centralizes the handling of O_BINARY and opens the way for hiding more differences (like how open behaves with directories). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186447 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetMachineC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 01d12e8..7419122 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -200,7 +200,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { std::string error; - raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + raw_fd_ostream dest(Filename, error, sys::fs::F_Binary); formatted_raw_ostream destf(dest); if (!error.empty()) { *ErrorMessage = strdup(error.c_str()); -- cgit v1.1 From 898788c6bcc2abfe0e1c7b21c14394352963acd6 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Tue, 16 Jul 2013 21:52:34 +0000 Subject: ARM: Add support for the Thumb2 PLI alternate literal form. This adds an instruction alias to make the assembler recognize the alternate literal form: pli [PC, #+/-] See A8.8.129 in the ARM ARM (DDI 0406C.b). Fixes . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186459 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 8d15630..19d76e5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4427,3 +4427,6 @@ def : t2InstAlias<"ldrsh${p}.w $Rt, $addr", def : t2InstAlias<"add${p} $Rd, pc, $imm", (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; + +// PLI with alternate literal form. +def : t2InstAlias<"pli${p} $addr", (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>; -- cgit v1.1 From 82d4215f64dc941f21bbae7ec781367d343387b8 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 16 Jul 2013 22:01:40 +0000 Subject: Related to r181161 - Indirect branches may not be the last branch in a basic block. Blocks that have an indirect branch terminator, even if it's not the last terminator, should still be treated as unanalyzable. Reducing a useful regression test case is proving difficult - I hope to have one soon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186461 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index d670178..5d012fc 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -295,6 +295,11 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, if (!isUnpredicatedTerminator(I)) return false; + // Check whether the second-to-last branch is indirect, return + // 'unanalyzeable' here too. + if (I != MBB.begin() && prior(I)->isIndirectBranch()) + return true; + // If there is only one terminator instruction, process it. if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (isUncondBranchOpcode(LastOpc)) { @@ -322,6 +327,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, LastInst->eraseFromParent(); LastInst = SecondLastInst; LastOpc = LastInst->getOpcode(); + if (I != MBB.begin() && prior(I)->isIndirectBranch()) + return true; // Indirect branches are unanalyzeable. if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { // Return now the only terminator is an unconditional branch. TBB = LastInst->getOperand(0).getMBB(); -- cgit v1.1 From f5eea529834ba1f08a2d95e51a4f49fb9766a927 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 17 Jul 2013 00:31:35 +0000 Subject: Move string pointer from being a static class member to just a static global in the one file its needed in. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186476 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUMachineFunction.cpp | 7 ++----- lib/Target/R600/AMDGPUMachineFunction.h | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp index 9a1e344..f2342b0 100644 --- a/lib/Target/R600/AMDGPUMachineFunction.cpp +++ b/lib/Target/R600/AMDGPUMachineFunction.cpp @@ -2,10 +2,9 @@ #include "AMDGPU.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +using namespace llvm; -namespace llvm { - -const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType"; +static const char *const ShaderTypeAttribute = "ShaderType"; AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo() { @@ -21,5 +20,3 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : llvm_unreachable("Can't parse shader type!"); } } - -} diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h index 5d5df12..789b96a 100644 --- a/lib/Target/R600/AMDGPUMachineFunction.h +++ b/lib/Target/R600/AMDGPUMachineFunction.h @@ -18,8 +18,6 @@ namespace llvm { class AMDGPUMachineFunction : public MachineFunctionInfo { -private: - static const char *ShaderTypeAttribute; public: AMDGPUMachineFunction(const MachineFunction &MF); unsigned ShaderType; -- cgit v1.1 From fe47bf8fa07e12b70ff8b234fa1f6b97c8d2753d Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 17 Jul 2013 00:45:52 +0000 Subject: PPC: Implement base pointer and stack realignment This builds on some frame-lowering code that has existed since 2005 (r24224) but was disabled in 2008 (r48188) because it needed base pointer support to function correctly. This implementation follows the strategy suggested by Dale Johannesen in r48188 where the following comment was added: This does not currently work, because the delta between old and new stack pointers is added to offsets that reference incoming parameters after the prolog is generated, and the code that does that doesn't handle a variable delta. You don't want to do that anyway; a better approach is to reserve another register that retains to the incoming stack pointer, and reference parameters relative to that. And now we do exactly that. If we don't need a frame pointer, then we use r31 as a base pointer. If we do need a frame pointer, then we use r30 as a base pointer. The base pointer retains the value of the stack pointer before it was decremented in the prologue. We then use the base pointer to resolve all negative frame indicies. The basic scheme follows that for base pointers in the X86 backend. We use a base pointer when we need to dynamically realign the incoming stack pointer. This currently applies only to static objects (dynamic allocas with large alignments, and base-pointer support in SjLj lowering will come in future commits). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186478 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 173 +++++++++++++++++++++++----- lib/Target/PowerPC/PPCFrameLowering.h | 14 +++ lib/Target/PowerPC/PPCMachineFunctionInfo.h | 7 ++ lib/Target/PowerPC/PPCRegisterInfo.cpp | 81 +++++++++++-- lib/Target/PowerPC/PPCRegisterInfo.h | 6 + 5 files changed, 240 insertions(+), 41 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 8e33830..3b57390 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -26,17 +26,6 @@ using namespace llvm; -// FIXME This disables some code that aligns the stack to a boundary bigger than -// the default (16 bytes on Darwin) when there is a stack local of greater -// alignment. This does not currently work, because the delta between old and -// new stack pointers is added to offsets that reference incoming parameters -// after the prolog is generated, and the code that does that doesn't handle a -// variable delta. You don't want to do that anyway; a better approach is to -// reserve another register that retains to the incoming stack pointer, and -// reference parameters relative to that. -#define ALIGN_STACK 0 - - /// VRRegNo - Map from a numbered VR register to its enum value. /// static const uint16_t VRRegNo[] = { @@ -217,9 +206,12 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. - unsigned MaxAlign = MFI->getMaxAlignment(); unsigned TargetAlign = getStackAlignment(); - unsigned AlignMask = TargetAlign - 1; // + unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; + + const PPCRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need @@ -235,7 +227,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. - (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. + !RegInfo->hasBasePointer(MF)) { // No special alignment. // No need for frame if (UpdateMF) MFI->setStackSize(0); @@ -332,6 +324,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); const PPCInstrInfo &TII = *static_cast(MF.getTarget().getInstrInfo()); + const PPCRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); @@ -358,6 +352,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Work out frame sizes. unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; + if (!isInt<32>(NegFrameSize)) + llvm_unreachable("Unhandled stack size!"); if (MFI->isFrameAddressTaken()) replaceFPWithRealFP(MF); @@ -372,6 +368,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF); + bool HasBP = RegInfo->hasBasePointer(MF); int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); @@ -387,6 +384,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } } + int BPOffset = 0; + if (HasBP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int BPIndex = FI->getBasePointerSaveIndex(); + assert(BPIndex && "No Base Pointer Save Slot!"); + BPOffset = FFI->getObjectOffset(BPIndex); + } else { + BPOffset = + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI, + HasFP); + } + } + if (isPPC64) { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); @@ -404,6 +415,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addImm(FPOffset) .addReg(PPC::X1); + if (HasBP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) + .addReg(HasFP ? PPC::X30 : PPC::X31) + .addImm(BPOffset) + .addReg(PPC::X1); + if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) .addReg(PPC::X0) @@ -427,6 +444,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addImm(FPOffset) .addReg(PPC::R1); + if (HasBP) + // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative + // offsets of R1 is not allowed. + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) + .addReg(HasFP ? PPC::R30 : PPC::R31) + .addImm(BPOffset) + .addReg(PPC::R1); + assert(MustSaveCRs.empty() && "Prologue CR saving supported only in 64-bit mode"); @@ -441,26 +466,44 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (!FrameSize) return; // Get stack alignments. - unsigned TargetAlign = getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now if (!isPPC64) { // PPC32. - if (ALIGN_STACK && MaxAlign > TargetAlign) { + + if (HasBP) { + // Save a copy of r1 as the base pointer. + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), + HasFP ? PPC::R30 : PPC::R31) + .addReg(PPC::R1) + .addReg(PPC::R1); + } + + if (HasBP && MaxAlign > 1) { assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && "Invalid alignment!"); - assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) .addReg(PPC::R1) .addImm(0) .addImm(32 - Log2_32(MaxAlign)) .addImm(31); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(NegFrameSize); + if (isInt<16>(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addImm(NegFrameSize); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R12) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R12) + .addReg(PPC::R12, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC), PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addReg(PPC::R12, RegState::Kill); + } BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(PPC::R1, RegState::Kill) .addReg(PPC::R1) @@ -482,18 +525,36 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R0); } } else { // PPC64. - if (ALIGN_STACK && MaxAlign > TargetAlign) { + if (HasBP) { + // Save a copy of r1 as the base pointer. + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), + HasFP ? PPC::X30 : PPC::X31) + .addReg(PPC::X1) + .addReg(PPC::X1); + } + + if (HasBP && MaxAlign > 1) { assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && "Invalid alignment!"); - assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) .addReg(PPC::X1) .addImm(0) .addImm(64 - Log2_32(MaxAlign)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) - .addReg(PPC::X0) - .addImm(NegFrameSize); + if (isInt<16>(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) + .addReg(PPC::X0, RegState::Kill) + .addImm(NegFrameSize); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X12) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X12) + .addReg(PPC::X12, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC8), PPC::X0) + .addReg(PPC::X0, RegState::Kill) + .addReg(PPC::X12, RegState::Kill); + } BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X1, RegState::Kill) .addReg(PPC::X1) @@ -535,6 +596,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset)); } + if (HasBP) { + unsigned Reg = isPPC64 ? (HasFP ? PPC::X30 : PPC::X31) : + (HasFP ? PPC::R30 : PPC::R31); + Reg = MRI->getDwarfRegNum(Reg, true); + MMI.addFrameInst( + MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset)); + } + if (MustSaveLR) { unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR; Reg = MRI->getDwarfRegNum(Reg, true); @@ -614,6 +683,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, assert(MBBI != MBB.end() && "Returning block has no terminator"); const PPCInstrInfo &TII = *static_cast(MF.getTarget().getInstrInfo()); + const PPCRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl; @@ -629,8 +700,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Get alignment info so we know how to restore r1 const MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned TargetAlign = getStackAlignment(); - unsigned MaxAlign = MFI->getMaxAlignment(); // Get the number of bytes allocated from the FrameInfo. int FrameSize = MFI->getStackSize(); @@ -645,6 +714,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, const SmallVectorImpl &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF); + bool HasBP = RegInfo->hasBasePointer(MF); int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); @@ -660,6 +730,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } + int BPOffset = 0; + if (HasBP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int BPIndex = FI->getBasePointerSaveIndex(); + assert(BPIndex && "No Base Pointer Save Slot!"); + BPOffset = FFI->getObjectOffset(BPIndex); + } else { + BPOffset = + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI, + HasFP); + } + } + bool UsesTCRet = RetOpcode == PPC::TCRETURNri || RetOpcode == PPC::TCRETURNdi || RetOpcode == PPC::TCRETURNai || @@ -704,7 +788,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(PPC::R31) .addReg(PPC::R0); } else if (isInt<16>(FrameSize) && - (!ALIGN_STACK || TargetAlign >= MaxAlign) && + !HasBP && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) .addReg(PPC::R1).addImm(FrameSize); @@ -727,7 +811,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(PPC::X1) .addReg(PPC::X31) .addReg(PPC::X0); - } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign && + } else if (isInt<16>(FrameSize) && !HasBP && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) .addReg(PPC::X1).addImm(FrameSize); @@ -751,6 +835,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) .addImm(FPOffset).addReg(PPC::X1); + if (HasBP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), HasFP ? PPC::X30 : PPC::X31) + .addImm(BPOffset).addReg(PPC::X1); + if (!MustSaveCRs.empty()) for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) @@ -770,6 +858,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31) .addImm(FPOffset).addReg(PPC::R1); + if (HasBP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), HasFP ? PPC::R30 : PPC::R31) + .addImm(FPOffset).addReg(PPC::R1); + if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0); } @@ -848,7 +940,8 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *) const { - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const PPCRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); // Save and clear the LR state. PPCFunctionInfo *FI = MF.getInfo(); @@ -873,6 +966,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, FI->setFramePointerSaveIndex(FPSI); } + int BPSI = FI->getBasePointerSaveIndex(); + if (!BPSI && RegInfo->hasBasePointer(MF)) { + int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, needsFP(MF)); + // Allocate the frame index for the base pointer save area. + BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); + // Save the result. + FI->setBasePointerSaveIndex(BPSI); + } + // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; if (MF.getTarget().Options.GuaranteedTailCallOpt && @@ -1004,6 +1106,17 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } + const PPCRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + if (RegInfo->hasBasePointer(MF)) { + HasGPSaveArea = true; + + int FI = PFI->getBasePointerSaveIndex(); + assert(FI && "No Base Pointer Save Slot!"); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + // General register save area starts right below the Floating-point // register save area. if (HasGPSaveArea || HasG8SaveArea) { diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 6f5f936..9acf129 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -94,6 +94,20 @@ public: return isPPC64 ? -8U : -4U; } + /// getBasePointerSaveOffset - Return the previous frame offset to save the + /// base pointer. + static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI, + bool hasFP) { + if (!hasFP) + return getFramePointerSaveOffset(isPPC64, isDarwinABI); + + if (isDarwinABI) + return isPPC64 ? -16U : -8U; + + // SVR4 ABI: First slot in the general register save area. + return isPPC64 ? -16U : -8U; + } + /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) { diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 3b2ac3b..33f843d 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -32,6 +32,9 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// int ReturnAddrSaveIndex; + /// Frame index where the old base pointer is stored. + int BasePointerSaveIndex; + /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current /// function. This is only valid after the initial scan of the function by /// PEI. @@ -93,6 +96,7 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + BasePointerSaveIndex(0), HasSpills(false), HasNonRISpills(false), SpillsCR(false), @@ -113,6 +117,9 @@ public: int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; } void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; } + int getBasePointerSaveIndex() const { return BasePointerSaveIndex; } + void setBasePointerSaveIndex(int Idx) { BasePointerSaveIndex = Idx; } + unsigned getMinReservedArea() const { return MinReservedArea; } void setMinReservedArea(unsigned size) { MinReservedArea = size; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 8a0954c..49de8da 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -48,6 +48,14 @@ using namespace llvm; +static cl::opt +EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true), + cl::desc("Enable use of a base pointer for complex stack frames")); + +static cl::opt +AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false), + cl::desc("Force the use of a base pointer in every function")); + PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, @@ -170,18 +178,28 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::X1); Reserved.set(PPC::X13); - if (PPCFI->needsFP(MF)) + if (PPCFI->needsFP(MF) || hasBasePointer(MF)) { Reserved.set(PPC::X31); + // If we need a base pointer, and we also have a frame pointer, then use + // r30 as the base pointer. + if (PPCFI->needsFP(MF) && hasBasePointer(MF)) + Reserved.set(PPC::X30); + } + // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } } - if (PPCFI->needsFP(MF)) + if (PPCFI->needsFP(MF) || hasBasePointer(MF)) { Reserved.set(PPC::R31); + if (PPCFI->needsFP(MF) && hasBasePointer(MF)) + Reserved.set(PPC::R30); + } + // Reserve Altivec registers when Altivec is unavailable. if (!Subtarget.hasAltivec()) for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(), @@ -524,7 +542,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); @@ -562,12 +579,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). - - bool is64Bit = Subtarget.isPPC64(); - MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ? - (is64Bit ? PPC::X31 : PPC::R31) : - (is64Bit ? PPC::X1 : PPC::R1), - false); + MI.getOperand(FIOperandNum).ChangeToRegister( + FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false); // Figure out if the offset in the instruction is shifted right two bits. bool isIXAddr = usesIXAddr(MI); @@ -586,8 +599,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. if (!MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) - Offset += MFI->getStackSize(); + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) { + if (!(hasBasePointer(MF) && FrameIndex < 0)) + Offset += MFI->getStackSize(); + } // If we can, encode the offset directly into the instruction. If this is a // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If @@ -605,6 +620,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. + bool is64Bit = Subtarget.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; @@ -658,6 +674,49 @@ unsigned PPCRegisterInfo::getEHHandlerRegister() const { return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; } +unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!hasBasePointer(MF)) + return getFrameRegister(MF); + + if (!Subtarget.isPPC64()) + return TFI->hasFP(MF) ? PPC::R30 : PPC::R31; + else + return TFI->hasFP(MF) ? PPC::X30 : PPC::X31; +} + +bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const { + if (!EnableBasePointer) + return false; + if (AlwaysBasePointer) + return true; + + // If we need to realign the stack, then the stack pointer can no longer + // serve as an offset into the caller's stack space. As a result, we need a + // base pointer. + return needsStackRealignment(MF); +} + +bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { + if (!MF.getTarget().Options.RealignStack) + return false; + + return true; +} + +bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); + unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} + /// Returns true if the instruction's frame index /// reference would be better served by a base register other than FP /// or SP. Used by LocalStackFrameAllocation to determine which frame index diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 93626a9..d02af9e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -92,6 +92,12 @@ public: // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; + // Base pointer (stack realignment) support. + unsigned getBaseRegister(const MachineFunction &MF) const; + bool hasBasePointer(const MachineFunction &MF) const; + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const; + // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; -- cgit v1.1 From 8d7435e9b1319c6e748a06c0b41a4c3de82ec750 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 17 Jul 2013 05:35:44 +0000 Subject: PPC: Add CTR-register clobber to builtin setjmp Because the builtin longjmp implementation uses a CTR-based indirect jump, when the control flow arrives at the builtin setjmp call, the CTR register has necessarily been clobbered. Correspondingly, this adds CTR to the list of implicit definitions of the builtin setjmp pseudo instruction. We don't need to add CTR to the implicit definitions of builtin longjmp because, even though it does clobber the CTR register, the control flow cannot return to inside the loop unless there is also a builtin setjmp call. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186488 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCCTRLoops.cpp | 7 +++++++ lib/Target/PowerPC/PPCInstr64Bit.td | 1 + lib/Target/PowerPC/PPCInstrInfo.td | 1 + 3 files changed, 9 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 30181a0..4e30c537 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -233,6 +233,13 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { #endif case Intrinsic::longjmp: + + // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp + // because, although it does clobber the counter register, the + // control can't then return to inside the loop unless there is also + // an eh_sjlj_setjmp. + case Intrinsic::eh_sjlj_setjmp: + case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index e7bb259..f04820a 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -280,6 +280,7 @@ def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), } // neverHasSideEffects = 1 let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + let Defs = [CTR8] in def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP64", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index d4969f6..398a11b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1093,6 +1093,7 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), } let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + let Defs = [CTR] in def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP32", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, -- cgit v1.1 From d055c595443fefe64b33d28d0b2556ace04084ad Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Wed, 17 Jul 2013 05:46:46 +0000 Subject: Fix ARMFastISel::ARMEmitIntExt shift emission My patch 'r183551 - ARM FastISel integer sext/zext improvements' was incorrect when emitting ARM register-immediate ASR, LSL, LSR instructions: they are pseudo-instructions in ARMInstrInfo.td and I should have used MOVsi instead. This is not an issue when code is generated through a .s file, but is an issue when generated straight to a .o (-filetype=obj). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186489 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 82 ++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 30 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6064813..d4aa2f2 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -42,7 +42,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -2631,34 +2630,46 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, }; // Table governing the instruction(s) to be emitted. - static const struct { - // First entry for each of the following is sext, second zext. - uint16_t Opc[2]; - uint8_t Imm[2]; // All instructions have either a shift or a mask. - uint8_t hasS[2]; // Some instructions have an S bit, always set it to 0. - } OpcTbl[2][2][3] = { + static const struct InstructionTable { + uint32_t Opc : 16; + uint32_t hasS : 1; // Some instructions have an S bit, always set it to 0. + uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi. + uint32_t Imm : 8; // All instructions have either a shift or a mask. + } IT[2][2][3][2] = { { // Two instructions (first is left shift, second is in this table). - { // ARM - /* 1 */ { { ARM::ASRi, ARM::LSRi }, { 31, 31 }, { 1, 1 } }, - /* 8 */ { { ARM::ASRi, ARM::LSRi }, { 24, 24 }, { 1, 1 } }, - /* 16 */ { { ARM::ASRi, ARM::LSRi }, { 16, 16 }, { 1, 1 } } + { // ARM Opc S Shift Imm + /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 }, + /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } }, + /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 }, + /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } }, + /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 }, + /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } } }, - { // Thumb - /* 1 */ { { ARM::tASRri, ARM::tLSRri }, { 31, 31 }, { 0, 0 } }, - /* 8 */ { { ARM::tASRri, ARM::tLSRri }, { 24, 24 }, { 0, 0 } }, - /* 16 */ { { ARM::tASRri, ARM::tLSRri }, { 16, 16 }, { 0, 0 } } + { // Thumb Opc S Shift Imm + /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 }, + /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } }, + /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 }, + /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } }, + /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 }, + /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } } } }, { // Single instruction. - { // ARM - /* 1 */ { { ARM::KILL, ARM::ANDri }, { 0, 1 }, { 0, 1 } }, - /* 8 */ { { ARM::SXTB, ARM::ANDri }, { 0, 255 }, { 0, 1 } }, - /* 16 */ { { ARM::SXTH, ARM::UXTH }, { 0, 0 }, { 0, 0 } } + { // ARM Opc S Shift Imm + /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 }, + /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } }, + /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 }, + /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } }, + /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 }, + /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } } }, - { // Thumb - /* 1 */ { { ARM::KILL, ARM::t2ANDri }, { 0, 1 }, { 0, 1 } }, - /* 8 */ { { ARM::t2SXTB, ARM::t2ANDri }, { 0, 255 }, { 0, 1 } }, - /* 16 */ { { ARM::t2SXTH, ARM::t2UXTH }, { 0, 0 }, { 0, 0 } } + { // Thumb Opc S Shift Imm + /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 }, + /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } }, + /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 }, + /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } }, + /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 }, + /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } } } } }; @@ -2673,20 +2684,28 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, "other sizes unimplemented"); bool hasV6Ops = Subtarget->hasV6Ops(); - unsigned Bitness = countTrailingZeros(SrcBits) >> 1; // {1,8,16}=>{0,1,2} + unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2} assert((Bitness < 3) && "sanity-check table bounds"); bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt]; const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr]; - unsigned Opc = OpcTbl[isSingleInstr][isThumb2][Bitness].Opc[isZExt]; + const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt]; + unsigned Opc = ITP->Opc; assert(ARM::KILL != Opc && "Invalid table entry"); - unsigned Imm = OpcTbl[isSingleInstr][isThumb2][Bitness].Imm[isZExt]; - unsigned hasS = OpcTbl[isSingleInstr][isThumb2][Bitness].hasS[isZExt]; + unsigned hasS = ITP->hasS; + ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift; + assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) && + "only MOVsi has shift operand addressing mode"); + unsigned Imm = ITP->Imm; // 16-bit Thumb instructions always set CPSR (unless they're in an IT block). bool setsCPSR = &ARM::tGPRRegClass == RC; - unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::LSLi; + unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi; unsigned ResultReg; + // MOVsi encodes shift and immediate in shift operand addressing mode. + // The following condition has the same value when emitting two + // instruction sequences: both are shifts. + bool ImmIsSO = (Shift != ARM_AM::no_shift); // Either one or two instructions are emitted. // They're always of the form: @@ -2699,13 +2718,16 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2; for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) { ResultReg = createResultReg(RC); - unsigned Opcode = ((0 == Instr) && !isSingleInstr) ? LSLOpc : Opc; + bool isLsl = (0 == Instr) && !isSingleInstr; + unsigned Opcode = isLsl ? LSLOpc : Opc; + ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift; + unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm; bool isKill = 1 == Instr; MachineInstrBuilder MIB = BuildMI( *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg); if (setsCPSR) MIB.addReg(ARM::CPSR, RegState::Define); - AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(Imm)); + AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc)); if (hasS) AddDefaultCC(MIB); // Second instruction consumes the first's result. -- cgit v1.1 From e0364b64d12330f6f8c47ef98fc658468e2b72e4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 17 Jul 2013 05:57:45 +0000 Subject: Make x86 fast-isel correctly choose between aligned and unaligned operations for vector stores. Fixes PR16640. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186491 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9c91e93..7419822 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -79,8 +79,10 @@ private: bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM); - bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); + bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM, + bool Aligned = false); + bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM, + bool Aligned = false); bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); @@ -233,7 +235,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. bool -X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { +X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, + const X86AddressMode &AM, bool Aligned) { // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -243,8 +246,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { // Mask out all but lowest bit. unsigned AndResult = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1); - Val = AndResult; + TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1); + ValReg = AndResult; } // FALLTHROUGH, handling i1 as i8. case MVT::i8: Opc = X86::MOV8mr; break; @@ -260,26 +263,35 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; break; case MVT::v4f32: - Opc = X86::MOVAPSmr; + if (Aligned) + Opc = X86::MOVAPSmr; + else + Opc = X86::MOVUPSmr; break; case MVT::v2f64: - Opc = X86::MOVAPDmr; + if (Aligned) + Opc = X86::MOVAPSmr; + else + Opc = X86::MOVUPSmr; break; case MVT::v4i32: case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - Opc = X86::MOVDQAmr; + if (Aligned) + Opc = X86::MOVDQAmr; + else + Opc = X86::MOVDQUmr; break; } addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(Opc)), AM).addReg(Val); + DL, TII.get(Opc)), AM).addReg(ValReg); return true; } bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, - const X86AddressMode &AM) { + const X86AddressMode &AM, bool Aligned) { // Handle 'null' like i32/i64 0. if (isa(Val)) Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); @@ -314,7 +326,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, if (ValReg == 0) return false; - return X86FastEmitStore(VT, ValReg, AM); + return X86FastEmitStore(VT, ValReg, AM, Aligned); } /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of @@ -688,6 +700,10 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (S->isAtomic()) return false; + unsigned SABIAlignment = + TD.getABITypeAlignment(S->getValueOperand()->getType()); + bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment; + MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -696,7 +712,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (!X86SelectAddress(I->getOperand(1), AM)) return false; - return X86FastEmitStore(VT, I->getOperand(0), AM); + return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned); } /// X86SelectRet - Select and emit code to implement ret instructions. -- cgit v1.1 From 77c95b6b95d14202efb6f4c40a2a2280c3d93ae2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 17 Jul 2013 06:58:23 +0000 Subject: Teach x86 fast-isel to use AVX opcodes for vector stores when AVX is enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186496 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7419822..8c9575c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -264,24 +264,24 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, break; case MVT::v4f32: if (Aligned) - Opc = X86::MOVAPSmr; + Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; else - Opc = X86::MOVUPSmr; + Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; break; case MVT::v2f64: if (Aligned) - Opc = X86::MOVAPSmr; + Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; else - Opc = X86::MOVUPSmr; + Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; break; case MVT::v4i32: case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: if (Aligned) - Opc = X86::MOVDQAmr; + Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr; else - Opc = X86::MOVDQUmr; + Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr; break; } -- cgit v1.1 From dcc4207a00b31687018f87de75846579bbdb9c77 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Wed, 17 Jul 2013 10:58:37 +0000 Subject: [XCore] Ensure implicit operands aren't lost on the return instruction. Patch by Robert Lytton. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186500 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreFrameLowering.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 736a4ef..b57cf9d 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -223,7 +223,9 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, assert(MBBI->getOpcode() == XCore::RETSP_u6 || MBBI->getOpcode() == XCore::RETSP_lu6); int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6; - BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); + for (unsigned i = 3, e = MBBI->getNumOperands(); i < e; ++i) + MIB->addOperand(MBBI->getOperand(i)); // copy any variadic operands MBB.erase(MBBI); } else { int Opcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; -- cgit v1.1 From 19c14abf1c4ccebfa7d07bdd6ea8462a15c0b749 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 17 Jul 2013 13:59:38 +0000 Subject: [ARMv8] Add support for the NEON instructions vmaxnm/vminnm. This adds a new class for non-predicable NEON instructions and a new DecoderNamespace for v8 NEON instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186504 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCodeEmitter.cpp | 2 ++ lib/Target/ARM/ARMInstrFormats.td | 41 +++++++++++++++++++++ lib/Target/ARM/ARMInstrNEON.td | 45 ++++++++++++++++++++++++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 18 ++++++++++ lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 13 +++++++ 5 files changed, 119 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index e6f69a4..568ca85 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -167,6 +167,8 @@ namespace { const { return 0; } unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) const { return 0; } + unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) const { return 0; } unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op) diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 9a542b9..a835187 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1830,6 +1830,21 @@ class NeonXI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, ".", dt, "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; + + let Inst{31-28} = 0b1111; +} + class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list pattern> @@ -2030,6 +2045,32 @@ class N3V op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{5} = Vm{4}; } +class N3Vnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, dag oops, dag iops,Format f, InstrItinClass itin, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = op27_23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; +} + class N3VLane32 op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 32b8d67..f349f10 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2541,6 +2541,16 @@ class N3VDInt op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VDIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, @@ -2552,6 +2562,7 @@ class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]> { let isCommutable = 0; } + class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, @@ -2584,6 +2595,16 @@ class N3VQInt op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VQIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -4659,6 +4680,18 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +// VMAXNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMAXNMND : N3VDIntnp<0b000110, 0b00, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v2f32, v2f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v4f32, v4f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -4673,6 +4706,18 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +// VMINNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v2f32, v2f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v4f32, v4f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // Vector Pairwise Operations. // VPADD : Vector Pairwise Add diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 32ce3be..ce4dbb9 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -499,7 +499,14 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); + result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -818,6 +825,17 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } + MI.clear(); + uint32_t NEONv8Insn = insn32; + NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 + result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); Size = 0; return MCDisassembler::Fail; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 8631d81..a18d465 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -315,6 +315,8 @@ public: unsigned EncodedValue) const; unsigned NEONThumb2DupPostEncoder(const MCInst &MI, unsigned EncodedValue) const; + unsigned NEONThumb2V8PostEncoder(const MCInst &MI, + unsigned EncodedValue) const; unsigned VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const; @@ -389,6 +391,17 @@ unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI, return EncodedValue; } +/// Post-process encoded NEON v8 instructions, and rewrite them to Thumb2 form +/// if we are in Thumb2. +unsigned ARMMCCodeEmitter::NEONThumb2V8PostEncoder(const MCInst &MI, + unsigned EncodedValue) const { + if (isThumb2()) { + EncodedValue |= 0xC000000; // Set bits 27-26 + } + + return EncodedValue; +} + /// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite /// them to their Thumb2 form if we are currently in Thumb2 mode. unsigned ARMMCCodeEmitter:: -- cgit v1.1 From 16f385f90f481195bfcf6b139ced4cee033bb887 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Wed, 17 Jul 2013 14:05:19 +0000 Subject: Implement eret and deret(return from exception) instructions for Mips. Test examples are given. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186507 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFormats.td | 13 +++++++++++++ lib/Target/Mips/MipsInstrInfo.td | 8 ++++++++ 2 files changed, 21 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index c2a5879..0ae93b4 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -520,6 +520,19 @@ class BRK_FM funct> } //===----------------------------------------------------------------------===// +// Exception return format +//===----------------------------------------------------------------------===// + +class ER_FM funct> +{ + bits<32> Inst; + let Inst{31-26} = 0x10; + let Inst{25} = 1; + let Inst{24-6} = 0; + let Inst{5-0} = funct; +} + +//===----------------------------------------------------------------------===// // // FLOATING POINT INSTRUCTION FORMATS // diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 1bc1077..6082a3f 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -652,6 +652,11 @@ class BRK_FT : InstSE<(outs), (ins uimm10:$code_1, uimm10:$code_2), !strconcat(opstr, "\t$code_1, $code_2"), [], NoItinerary, FrmOther>; +// (D)Eret +class ER_FT : + InstSE<(outs), (ins), + opstr, [], NoItinerary, FrmOther>; + // Sync let hasSideEffects = 1 in class SYNC_FT : @@ -958,6 +963,9 @@ def TEQ : TEQ_FT<"teq", CPURegsOpnd>, TEQ_FM<0x34>; def BREAK : BRK_FT<"break">, BRK_FM<0xd>; def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>; +def ERET : ER_FT<"eret">, ER_FM<0x18>; +def DERET : ER_FT<"deret">, ER_FM<0x1f>; + /// Load-linked, Store-conditional let Predicates = [NotN64, HasStdEnc] in { def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>; -- cgit v1.1 From fce9279ac0265fd5ea637dd30253bad26f4273da Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Wed, 17 Jul 2013 15:00:42 +0000 Subject: This patch checks for valid mnemonics at the beginning of parseInstruction method, thus giving the user the right error message for non-existing instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186512 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index a4f3721..de3c4fd 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1495,6 +1495,11 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { bool MipsAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands) { + // Check if we have valid mnemonic + if (!mnemonicIsValid(Name)) { + Parser.eatToEndOfStatement(); + return Error(NameLoc, "Unknown instruction"); + } // First operand in MCInst is instruction mnemonic. Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); -- cgit v1.1 From 27d0c68617dee9c60efbc179c31b4a1bd28daa34 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 17 Jul 2013 19:09:27 +0000 Subject: [mips] Use "foreach" loop to make register definitions more concise. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186528 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsRegisterInfo.td | 89 ++++--------------------------------- 1 file changed, 9 insertions(+), 80 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index a5320bb..63e4001 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -147,91 +147,20 @@ let Namespace = "Mips" in { def RA_64 : Mips64GPRReg< 31, "ra", [RA]>, DwarfRegNum<[31]>; /// Mips Single point precision FPU Registers - def F0 : FPR< 0, "f0">, DwarfRegNum<[32]>; - def F1 : FPR< 1, "f1">, DwarfRegNum<[33]>; - def F2 : FPR< 2, "f2">, DwarfRegNum<[34]>; - def F3 : FPR< 3, "f3">, DwarfRegNum<[35]>; - def F4 : FPR< 4, "f4">, DwarfRegNum<[36]>; - def F5 : FPR< 5, "f5">, DwarfRegNum<[37]>; - def F6 : FPR< 6, "f6">, DwarfRegNum<[38]>; - def F7 : FPR< 7, "f7">, DwarfRegNum<[39]>; - def F8 : FPR< 8, "f8">, DwarfRegNum<[40]>; - def F9 : FPR< 9, "f9">, DwarfRegNum<[41]>; - def F10 : FPR<10, "f10">, DwarfRegNum<[42]>; - def F11 : FPR<11, "f11">, DwarfRegNum<[43]>; - def F12 : FPR<12, "f12">, DwarfRegNum<[44]>; - def F13 : FPR<13, "f13">, DwarfRegNum<[45]>; - def F14 : FPR<14, "f14">, DwarfRegNum<[46]>; - def F15 : FPR<15, "f15">, DwarfRegNum<[47]>; - def F16 : FPR<16, "f16">, DwarfRegNum<[48]>; - def F17 : FPR<17, "f17">, DwarfRegNum<[49]>; - def F18 : FPR<18, "f18">, DwarfRegNum<[50]>; - def F19 : FPR<19, "f19">, DwarfRegNum<[51]>; - def F20 : FPR<20, "f20">, DwarfRegNum<[52]>; - def F21 : FPR<21, "f21">, DwarfRegNum<[53]>; - def F22 : FPR<22, "f22">, DwarfRegNum<[54]>; - def F23 : FPR<23, "f23">, DwarfRegNum<[55]>; - def F24 : FPR<24, "f24">, DwarfRegNum<[56]>; - def F25 : FPR<25, "f25">, DwarfRegNum<[57]>; - def F26 : FPR<26, "f26">, DwarfRegNum<[58]>; - def F27 : FPR<27, "f27">, DwarfRegNum<[59]>; - def F28 : FPR<28, "f28">, DwarfRegNum<[60]>; - def F29 : FPR<29, "f29">, DwarfRegNum<[61]>; - def F30 : FPR<30, "f30">, DwarfRegNum<[62]>; - def F31 : FPR<31, "f31">, DwarfRegNum<[63]>; + foreach I = 0-31 in + def F#I : FPR, DwarfRegNum<[!add(I, 32)]>; /// Mips Double point precision FPU Registers (aliased /// with the single precision to hold 64 bit values) - def D0 : AFPR< 0, "f0", [F0, F1]>; - def D1 : AFPR< 2, "f2", [F2, F3]>; - def D2 : AFPR< 4, "f4", [F4, F5]>; - def D3 : AFPR< 6, "f6", [F6, F7]>; - def D4 : AFPR< 8, "f8", [F8, F9]>; - def D5 : AFPR<10, "f10", [F10, F11]>; - def D6 : AFPR<12, "f12", [F12, F13]>; - def D7 : AFPR<14, "f14", [F14, F15]>; - def D8 : AFPR<16, "f16", [F16, F17]>; - def D9 : AFPR<18, "f18", [F18, F19]>; - def D10 : AFPR<20, "f20", [F20, F21]>; - def D11 : AFPR<22, "f22", [F22, F23]>; - def D12 : AFPR<24, "f24", [F24, F25]>; - def D13 : AFPR<26, "f26", [F26, F27]>; - def D14 : AFPR<28, "f28", [F28, F29]>; - def D15 : AFPR<30, "f30", [F30, F31]>; + foreach I = 0-15 in + def D#I : AFPR("F"#!shl(I, 1)), + !cast("F"#!add(!shl(I, 1), 1))]>; /// Mips Double point precision FPU Registers in MFP64 mode. - def D0_64 : AFPR64<0, "f0", [F0]>, DwarfRegNum<[32]>; - def D1_64 : AFPR64<1, "f1", [F1]>, DwarfRegNum<[33]>; - def D2_64 : AFPR64<2, "f2", [F2]>, DwarfRegNum<[34]>; - def D3_64 : AFPR64<3, "f3", [F3]>, DwarfRegNum<[35]>; - def D4_64 : AFPR64<4, "f4", [F4]>, DwarfRegNum<[36]>; - def D5_64 : AFPR64<5, "f5", [F5]>, DwarfRegNum<[37]>; - def D6_64 : AFPR64<6, "f6", [F6]>, DwarfRegNum<[38]>; - def D7_64 : AFPR64<7, "f7", [F7]>, DwarfRegNum<[39]>; - def D8_64 : AFPR64<8, "f8", [F8]>, DwarfRegNum<[40]>; - def D9_64 : AFPR64<9, "f9", [F9]>, DwarfRegNum<[41]>; - def D10_64 : AFPR64<10, "f10", [F10]>, DwarfRegNum<[42]>; - def D11_64 : AFPR64<11, "f11", [F11]>, DwarfRegNum<[43]>; - def D12_64 : AFPR64<12, "f12", [F12]>, DwarfRegNum<[44]>; - def D13_64 : AFPR64<13, "f13", [F13]>, DwarfRegNum<[45]>; - def D14_64 : AFPR64<14, "f14", [F14]>, DwarfRegNum<[46]>; - def D15_64 : AFPR64<15, "f15", [F15]>, DwarfRegNum<[47]>; - def D16_64 : AFPR64<16, "f16", [F16]>, DwarfRegNum<[48]>; - def D17_64 : AFPR64<17, "f17", [F17]>, DwarfRegNum<[49]>; - def D18_64 : AFPR64<18, "f18", [F18]>, DwarfRegNum<[50]>; - def D19_64 : AFPR64<19, "f19", [F19]>, DwarfRegNum<[51]>; - def D20_64 : AFPR64<20, "f20", [F20]>, DwarfRegNum<[52]>; - def D21_64 : AFPR64<21, "f21", [F21]>, DwarfRegNum<[53]>; - def D22_64 : AFPR64<22, "f22", [F22]>, DwarfRegNum<[54]>; - def D23_64 : AFPR64<23, "f23", [F23]>, DwarfRegNum<[55]>; - def D24_64 : AFPR64<24, "f24", [F24]>, DwarfRegNum<[56]>; - def D25_64 : AFPR64<25, "f25", [F25]>, DwarfRegNum<[57]>; - def D26_64 : AFPR64<26, "f26", [F26]>, DwarfRegNum<[58]>; - def D27_64 : AFPR64<27, "f27", [F27]>, DwarfRegNum<[59]>; - def D28_64 : AFPR64<28, "f28", [F28]>, DwarfRegNum<[60]>; - def D29_64 : AFPR64<29, "f29", [F29]>, DwarfRegNum<[61]>; - def D30_64 : AFPR64<30, "f30", [F30]>, DwarfRegNum<[62]>; - def D31_64 : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>; + foreach I = 0-31 in + def D#I#_64 : AFPR64("F"#I)]>, + DwarfRegNum<[!add(I, 32)]>; // Hi/Lo registers def HI : Register<"ac0">, DwarfRegNum<[64]>; -- cgit v1.1 From 7f6d84230c0e21b4f841a0b28a1e494fe327d2b7 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 17 Jul 2013 19:43:13 +0000 Subject: Silencing an MSVC warning about signed vs unsigned comparison mismatches. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186529 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f5d5e5a..bf341b3 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -781,7 +781,7 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { // all cases and are sometimes shorter. Prefer to use RISBG for ANDs though, // since it is effectively a three-operand instruction in this case, // and since it can handle some masks that AND IMMEDIATE can't. - if (Count < (N->getOpcode() == ISD::AND ? 1 : 2)) + if (Count < (N->getOpcode() == ISD::AND ? 1U : 2U)) return 0; // Prefer register extensions like LLC over RISBG. -- cgit v1.1 From 0541722de4beb2e53058dbf4ed1ebf0d96ddd6cb Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 17 Jul 2013 23:50:51 +0000 Subject: PPC: Add base-pointer support to builtin setjmp/longjmp First, this changes the base-pointer implementation to remove an unnecessary complication (and one that is incompatible with how builtin SjLj is implemented): instead of using r31 as the base pointer when it is not needed as a frame pointer, now the base pointer will always be r30 when needed. Second, we introduce another pseudo register, BP, which is used just like the FP pseudo register to refer to the base register before we know for certain what register it will be. Third, we now save BP into the jmp_buf, and restore r30 from that slot in longjmp. If the function that called setjmp did not use a base pointer, then r30 will be overwritten by the setjmp-calling-function's restore code. FP restoration (which is restored into r31) works the same way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186545 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 38 ++++++++++++++++++++------------- lib/Target/PowerPC/PPCFrameLowering.h | 6 +----- lib/Target/PowerPC/PPCISelLowering.cpp | 33 +++++++++++++++++++++++++--- lib/Target/PowerPC/PPCRegisterInfo.cpp | 28 +++++++++++------------- lib/Target/PowerPC/PPCRegisterInfo.td | 8 +++++-- 5 files changed, 72 insertions(+), 41 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 3b57390..d846365 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -297,6 +297,12 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { unsigned FPReg = is31 ? PPC::R31 : PPC::R1; unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; + const PPCRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + bool HasBP = RegInfo->hasBasePointer(MF); + unsigned BPReg = HasBP ? (unsigned) PPC::R30 : FPReg; + unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg; + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { @@ -313,6 +319,13 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { case PPC::FP8: MO.setReg(FP8Reg); break; + case PPC::BP: + MO.setReg(BPReg); + break; + case PPC::BP8: + MO.setReg(BP8Reg); + break; + } } } @@ -393,8 +406,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BPOffset = FFI->getObjectOffset(BPIndex); } else { BPOffset = - PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI, - HasFP); + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI); } } @@ -417,7 +429,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (HasBP) BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(HasFP ? PPC::X30 : PPC::X31) + .addReg(PPC::X30) .addImm(BPOffset) .addReg(PPC::X1); @@ -448,7 +460,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative // offsets of R1 is not allowed. BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(HasFP ? PPC::R30 : PPC::R31) + .addReg(PPC::R30) .addImm(BPOffset) .addReg(PPC::R1); @@ -475,8 +487,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (HasBP) { // Save a copy of r1 as the base pointer. - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), - HasFP ? PPC::R30 : PPC::R31) + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R30) .addReg(PPC::R1) .addReg(PPC::R1); } @@ -527,8 +538,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } else { // PPC64. if (HasBP) { // Save a copy of r1 as the base pointer. - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), - HasFP ? PPC::X30 : PPC::X31) + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X30) .addReg(PPC::X1) .addReg(PPC::X1); } @@ -597,8 +607,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } if (HasBP) { - unsigned Reg = isPPC64 ? (HasFP ? PPC::X30 : PPC::X31) : - (HasFP ? PPC::R30 : PPC::R31); + unsigned Reg = isPPC64 ? PPC::X30 : PPC::R30; Reg = MRI->getDwarfRegNum(Reg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset)); @@ -739,8 +748,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BPOffset = FFI->getObjectOffset(BPIndex); } else { BPOffset = - PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI, - HasFP); + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI); } } @@ -836,7 +844,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addImm(FPOffset).addReg(PPC::X1); if (HasBP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), HasFP ? PPC::X30 : PPC::X31) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X30) .addImm(BPOffset).addReg(PPC::X1); if (!MustSaveCRs.empty()) @@ -859,7 +867,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addImm(FPOffset).addReg(PPC::R1); if (HasBP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), HasFP ? PPC::R30 : PPC::R31) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R30) .addImm(FPOffset).addReg(PPC::R1); if (MustSaveLR) @@ -968,7 +976,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int BPSI = FI->getBasePointerSaveIndex(); if (!BPSI && RegInfo->hasBasePointer(MF)) { - int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, needsFP(MF)); + int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for the base pointer save area. BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); // Save the result. diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 9acf129..7aab37e 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -96,11 +96,7 @@ public: /// getBasePointerSaveOffset - Return the previous frame offset to save the /// base pointer. - static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI, - bool hasFP) { - if (!hasFP) - return getFramePointerSaveOffset(isPPC64, isDarwinABI); - + static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI) { if (isDarwinABI) return isPPC64 ? -16U : -8U; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index a38201a..fd225cc 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6084,6 +6084,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // thisMBB: const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); + const int64_t BPOffset = 4 * PVT.getStoreSize(); // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); @@ -6095,10 +6096,25 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addReg(PPC::X2) .addImm(TOCOffset) .addReg(BufReg); - MIB.setMemRefs(MMOBegin, MMOEnd); } + // Naked functions never have a base pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned BaseReg; + if (MF->getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1; + else + BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP; + + MIB = BuildMI(*thisMBB, MI, DL, + TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW)) + .addReg(BaseReg) + .addImm(BPOffset) + .addReg(BufReg); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); const PPCRegisterInfo *TRI = @@ -6170,12 +6186,14 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30; MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); + const int64_t BPOffset = 4 * PVT.getStoreSize(); unsigned BufReg = MI->getOperand(0).getReg(); @@ -6217,8 +6235,17 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, } MIB.setMemRefs(MMOBegin, MMOEnd); - // FIXME: When we also support base pointers, that register must also be - // restored here. + // Reload BP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) + .addImm(BPOffset) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) + .addImm(BPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 49de8da..fdc604a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -152,6 +152,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::FP); Reserved.set(PPC::FP8); + // The BP register is also not really a register, but is the representation + // of the base pointer register used by setjmp. + Reserved.set(PPC::BP); + Reserved.set(PPC::BP8); + // The counter registers must be reserved so that counter-based loops can // be correctly formed (and the mtctr instructions are not DCE'd). Reserved.set(PPC::CTR); @@ -178,14 +183,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::X1); Reserved.set(PPC::X13); - if (PPCFI->needsFP(MF) || hasBasePointer(MF)) { + if (PPCFI->needsFP(MF)) Reserved.set(PPC::X31); - // If we need a base pointer, and we also have a frame pointer, then use - // r30 as the base pointer. - if (PPCFI->needsFP(MF) && hasBasePointer(MF)) - Reserved.set(PPC::X30); - } + if (hasBasePointer(MF)) + Reserved.set(PPC::X30); // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { @@ -193,12 +195,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } - if (PPCFI->needsFP(MF) || hasBasePointer(MF)) { + if (PPCFI->needsFP(MF)) Reserved.set(PPC::R31); - if (PPCFI->needsFP(MF) && hasBasePointer(MF)) - Reserved.set(PPC::R30); - } + if (hasBasePointer(MF)) + Reserved.set(PPC::R30); // Reserve Altivec registers when Altivec is unavailable. if (!Subtarget.hasAltivec()) @@ -675,15 +676,10 @@ unsigned PPCRegisterInfo::getEHHandlerRegister() const { } unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (!hasBasePointer(MF)) return getFrameRegister(MF); - if (!Subtarget.isPPC64()) - return TFI->hasFP(MF) ? PPC::R30 : PPC::R31; - else - return TFI->hasFP(MF) ? PPC::X30 : PPC::X31; + return Subtarget.isPPC64() ? PPC::X30 : PPC::R30; } bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 003e7c3..d566e2c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -94,6 +94,10 @@ def ZERO8 : GP8; def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">; def FP8 : GP8; +// Representations of the base pointer used by setjmp. +def BP : GPR<0 /* arbitrary */, "**BASE POINTER**">; +def BP8 : GP8; + // Condition register bits def CR0LT : CRBIT< 0, "0">; def CR0GT : CRBIT< 1, "1">; @@ -172,11 +176,11 @@ def RM: SPR<512, "**ROUNDING MODE**">; // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), (sequence "R%u", 30, 13), - R31, R0, R1, FP)>; + R31, R0, R1, FP, BP)>; def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), (sequence "X%u", 30, 14), - X31, X13, X0, X1, FP8)>; + X31, X13, X0, X1, FP8, BP8)>; // For some instructions r0 is special (representing the value 0 instead of // the value in the r0 register), and we use these register subclasses to -- cgit v1.1 From aad2a72c285a48e34d89ba69d24eb624f2b09b0e Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 18 Jul 2013 04:28:21 +0000 Subject: PPC: Support dynamic allocas with large alignment Support for dynamic stack alignments in the PPC backend has been unfinished, in part because it depends on dynamic stack realignment (which I only just recently implemented fully). Now we can also support dynamic allocas with higher than the default target stack alignment (16 bytes). In order to round-up the requested size to the maximum requested alignment, we need an additional register to hold the rounded-up size. We're already using one scavenged register to hold the previous stack-pointer value (which needs to be stored with the signal-safe stdux update), and so when we have dynamic allocas and a large alignment, we allocate two emergency spill slots for the scavenger. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186562 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 6 ++- lib/Target/PowerPC/PPCRegisterInfo.cpp | 74 +++++++++++++++++++++------------ 2 files changed, 53 insertions(+), 27 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index d846365..24d3a0b 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1237,8 +1237,12 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, RC->getAlignment(), false)); + // Might we have over-aligned allocas? + bool HasAlVars = MFI->hasVarSizedObjects() && + MFI->getMaxAlignment() > getStackAlignment(); + // These kinds of spills might need two registers. - if (spillsCR(MF) || spillsVRSAVE(MF)) + if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index fdc604a..b762a57 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -269,8 +269,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Get stack alignments. unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); - if (MaxAlign > TargetAlign) - report_fatal_error("Dynamic alloca with large aligns not supported"); + assert((maxCallFrameSize & (MaxAlign-1)) == 0 && + "Maximum call-frame size not sufficiently aligned"); // Determine the previous frame's address. If FrameSize can't be // represented as 16 bits or we need special alignment, then we load the @@ -295,40 +295,62 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { .addImm(0) .addReg(PPC::R1); } - + + bool KillNegSizeReg = MI.getOperand(1).isKill(); + unsigned NegSizeReg = MI.getOperand(1).getReg(); + // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { + if (MaxAlign > TargetAlign) { + unsigned UnalNegSizeReg = NegSizeReg; + NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC); + + // Unfortunately, there is no andi, only andi., and we can't insert that + // here because we might clobber cr0 while it is live. + BuildMI(MBB, II, dl, TII.get(PPC::LI8), NegSizeReg) + .addImm(~(MaxAlign-1)); + + unsigned NegSizeReg1 = NegSizeReg; + NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC); + BuildMI(MBB, II, dl, TII.get(PPC::AND8), NegSizeReg) + .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg)) + .addReg(NegSizeReg1, RegState::Kill); + KillNegSizeReg = true; + } + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(Reg, RegState::Kill) .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - if (!MI.getOperand(1).isKill()) - BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) - .addReg(PPC::X1) - .addImm(maxCallFrameSize); - else - // Implicitly kill the register. - BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) - .addReg(PPC::X1) - .addImm(maxCallFrameSize) - .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); + .addReg(NegSizeReg, getKillRegState(KillNegSizeReg)); + BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) + .addReg(PPC::X1) + .addImm(maxCallFrameSize); } else { + if (MaxAlign > TargetAlign) { + unsigned UnalNegSizeReg = NegSizeReg; + NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC); + + // Unfortunately, there is no andi, only andi., and we can't insert that + // here because we might clobber cr0 while it is live. + BuildMI(MBB, II, dl, TII.get(PPC::LI), NegSizeReg) + .addImm(~(MaxAlign-1)); + + unsigned NegSizeReg1 = NegSizeReg; + NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC); + BuildMI(MBB, II, dl, TII.get(PPC::AND), NegSizeReg) + .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg)) + .addReg(NegSizeReg1, RegState::Kill); + KillNegSizeReg = true; + } + BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(Reg, RegState::Kill) .addReg(PPC::R1) - .addReg(MI.getOperand(1).getReg()); - - if (!MI.getOperand(1).isKill()) - BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) - .addReg(PPC::R1) - .addImm(maxCallFrameSize); - else - // Implicitly kill the register. - BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) - .addReg(PPC::R1) - .addImm(maxCallFrameSize) - .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); + .addReg(NegSizeReg, getKillRegState(KillNegSizeReg)); + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) + .addReg(PPC::R1) + .addImm(maxCallFrameSize); } // Discard the DYNALLOC instruction. -- cgit v1.1 From fe754512dcab6bb4bce4d3ea370c3202894e711b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 18 Jul 2013 07:16:44 +0000 Subject: Fix copy and paste bug from r186491 to make v2f64 use MOVAPD/MOVUPD as it should. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186566 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 8c9575c..5bc3420 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -270,9 +270,9 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, break; case MVT::v2f64: if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; + Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr; else - Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; + Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr; break; case MVT::v4i32: case MVT::v2i64: -- cgit v1.1 From 764f6f51257a0669acc58c8e5b4b802a29069302 Mon Sep 17 00:00:00 2001 From: Vladimir Medic Date: Thu, 18 Jul 2013 09:28:35 +0000 Subject: This patch extends mips register parsing methods to allow indexed register parsing. The corresponding test cases are added to the patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186567 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index de3c4fd..56a5dfd 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1268,6 +1268,18 @@ MipsAsmParser::parseRegs(SmallVectorImpl &Operands, // Set the proper register kind. MipsOperand* op = static_cast(Operands.back()); op->setRegKind(Kind); + if ((Kind == MipsOperand::Kind_CPURegs) + && (getLexer().is(AsmToken::LParen))) { + // Check if it is indexed addressing operand. + Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc())); + Parser.Lex(); // Eat the parenthesis. + if (parseRegs(Operands,RegKind) != MatchOperand_Success) + return MatchOperand_NoMatch; + if (getLexer().isNot(AsmToken::RParen)) + return MatchOperand_NoMatch; + Operands.push_back(MipsOperand::CreateToken(")", getLexer().getLoc())); + Parser.Lex(); + } return MatchOperand_Success; } return MatchOperand_NoMatch; -- cgit v1.1 From d46bb05e1ae484c491ea85527e45da86e78be658 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 18 Jul 2013 09:34:35 +0000 Subject: Remove the extra leading 0 from VMAXNMND. The N3VDIntnp pattern takes bits<5> and I gave it 6 bits. Thanks to Jiangning Liu for spotting it! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186568 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f349f10..f389909 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4682,7 +4682,7 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, // VMAXNM let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { - def VMAXNMND : N3VDIntnp<0b000110, 0b00, 0b1111, 0, 1, + def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, N3RegFrm, NoItinerary, "vmaxnm", "f32", v2f32, v2f32, int_arm_neon_vmaxnm, 1>, Requires<[HasV8, HasNEON]>; -- cgit v1.1 From efb6c52efb4116b6a6d6c99192db68ab69025119 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 18 Jul 2013 09:45:08 +0000 Subject: [SystemZ] Rename and formatting fixes In hindsight, using "RISBG" for something that can be any type of R.SBG instruction was a bit confusing, so this renames it to RxSBG. That might not be the best choice either, since there is an instruction called RXSBG, but hopefully the lower-case letter stands out enough. While there I fixed a couple of GNUisms that had crept in -- sorry about that! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 125 ++++++++++++++--------------- 1 file changed, 61 insertions(+), 64 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index bf341b3..aea0808 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -101,8 +101,8 @@ static uint64_t allOnes(unsigned int Count) { // The operands are: Input (R2), Start (I3), End (I4) and Rotate (I5). // The operand value is effectively (and (rotl Input Rotate) Mask) and // has BitSize bits. -struct RISBGOperands { - RISBGOperands(SDValue N) +struct RxSBGOperands { + RxSBGOperands(SDValue N) : BitSize(N.getValueType().getSizeInBits()), Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63), Rotate(0) {} @@ -227,9 +227,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { // set Op to that Y. bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask); - // Try to fold some of Ops.Input into other fields of Ops. Return true - // on success. - bool expandRISBG(RISBGOperands &Ops); + // Try to fold some of RxSBG.Input into other fields of RxSBG. + // Return true on success. + bool expandRxSBG(RxSBGOperands &RxSBG); // Return an undefined i64 value. SDValue getUNDEF64(SDLoc DL); @@ -604,21 +604,20 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) { unsigned First = findFirstSet(Mask); uint64_t Top = (Mask >> First) + 1; - if ((Top & -Top) == Top) - { - LSB = First; - Length = findFirstSet(Top); - return true; - } + if ((Top & -Top) == Top) { + LSB = First; + Length = findFirstSet(Top); + return true; + } return false; } -// Try to update RISBG so that only the bits of Ops.Input in Mask are used. +// Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used. // Return true on success. -static bool refineRISBGMask(RISBGOperands &RISBG, uint64_t Mask) { - if (RISBG.Rotate != 0) - Mask = (Mask << RISBG.Rotate) | (Mask >> (64 - RISBG.Rotate)); - Mask &= RISBG.Mask; +static bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) { + if (RxSBG.Rotate != 0) + Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)); + Mask &= RxSBG.Mask; // Reject trivial all-zero masks. if (Mask == 0) @@ -627,31 +626,29 @@ static bool refineRISBGMask(RISBGOperands &RISBG, uint64_t Mask) { // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of // the msb and End specifies the index of the lsb. unsigned LSB, Length; - if (isStringOfOnes(Mask, LSB, Length)) - { - RISBG.Mask = Mask; - RISBG.Start = 63 - (LSB + Length - 1); - RISBG.End = 63 - LSB; - return true; - } + if (isStringOfOnes(Mask, LSB, Length)) { + RxSBG.Mask = Mask; + RxSBG.Start = 63 - (LSB + Length - 1); + RxSBG.End = 63 - LSB; + return true; + } // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb // of the low 1s and End specifies the lsb of the high 1s. - if (isStringOfOnes(Mask ^ allOnes(RISBG.BitSize), LSB, Length)) - { - assert(LSB > 0 && "Bottom bit must be set"); - assert(LSB + Length < RISBG.BitSize && "Top bit must be set"); - RISBG.Mask = Mask; - RISBG.Start = 63 - (LSB - 1); - RISBG.End = 63 - (LSB + Length); - return true; - } + if (isStringOfOnes(Mask ^ allOnes(RxSBG.BitSize), LSB, Length)) { + assert(LSB > 0 && "Bottom bit must be set"); + assert(LSB + Length < RxSBG.BitSize && "Top bit must be set"); + RxSBG.Mask = Mask; + RxSBG.Start = 63 - (LSB - 1); + RxSBG.End = 63 - (LSB + Length); + return true; + } return false; } -bool SystemZDAGToDAGISel::expandRISBG(RISBGOperands &RISBG) { - SDValue N = RISBG.Input; +bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { + SDValue N = RxSBG.Input; switch (N.getOpcode()) { case ISD::AND: { ConstantSDNode *MaskNode = @@ -661,31 +658,31 @@ bool SystemZDAGToDAGISel::expandRISBG(RISBGOperands &RISBG) { SDValue Input = N.getOperand(0); uint64_t Mask = MaskNode->getZExtValue(); - if (!refineRISBGMask(RISBG, Mask)) { + if (!refineRxSBGMask(RxSBG, Mask)) { // If some bits of Input are already known zeros, those bits will have // been removed from the mask. See if adding them back in makes the // mask suitable. APInt KnownZero, KnownOne; CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); Mask |= KnownZero.getZExtValue(); - if (!refineRISBGMask(RISBG, Mask)) + if (!refineRxSBGMask(RxSBG, Mask)) return false; } - RISBG.Input = Input; + RxSBG.Input = Input; return true; } case ISD::ROTL: { - // Any 64-bit rotate left can be merged into the RISBG. - if (RISBG.BitSize != 64) + // Any 64-bit rotate left can be merged into the RxSBG. + if (RxSBG.BitSize != 64) return false; ConstantSDNode *CountNode = dyn_cast(N.getOperand(1).getNode()); if (!CountNode) return false; - RISBG.Rotate = (RISBG.Rotate + CountNode->getZExtValue()) & 63; - RISBG.Input = N.getOperand(0); + RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63; + RxSBG.Input = N.getOperand(0); return true; } @@ -698,12 +695,12 @@ bool SystemZDAGToDAGISel::expandRISBG(RISBGOperands &RISBG) { uint64_t Count = CountNode->getZExtValue(); if (Count < 1 || - Count >= RISBG.BitSize || - !refineRISBGMask(RISBG, allOnes(RISBG.BitSize - Count) << Count)) + Count >= RxSBG.BitSize || + !refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count) << Count)) return false; - RISBG.Rotate = (RISBG.Rotate + Count) & 63; - RISBG.Input = N.getOperand(0); + RxSBG.Rotate = (RxSBG.Rotate + Count) & 63; + RxSBG.Input = N.getOperand(0); return true; } @@ -717,12 +714,12 @@ bool SystemZDAGToDAGISel::expandRISBG(RISBGOperands &RISBG) { uint64_t Count = CountNode->getZExtValue(); if (Count < 1 || - Count >= RISBG.BitSize || - !refineRISBGMask(RISBG, allOnes(RISBG.BitSize - Count))) + Count >= RxSBG.BitSize || + !refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count))) return false; - RISBG.Rotate = (RISBG.Rotate - Count) & 63; - RISBG.Input = N.getOperand(0); + RxSBG.Rotate = (RxSBG.Rotate - Count) & 63; + RxSBG.Input = N.getOperand(0); return true; } @@ -735,14 +732,14 @@ bool SystemZDAGToDAGISel::expandRISBG(RISBGOperands &RISBG) { return false; uint64_t Count = CountNode->getZExtValue(); - if (RISBG.Rotate != 0 || + if (RxSBG.Rotate != 0 || Count < 1 || - Count >= RISBG.BitSize || - RISBG.Start < 64 - (RISBG.BitSize - Count)) + Count >= RxSBG.BitSize || + RxSBG.Start < 64 - (RxSBG.BitSize - Count)) return false; - RISBG.Rotate = -Count & 63; - RISBG.Input = N.getOperand(0); + RxSBG.Rotate = -Count & 63; + RxSBG.Input = N.getOperand(0); return true; } default: @@ -773,9 +770,9 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) { } SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { - RISBGOperands RISBG(SDValue(N, 0)); + RxSBGOperands RISBG(SDValue(N, 0)); unsigned Count = 0; - while (expandRISBG(RISBG)) + while (expandRxSBG(RISBG)) Count += 1; // Prefer to use normal shift instructions over RISBG, since they can handle // all cases and are sometimes shorter. Prefer to use RISBG for ANDs though, @@ -805,10 +802,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { SDNode *SystemZDAGToDAGISel::tryRISBGOrROSBG(SDNode *N) { // Try treating each operand of N as the second operand of RISBG or ROSBG // and see which goes deepest. - RISBGOperands RISBG[] = { N->getOperand(0), N->getOperand(1) }; + RxSBGOperands RxSBG[] = { N->getOperand(0), N->getOperand(1) }; unsigned Count[] = { 0, 0 }; for (unsigned I = 0; I < 2; ++I) - while (expandRISBG(RISBG[I])) + while (expandRxSBG(RxSBG[I])) Count[I] += 1; // Do nothing if neither operand is suitable. @@ -820,7 +817,7 @@ SDNode *SystemZDAGToDAGISel::tryRISBGOrROSBG(SDNode *N) { SDValue Op0 = N->getOperand(I ^ 1); // Prefer IC for character insertions from memory. - if ((RISBG[I].Mask & 0xff) == 0) + if ((RxSBG[I].Mask & 0xff) == 0) if (LoadSDNode *Load = dyn_cast(Op0.getNode())) if (Load->getMemoryVT() == MVT::i8) return 0; @@ -828,16 +825,16 @@ SDNode *SystemZDAGToDAGISel::tryRISBGOrROSBG(SDNode *N) { // See whether we can avoid an AND in the first operand by converting // ROSBG to RISBG. unsigned Opcode = SystemZ::ROSBG; - if (detectOrAndInsertion(Op0, RISBG[I].Mask)) + if (detectOrAndInsertion(Op0, RxSBG[I].Mask)) Opcode = SystemZ::RISBG; EVT VT = N->getValueType(0); SDValue Ops[5] = { convertTo(SDLoc(N), MVT::i64, Op0), - convertTo(SDLoc(N), MVT::i64, RISBG[I].Input), - CurDAG->getTargetConstant(RISBG[I].Start, MVT::i32), - CurDAG->getTargetConstant(RISBG[I].End, MVT::i32), - CurDAG->getTargetConstant(RISBG[I].Rotate, MVT::i32) + convertTo(SDLoc(N), MVT::i64, RxSBG[I].Input), + CurDAG->getTargetConstant(RxSBG[I].Start, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].End, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].Rotate, MVT::i32) }; N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops); return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); -- cgit v1.1 From 30a132f7676ec5465a2245cb94e7bd9214ea8eb7 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 18 Jul 2013 10:06:15 +0000 Subject: [SystemZ] Use RXSBG Extend the previous R.SBG patches to handle XORs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186570 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index aea0808..67e9d21 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -241,9 +241,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { // Return the selected node on success, otherwise return null. SDNode *tryRISBGZero(SDNode *N); - // Try to use RISBG or ROSBG to implement OR node N. Return the selected - // node on success, otherwise return null. - SDNode *tryRISBGOrROSBG(SDNode *N); + // Try to use RISBG or Opcode to implement OR or XOR node N. + // Return the selected node on success, otherwise return null. + SDNode *tryRxSBG(SDNode *N, unsigned Opcode); // If Op0 is null, then Node is a constant that can be loaded using: // @@ -799,8 +799,8 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); } -SDNode *SystemZDAGToDAGISel::tryRISBGOrROSBG(SDNode *N) { - // Try treating each operand of N as the second operand of RISBG or ROSBG +SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { + // Try treating each operand of N as the second operand of the RxSBG // and see which goes deepest. RxSBGOperands RxSBG[] = { N->getOperand(0), N->getOperand(1) }; unsigned Count[] = { 0, 0 }; @@ -817,15 +817,14 @@ SDNode *SystemZDAGToDAGISel::tryRISBGOrROSBG(SDNode *N) { SDValue Op0 = N->getOperand(I ^ 1); // Prefer IC for character insertions from memory. - if ((RxSBG[I].Mask & 0xff) == 0) + if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0) if (LoadSDNode *Load = dyn_cast(Op0.getNode())) if (Load->getMemoryVT() == MVT::i8) return 0; // See whether we can avoid an AND in the first operand by converting // ROSBG to RISBG. - unsigned Opcode = SystemZ::ROSBG; - if (detectOrAndInsertion(Op0, RxSBG[I].Mask)) + if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) Opcode = SystemZ::RISBG; EVT VT = N->getValueType(0); @@ -913,9 +912,14 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { case ISD::OR: if (Node->getOperand(1).getOpcode() != ISD::Constant) - ResNode = tryRISBGOrROSBG(Node); - // Fall through. + ResNode = tryRxSBG(Node, SystemZ::ROSBG); + goto or_xor; + case ISD::XOR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RXSBG); + // Fall through. + or_xor: // If this is a 64-bit operation in which both 32-bit halves are nonzero, // split the operation into two. if (!ResNode && Node->getValueType(0) == MVT::i64) @@ -931,7 +935,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { case ISD::ROTL: case ISD::SHL: case ISD::SRL: - ResNode = tryRISBGZero(Node); + if (!ResNode) + ResNode = tryRISBGZero(Node); break; case ISD::Constant: -- cgit v1.1 From 9dffd71d0af3d78ee1f21865dd064fb43bc623be Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 18 Jul 2013 10:14:55 +0000 Subject: [SystemZ] Generalize RxSBG SRA case The original code only folded SRA into ROTATE ... SELECTED BITS if there was no outer shift. This patch splits out that check and generalises it slightly. The extra cases aren't really that interesting, but this is paving the way for RNSBG support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186571 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 63 +++++++++++++++++------------- 1 file changed, 36 insertions(+), 27 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 67e9d21..0f9a37e 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -647,9 +647,29 @@ static bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) { return false; } +// RxSBG.Input is a shift of Count bits in the direction given by IsLeft. +// Return true if the result depends on the signs or zeros that are +// shifted in. +static bool shiftedInBitsMatter(RxSBGOperands &RxSBG, uint64_t Count, + bool IsLeft) { + // Work out which bits of the shift result are zeros or sign copies. + uint64_t ShiftedIn = allOnes(Count); + if (!IsLeft) + ShiftedIn <<= RxSBG.BitSize - Count; + + // Rotate that mask in the same way as RxSBG.Input is rotated. + if (RxSBG.Rotate != 0) + ShiftedIn = ((ShiftedIn << RxSBG.Rotate) | + (ShiftedIn >> (64 - RxSBG.Rotate))); + + // Fail if any of the zero or sign bits are used. + return (ShiftedIn & RxSBG.Mask) != 0; +} + bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { SDValue N = RxSBG.Input; - switch (N.getOpcode()) { + unsigned Opcode = N.getOpcode(); + switch (Opcode) { case ISD::AND: { ConstantSDNode *MaskNode = dyn_cast(N.getOperand(1).getNode()); @@ -704,41 +724,30 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { return true; } - case ISD::SRL: { - // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count), - // which is similar to SLL above. - ConstantSDNode *CountNode = - dyn_cast(N.getOperand(1).getNode()); - if (!CountNode) - return false; - - uint64_t Count = CountNode->getZExtValue(); - if (Count < 1 || - Count >= RxSBG.BitSize || - !refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count))) - return false; - - RxSBG.Rotate = (RxSBG.Rotate - Count) & 63; - RxSBG.Input = N.getOperand(0); - return true; - } - + case ISD::SRL: case ISD::SRA: { - // Treat (sra X, count) as (rotl X, size-count) as long as the top - // count bits from Ops.Input are ignored. ConstantSDNode *CountNode = dyn_cast(N.getOperand(1).getNode()); if (!CountNode) return false; uint64_t Count = CountNode->getZExtValue(); - if (RxSBG.Rotate != 0 || - Count < 1 || - Count >= RxSBG.BitSize || - RxSBG.Start < 64 - (RxSBG.BitSize - Count)) + if (Count < 1 || Count >= RxSBG.BitSize) return false; - RxSBG.Rotate = -Count & 63; + if (Opcode == ISD::SRA) { + // Treat (sra X, count) as (rotl X, size-count) as long as the top + // Count bits from RxSBG.Input are ignored. + if (shiftedInBitsMatter(RxSBG, Count, false)) + return false; + } else { + // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count), + // which is similar to SLL above. + if (!refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count))) + return false; + } + + RxSBG.Rotate = (RxSBG.Rotate - Count) & 63; RxSBG.Input = N.getOperand(0); return true; } -- cgit v1.1 From 6a3d933e1645d34984f4c7c9e2e4e46d0d15e1b3 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 18 Jul 2013 10:20:25 +0000 Subject: Add Thumb tests for the ARMv8 FP instructions that I recently added. Also, fix the namespace for two instructions that I missed previously. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186572 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrVFP.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index cbfd25f..f9cfa15 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -584,7 +584,7 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, } multiclass vcvt_inst rm> { - let PostEncoderMethod = "" in { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), @@ -670,7 +670,7 @@ defm VRINTR : vrint_inst_zrx<"r", 0, 0>; defm VRINTX : vrint_inst_zrx<"x", 1, 0>; multiclass vrint_inst_anpm rm> { - let PostEncoderMethod = "" in { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), -- cgit v1.1 From 722a26d63e717f5cfbf924e042f4f300bfee1328 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 18 Jul 2013 10:40:35 +0000 Subject: [SystemZ] Use RNSBG This should be the last of the R.SBG patches for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186573 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/README.txt | 5 -- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 83 ++++++++++++++++++++++++------ 2 files changed, 67 insertions(+), 21 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 55e9fc0..2782b63 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -118,11 +118,6 @@ such as ICM and STCM. -- -We could make more use of the ROTATE AND ... SELECTED BITS instructions. -At the moment we only use RISBG, and only then for subword atomic operations. - --- - DAGCombiner can detect integer absolute, but there's not yet an associated ISD opcode. We could add one and implement it using LOAD POSITIVE. Negated absolutes could use LOAD NEGATIVE. diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 0f9a37e..8866253 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -97,15 +97,24 @@ static uint64_t allOnes(unsigned int Count) { return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; } -// Represents operands 2 to 5 of a ROTATE AND ... SELECTED BITS operation. -// The operands are: Input (R2), Start (I3), End (I4) and Rotate (I5). -// The operand value is effectively (and (rotl Input Rotate) Mask) and -// has BitSize bits. +// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation +// given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and +// Rotate (I5). The combined operand value is effectively: +// +// (or (rotl Input, Rotate), ~Mask) +// +// for RNSBG and: +// +// (and (rotl Input, Rotate), Mask) +// +// otherwise. The value has BitSize bits. struct RxSBGOperands { - RxSBGOperands(SDValue N) - : BitSize(N.getValueType().getSizeInBits()), Mask(allOnes(BitSize)), - Input(N), Start(64 - BitSize), End(63), Rotate(0) {} + RxSBGOperands(unsigned Op, SDValue N) + : Opcode(Op), BitSize(N.getValueType().getSizeInBits()), + Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63), + Rotate(0) {} + unsigned Opcode; unsigned BitSize; uint64_t Mask; SDValue Input; @@ -671,6 +680,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { unsigned Opcode = N.getOpcode(); switch (Opcode) { case ISD::AND: { + if (RxSBG.Opcode == SystemZ::RNSBG) + return false; + ConstantSDNode *MaskNode = dyn_cast(N.getOperand(1).getNode()); if (!MaskNode) @@ -692,6 +704,31 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { return true; } + case ISD::OR: { + if (RxSBG.Opcode != SystemZ::RNSBG) + return false; + + ConstantSDNode *MaskNode = + dyn_cast(N.getOperand(1).getNode()); + if (!MaskNode) + return false; + + SDValue Input = N.getOperand(0); + uint64_t Mask = ~MaskNode->getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) { + // If some bits of Input are already known ones, those bits will have + // been removed from the mask. See if adding them back in makes the + // mask suitable. + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); + Mask &= ~KnownOne.getZExtValue(); + if (!refineRxSBGMask(RxSBG, Mask)) + return false; + } + RxSBG.Input = Input; + return true; + } + case ISD::ROTL: { // Any 64-bit rotate left can be merged into the RxSBG. if (RxSBG.BitSize != 64) @@ -707,18 +744,26 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) { } case ISD::SHL: { - // Treat (shl X, count) as (and (rotl X, count), ~0<(N.getOperand(1).getNode()); if (!CountNode) return false; uint64_t Count = CountNode->getZExtValue(); - if (Count < 1 || - Count >= RxSBG.BitSize || - !refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count) << Count)) + if (Count < 1 || Count >= RxSBG.BitSize) return false; + if (RxSBG.Opcode == SystemZ::RNSBG) { + // Treat (shl X, count) as (rotl X, size-count) as long as the bottom + // count bits from RxSBG.Input are ignored. + if (shiftedInBitsMatter(RxSBG, Count, true)) + return false; + } else { + // Treat (shl X, count) as (and (rotl X, count), ~0<= RxSBG.BitSize) return false; - if (Opcode == ISD::SRA) { - // Treat (sra X, count) as (rotl X, size-count) as long as the top - // Count bits from RxSBG.Input are ignored. + if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) { + // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top + // count bits from RxSBG.Input are ignored. if (shiftedInBitsMatter(RxSBG, Count, false)) return false; } else { @@ -779,7 +824,7 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) { } SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { - RxSBGOperands RISBG(SDValue(N, 0)); + RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); unsigned Count = 0; while (expandRxSBG(RISBG)) Count += 1; @@ -811,7 +856,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { // Try treating each operand of N as the second operand of the RxSBG // and see which goes deepest. - RxSBGOperands RxSBG[] = { N->getOperand(0), N->getOperand(1) }; + RxSBGOperands RxSBG[] = { + RxSBGOperands(Opcode, N->getOperand(0)), + RxSBGOperands(Opcode, N->getOperand(1)) + }; unsigned Count[] = { 0, 0 }; for (unsigned I = 0; I < 2; ++I) while (expandRxSBG(RxSBG[I])) @@ -941,6 +989,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { break; case ISD::AND: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RNSBG); + // Fall through. case ISD::ROTL: case ISD::SHL: case ISD::SRL: -- cgit v1.1 From 59b3300664d062bf04159eacaeb44d6c729e6a8c Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 18 Jul 2013 11:53:22 +0000 Subject: [ARMv8] Add NEON instructions VCVT{A, N, P, M}. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186574 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 29 +++++++++++++++++++++++++++++ lib/Target/ARM/ARMInstrNEON.td | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index a835187..1349476 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1964,6 +1964,35 @@ class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{5} = Vm{4}; } +// Same as N2V but not predicated. +class N2Vnp op17_16, bits<3> op10_8, bit op7, bit op6, + dag oops, dag iops, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = 0b00111; + let Inst{21-20} = 0b11; + let Inst{19-18} = 0b10; + let Inst{17-16} = op17_16; + let Inst{11} = 0; + let Inst{10-8} = op10_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = 0; + + let DecoderNamespace = "NEON"; +} + // Same as N2V except it doesn't have a datatype suffix. class N2VX op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f389909..d707e58 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2379,6 +2379,21 @@ class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; +// Same as above, but not predicated. +class N2VDIntnp op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp; + +class N2VQIntnp op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp; + // Narrow 2-register operations. class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -5431,6 +5446,26 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; +// VCVT{A, N, P, M} +multiclass VCVT_FPI op10_8, SDPatternOperator IntS, + SDPatternOperator IntU> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; + def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; + def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; + def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; + } +} + +defm VCVTAn : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; +defm VCVTNn : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; +defm VCVTPn : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; +defm VCVTMn : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; + // VCVT : Vector Convert Between Floating-Point and Fixed-Point. let DecoderMethod = "DecodeVCVTD" in { def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", -- cgit v1.1 From 4e377d9306f471025415beb7639e0a3e776efa27 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 18 Jul 2013 12:00:25 +0000 Subject: Change 'n' to 'N' to keep consistent with other instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d707e58..0836700 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5461,10 +5461,10 @@ multiclass VCVT_FPI op10_8, SDPatternOperator IntS, } } -defm VCVTAn : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; -defm VCVTNn : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; -defm VCVTPn : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; -defm VCVTMn : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; +defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; +defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; +defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; +defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; // VCVT : Vector Convert Between Floating-Point and Fixed-Point. let DecoderMethod = "DecodeVCVTD" in { -- cgit v1.1 From f5660aab413539bd94cfea8cd88fed80c54cd984 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 18 Jul 2013 21:43:35 +0000 Subject: R600: Expand VSELECT for all types git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186613 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 3 +++ lib/Target/R600/R600ISelLowering.cpp | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index c9df89c..e2fd369 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -77,6 +77,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); + setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); static const int types[] = { (int)MVT::v2i32, @@ -97,6 +99,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::SUB, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::XOR, VT, Expand); } } diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 7aef08a..1067b38 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -67,9 +67,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); - setOperationAction(ISD::VSELECT, MVT::v4i32, Expand); - setOperationAction(ISD::VSELECT, MVT::v2i32, Expand); - // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::LOAD, MVT::v2i32, Expand); -- cgit v1.1 From fc047278c160cf15d99502d8170d431cfcfe8a5b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 18 Jul 2013 21:43:42 +0000 Subject: R600/SI: Add support for v2f32 stores git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186614 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 3 +++ lib/Target/R600/SIInstructions.td | 3 +++ 2 files changed, 6 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index e2fd369..666e158 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -57,6 +57,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::f32, Promote); AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); + setOperationAction(ISD::STORE, MVT::v2f32, Promote); + AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); + setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 95e86d7..76b73de 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1484,6 +1484,9 @@ def : BitConvert ; def : BitConvert ; +def : BitConvert ; +def : BitConvert ; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ -- cgit v1.1 From ac85f3f65ce67f71bb8e4626e0a50d818500e426 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 18 Jul 2013 21:43:48 +0000 Subject: R600/SI: Add support for v2f32 loads git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186615 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 3 +++ lib/Target/R600/SIInstructions.td | 1 + lib/Target/R600/SIRegisterInfo.td | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 666e158..2a4e44f 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -69,6 +69,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::f32, Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); + setOperationAction(ISD::LOAD, MVT::v2f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); + setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 76b73de..c7d97c9 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1707,6 +1707,7 @@ multiclass SMRD_Pattern { defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; +defm : SMRD_Pattern ; defm : SMRD_Pattern ; defm : SMRD_Pattern ; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 244d4c0..292b9d2 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -153,7 +153,7 @@ def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add SGPR_32, M0Reg) >; -def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, +def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64, VCCReg, EXECReg) >; -- cgit v1.1 From 4e518fd941b119834b5764708fbabf41adc45040 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 18 Jul 2013 21:43:53 +0000 Subject: R600/SI: Fix crash with VSELECT https://bugs.freedesktop.org/show_bug.cgi?id=66175 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186616 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 11 ++++++++++- lib/Target/R600/SIInstructions.td | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 6cae978..316567c 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -34,6 +34,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); @@ -72,6 +75,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SETCC, MVT::v2i1, Expand); + setOperationAction(ISD::SETCC, MVT::v4i1, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -318,7 +324,10 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( } EVT SITargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - return MVT::i1; + if (!VT.isVector()) { + return MVT::i1; + } + return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); } MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index c7d97c9..789a518 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1487,6 +1487,9 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; +def : BitConvert ; +def : BitConvert ; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ -- cgit v1.1 From bbcea55b68fad8116c29b3e831c5df398d558569 Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Thu, 18 Jul 2013 22:19:59 +0000 Subject: ARM: Make sure the instruction alias for PLI uses the right subtarget features. PLI requires both the Thumb2 and the ARMv7 feature. Related to . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186620 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 19d76e5..f014e02 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4429,4 +4429,6 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm", (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; // PLI with alternate literal form. -def : t2InstAlias<"pli${p} $addr", (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : InstAlias<"pli${p} $addr", + (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>, + Requires<[IsThumb2,HasV7]>; -- cgit v1.1 From da218210f7371cd47a43252756e03b6a03a6b06d Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 19 Jul 2013 01:19:52 +0000 Subject: [mips] Delete MFC1_FT_CCR, MTC1_FT_CCR and MOVCCRToCCR. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186642 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFPU.td | 15 ++------------- lib/Target/Mips/MipsSEInstrInfo.cpp | 2 -- 2 files changed, 2 insertions(+), 15 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index ec4c429..fc7ea30 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -141,16 +141,6 @@ class MTC1_FT; -class MFC1_FT_CCR : - InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), - [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>; - -class MTC1_FT_CCR : - InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), - [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; - class LW_FT : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), @@ -341,8 +331,8 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; // regardless of register aliasing. /// Move Control Registers From/To CPU Registers -def CFC1 : MFC1_FT_CCR<"cfc1", CPURegsOpnd, CCROpnd, IIFmove>, MFC1_FM<2>; -def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegsOpnd, IIFmove>, MFC1_FM<6>; +def CFC1 : MFC1_FT<"cfc1", CPURegsOpnd, CCROpnd, IIFmove>, MFC1_FM<2>; +def CTC1 : MTC1_FT<"ctc1", CCROpnd, CPURegsOpnd, IIFmove>, MFC1_FM<6>; def MFC1 : MFC1_FT<"mfc1", CPURegsOpnd, FGR32RegsOpnd, IIFmoveC1, bitconvert>, MFC1_FM<0>; def MTC1 : MTC1_FT<"mtc1", FGR32RegsOpnd, CPURegsOpnd, IIFmoveC1, bitconvert>, @@ -549,7 +539,6 @@ def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// -def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index e2a33dd..c384f31 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -141,8 +141,6 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::FMOV_D32; else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_D64; - else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) - Opc = Mips::MOVCCRToCCR; else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. if (Mips::CPU64RegsRegClass.contains(SrcReg)) Opc = Mips::OR64, ZeroReg = Mips::ZERO_64; -- cgit v1.1 From eddfaad1ef9a208a8a9ee23c26fac4d980caa99a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 19 Jul 2013 16:09:03 +0000 Subject: [SystemZ] Start adding z196 and zEC12 support This first step just adds definitions for SLLK, SRLK and SRAK. The next patch will actually make use of them during codegen. insn-bad.s tests that some form of error is reported when using these instructions on z10. More work is needed to get the "instruction requires: distinct-ops" that we'd ideally like, so I've stubbed that part out for now. I'll come back and make it mandatory once the necessary changes are in. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186680 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZ.td | 7 ++----- lib/Target/SystemZ/SystemZInstrFormats.td | 19 +++++++++++++------ lib/Target/SystemZ/SystemZInstrInfo.td | 16 ++++++++-------- lib/Target/SystemZ/SystemZProcessors.td | 26 ++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZSubtarget.cpp | 4 +++- lib/Target/SystemZ/SystemZSubtarget.h | 6 ++++++ 6 files changed, 58 insertions(+), 20 deletions(-) create mode 100644 lib/Target/SystemZ/SystemZProcessors.td (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td index e03c32f..abf5c8e 100644 --- a/lib/Target/SystemZ/SystemZ.td +++ b/lib/Target/SystemZ/SystemZ.td @@ -14,13 +14,10 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// SystemZ supported processors +// SystemZ supported processors and features //===----------------------------------------------------------------------===// -class Proc Features> - : Processor; - -def : Proc<"z10", []>; +include "SystemZProcessors.td" //===----------------------------------------------------------------------===// // Register file description diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 7300b90..45147c1 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -816,20 +816,27 @@ multiclass BinarySIPair siOpcode, } class ShiftRS opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode> - : InstRS + : InstRS { + [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> { let R3 = 0; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class ShiftRSY opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode> - : InstRSY + : InstRSY; + [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>; + +multiclass ShiftRSAndK opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls> { + def K : ShiftRSY, + Requires<[FeatureDistinctOps]>; + def "" : ShiftRS; +} class CompareRR opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index c6839e8..4670156 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -796,26 +796,26 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; // Shift left. let neverHasSideEffects = 1 in { - def SLL : ShiftRS <"sll", 0x89, shl, GR32, shift12only>; - def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64, shift20only>; + defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; + def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64>; } // Logical shift right. let neverHasSideEffects = 1 in { - def SRL : ShiftRS <"srl", 0x88, srl, GR32, shift12only>; - def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64, shift20only>; + defm SRL : ShiftRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; + def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64>; } // Arithmetic shift right. let Defs = [CC] in { - def SRA : ShiftRS <"sra", 0x8A, sra, GR32, shift12only>; - def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64, shift20only>; + defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; + def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>; } // Rotate left. let neverHasSideEffects = 1 in { - def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32, shift20only>; - def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64, shift20only>; + def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32>; + def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64>; } // Rotate second operand left and inserted selected bits into first operand. diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td new file mode 100644 index 0000000..5668ae3 --- /dev/null +++ b/lib/Target/SystemZ/SystemZProcessors.td @@ -0,0 +1,26 @@ +//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Processor and feature definitions. +// +//===----------------------------------------------------------------------===// + +class SystemZFeature + : Predicate<"Subtarget.has"##intname##"()">, + AssemblerPredicate<"Feature"##intname, extname>, + SubtargetFeature; + +def FeatureDistinctOps : SystemZFeature< + "distinct-ops", "DistinctOps", + "Assume that the distinct-operands facility is installed" +>; + +def : Processor<"z10", NoItineraries, []>; +def : Processor<"z196", NoItineraries, [FeatureDistinctOps]>; +def : Processor<"zEC12", NoItineraries, [FeatureDistinctOps]>; diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index cfd3324..f37ea21 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -9,6 +9,7 @@ #include "SystemZSubtarget.h" #include "llvm/IR/GlobalValue.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -19,7 +20,8 @@ using namespace llvm; SystemZSubtarget::SystemZSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) - : SystemZGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT) { + : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), + TargetTriple(TT) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "z10"; diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 8d4d450..4a86287 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -26,6 +26,9 @@ class GlobalValue; class StringRef; class SystemZSubtarget : public SystemZGenSubtargetInfo { +protected: + bool HasDistinctOps; + private: Triple TargetTriple; @@ -36,6 +39,9 @@ public: // Automatically generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + // Return true if the target has the distinct-operands facility. + bool hasDistinctOps() const { return HasDistinctOps; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, -- cgit v1.1 From 93c2125c3979bcb4656daf3c2fb5748fb3973e1a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 19 Jul 2013 16:12:08 +0000 Subject: [SystemZ] Use SLLK, SRLK and SRAK for codegen This patch uses the instructions added in r186680 for codegen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186681 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFormats.td | 26 +++++++++++++++-- lib/Target/SystemZ/SystemZInstrInfo.cpp | 47 +++++++++++++++++++++++++++++-- lib/Target/SystemZ/SystemZInstrInfo.h | 5 ++++ 3 files changed, 73 insertions(+), 5 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 45147c1..b030182 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -34,6 +34,12 @@ class InstSystemZ opcode, SDPatternOperator operator, multiclass ShiftRSAndK opcode1, bits<16> opcode2, SDPatternOperator operator, RegisterOperand cls> { - def K : ShiftRSY, - Requires<[FeatureDistinctOps]>; - def "" : ShiftRS; + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : ShiftRSY, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : ShiftRS; + } } class CompareRR opcode, SDPatternOperator operator, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index bbac73f..3a502a0 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -12,9 +12,10 @@ //===----------------------------------------------------------------------===// #include "SystemZInstrInfo.h" +#include "SystemZTargetMachine.h" #include "SystemZInstrBuilder.h" +#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" #define GET_INSTRINFO_CTOR #define GET_INSTRMAP_INFO @@ -24,7 +25,7 @@ using namespace llvm; SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP), - RI(tm) { + RI(tm), TM(tm) { } // MI is a 128-bit load or store. Split it into two 64-bit loads or stores, @@ -352,6 +353,48 @@ static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) { } MachineInstr * +SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + MachineInstr *MI = MBBI; + MachineBasicBlock *MBB = MI->getParent(); + + unsigned Opcode = MI->getOpcode(); + unsigned NumOps = MI->getNumOperands(); + + // Try to convert something like SLL into SLLK, if supported. + // We prefer to keep the two-operand form where possible both + // because it tends to be shorter and because some instructions + // have memory forms that can be used during spilling. + if (TM.getSubtargetImpl()->hasDistinctOps()) { + int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); + if (ThreeOperandOpcode >= 0) { + unsigned DestReg = MI->getOperand(0).getReg(); + MachineOperand &Src = MI->getOperand(1); + MachineInstrBuilder MIB = BuildMI(*MBB, MBBI, MI->getDebugLoc(), + get(ThreeOperandOpcode), DestReg); + // Keep the kill state, but drop the tied flag. + MIB.addReg(Src.getReg(), getKillRegState(Src.isKill())); + // Keep the remaining operands as-is. + for (unsigned I = 2; I < NumOps; ++I) + MIB.addOperand(MI->getOperand(I)); + MachineInstr *NewMI = MIB; + + // Transfer killing information to the new instruction. + if (LV) { + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = MI->getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), MI, NewMI); + } + } + return MIB; + } + } + return 0; +} + +MachineInstr * SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 3fe71d8..2050e8e 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -79,6 +79,7 @@ namespace SystemZII { class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; + SystemZTargetMachine &TM; void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; @@ -120,6 +121,10 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; virtual MachineInstr * + convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + virtual MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, int FrameIndex) const; -- cgit v1.1 From cae5d5ea658e05091e66b742b5834f1896ff2f5d Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Fri, 19 Jul 2013 16:18:56 +0000 Subject: ARM: Add instruction aliases for the Thumb2 PLD/PLDW (literal) alternate form. See A8.8.127 in ARM DDI 0406C.b. Related to . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186682 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index f014e02..fdbf5ae 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4428,7 +4428,12 @@ def : t2InstAlias<"ldrsh${p}.w $Rt, $addr", def : t2InstAlias<"add${p} $Rd, pc, $imm", (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; -// PLI with alternate literal form. +// PLD/PLDW/PLI with alternate literal form. +def : t2InstAlias<"pld${p} $addr", + (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : InstAlias<"pldw${p} $addr", + (t2PLDWpci t2ldr_pcrel_imm12:$addr, pred:$p)>, + Requires<[IsThumb2,HasV7,HasMP]>; def : InstAlias<"pli${p} $addr", (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>, Requires<[IsThumb2,HasV7]>; -- cgit v1.1 From db92fb07169af6941dfe47439f9849d370f0eb0b Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 19 Jul 2013 16:21:55 +0000 Subject: [SystemZ] Add NRK, ORK and XRK The atomic tests assume the two-operand forms, so I've restricted them to z10. Running and-01.ll, or-01.ll and xor-01.ll for z196 as well as z10 shows why using convertToThreeAddress() is better than exposing the three-operand forms first and then converting back to two operands where possible (which is what I'd originally tried). Using the three-operand form first stops us from taking advantage of NG, OG and XG for spills. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186683 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFormats.td | 18 ++++++++++++++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 6 +++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index b030182..24f86bc 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -729,6 +729,24 @@ class BinaryRRF opcode, SDPatternOperator operator, let OpType = "reg"; } +class BinaryRRFK opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF; + +multiclass BinaryRRAndK opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFK, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRR; + } +} + class BinaryRI opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRI; + defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>; def NGR : BinaryRRE<"ng", 0xB980, and, GR64, GR64>; } @@ -685,7 +685,7 @@ defm : RMWIByte; let Defs = [CC] in { // ORs of a register. let isCommutable = 1 in { - def OR : BinaryRR <"o", 0x16, or, GR32, GR32>; + defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>; def OGR : BinaryRRE<"og", 0xB981, or, GR64, GR64>; } @@ -722,7 +722,7 @@ defm : RMWIByte; let Defs = [CC] in { // XORs of a register. let isCommutable = 1 in { - def XR : BinaryRR <"x", 0x17, xor, GR32, GR32>; + defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>; def XGR : BinaryRRE<"xg", 0xB982, xor, GR64, GR64>; } -- cgit v1.1 From 52b2774577e07fbf804e4d647119578df4111f21 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 19 Jul 2013 16:24:22 +0000 Subject: [SystemZ] Add NGRK, OGRK and XGRK Like r186683, but for 64 bits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186685 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFormats.td | 12 ++++++++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 24f86bc..f099975 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -747,6 +747,18 @@ multiclass BinaryRRAndK opcode1, bits<16> opcode2, } } +multiclass BinaryRREAndK opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFK, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRRE; + } +} + class BinaryRI opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRI; - def NGR : BinaryRRE<"ng", 0xB980, and, GR64, GR64>; + defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>; } // ANDs of a 16-bit immediate, leaving other bits unaffected. @@ -686,7 +686,7 @@ let Defs = [CC] in { // ORs of a register. let isCommutable = 1 in { defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>; - def OGR : BinaryRRE<"og", 0xB981, or, GR64, GR64>; + defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>; } // ORs of a 16-bit immediate, leaving other bits unaffected. @@ -723,7 +723,7 @@ let Defs = [CC] in { // XORs of a register. let isCommutable = 1 in { defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>; - def XGR : BinaryRRE<"xg", 0xB982, xor, GR64, GR64>; + defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>; } // XORs of a 32-bit immediate, leaving other bits unaffected. -- cgit v1.1 From dc05e0bff67f818e615a47e831ff92d65ee0ac64 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 19 Jul 2013 16:26:39 +0000 Subject: [SystemZ] Add ARK, AGRK, SRK and SGRK The testsuite changes follow the same lines as for r186683. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186686 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 5ae6af2..fe2b536 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -535,8 +535,8 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { - def AR : BinaryRR <"a", 0x1A, add, GR32, GR32>; - def AGR : BinaryRRE<"ag", 0xB908, add, GR64, GR64>; + defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>; + defm AGR : BinaryRREAndK<"ag", 0xB908, 0xB9E8, add, GR64, GR64>; } def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>; @@ -599,9 +599,9 @@ let Defs = [CC], Uses = [CC] in { // add-immediate instruction instead. let Defs = [CC] in { // Subtraction of a register. - def SR : BinaryRR <"s", 0x1B, sub, GR32, GR32>; + defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>; def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>; - def SGR : BinaryRRE<"sg", 0xB909, sub, GR64, GR64>; + defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>; // Subtraction of memory. defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16, 2>; -- cgit v1.1 From 70d3e71f2e44250594f1b6edd7bbbf8b945a4452 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 19 Jul 2013 16:32:12 +0000 Subject: [SystemZ] Add AHIK and AGHIK I did these as a separate patch because it uses a slightly different form of RIE layout. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186687 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrFormats.td | 35 +++++++++++++++++++++++++++++++ lib/Target/SystemZ/SystemZInstrInfo.td | 4 ++-- 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index f099975..9257a6a 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -181,6 +181,23 @@ class InstRIEc op, dag outs, dag ins, string asmstr, list pattern> let Inst{7-0} = op{7-0}; } +class InstRIEd op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<16> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-16} = I2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + class InstRIEf op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -768,6 +785,24 @@ class BinaryRI opcode, SDPatternOperator operator, let DisableEncoding = "$R1src"; } +class BinaryRIE opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIEd; + +multiclass BinaryRIAndK opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls, + Immediate imm> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRIE, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRI; + } +} + class BinaryRIL opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIL; // Addition of signed 16-bit immediates. - def AHI : BinaryRI<"ahi", 0xA7A, add, GR32, imm32sx16>; - def AGHI : BinaryRI<"aghi", 0xA7B, add, GR64, imm64sx16>; + defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>; + defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>; // Addition of signed 32-bit immediates. def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>; -- cgit v1.1 From 6fec715a1a662ce3b560f85c710875cfeeb1fb98 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 19 Jul 2013 16:34:16 +0000 Subject: [ARMv8] Implement the NEON instructions VRINT{N, X, A, Z, M, P}. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186688 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 28 ++++++++++++++++++++++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 0836700..3e2ab06 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5738,6 +5738,34 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +// VRINT : Vector Rounding +multiclass VRINT_FPI op9_7, SDPatternOperator Int> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + } + + def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm)>; + def : InstAlias(NAME#"Q") QPR:$Qd, QPR:$Qm)>; +} + +defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; +defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; +defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; +defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; +defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; +defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f114b7a..7061ede 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -263,6 +263,8 @@ class ARMAsmParser : public MCTargetAsmParser { const SmallVectorImpl &Ops); bool shouldOmitCCOutOperand(StringRef Mnemonic, SmallVectorImpl &Operands); + bool shouldOmitPredicateOperand(StringRef Mnemonic, + SmallVectorImpl &Operands); public: enum ARMMatchResultTy { @@ -5157,6 +5159,25 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, return false; } +bool ARMAsmParser::shouldOmitPredicateOperand( + StringRef Mnemonic, SmallVectorImpl &Operands) { + // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON + unsigned RegIdx = 3; + if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") && + static_cast(Operands[2])->getToken() == ".f32") { + if (static_cast(Operands[3])->isToken() && + static_cast(Operands[3])->getToken() == ".f32") + RegIdx = 4; + + if (static_cast(Operands[RegIdx])->isReg() && + (ARMMCRegisterClasses[ARM::DPRRegClassID] + .contains(static_cast(Operands[RegIdx])->getReg()) || + ARMMCRegisterClasses[ARM::QPRRegClassID] + .contains(static_cast(Operands[RegIdx])->getReg()))) + return true; + } +} + static bool isDataTypeToken(StringRef Tok) { return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" || Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" || @@ -5359,6 +5380,15 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, delete Op; } + // Some instructions have the same mnemonic, but don't always + // have a predicate. Distinguish them here and delete the + // predicate if needed. + if (shouldOmitPredicateOperand(Mnemonic, Operands)) { + ARMOperand *Op = static_cast(Operands[1]); + Operands.erase(Operands.begin() + 1); + delete Op; + } + // ARM mode 'blx' need special handling, as the register operand version // is predicable, but the label operand version is not. So, we can't rely // on the Mnemonic based checking to correctly figure out when to put -- cgit v1.1 From c7c7e1502a62123a5e54fe6ff7da490bf26d319e Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 19 Jul 2013 16:37:00 +0000 Subject: [SystemZ] Add ALRK, AGLRK, SLRK and SGLRK Follows the same lines as r186686, but much more limited, since we only use ADD LOGICAL for multi-i64 additions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186689 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZInstrInfo.td | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 5ffb86b..56b7a1f 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -564,11 +564,17 @@ defm : SXB; let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { - def ALR : BinaryRR <"al", 0x1E, addc, GR32, GR32>; - def ALGR : BinaryRRE<"alg", 0xB90A, addc, GR64, GR64>; + defm ALR : BinaryRRAndK<"al", 0x1E, 0xB9FA, addc, GR32, GR32>; + defm ALGR : BinaryRREAndK<"alg", 0xB90A, 0xB9EA, addc, GR64, GR64>; } def ALGFR : BinaryRRE<"algf", 0xB91A, null_frag, GR64, GR32>; + // Addition of signed 16-bit immediates. + def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>, + Requires<[FeatureDistinctOps]>; + def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, addc, GR64, imm64sx16>, + Requires<[FeatureDistinctOps]>; + // Addition of unsigned 32-bit immediates. def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>; def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>; @@ -614,9 +620,9 @@ defm : SXB; // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. - def SLR : BinaryRR <"sl", 0x1F, subc, GR32, GR32>; + defm SLR : BinaryRRAndK<"sl", 0x1F, 0xB9FB, subc, GR32, GR32>; def SLGFR : BinaryRRE<"slgf", 0xB91B, null_frag, GR64, GR32>; - def SLGR : BinaryRRE<"slg", 0xB90B, subc, GR64, GR64>; + defm SLGR : BinaryRREAndK<"slg", 0xB90B, 0xB9EB, subc, GR64, GR64>; // Subtraction of unsigned 32-bit immediates. These don't match // subc because we prefer addc for constants. -- cgit v1.1 From 55dcefbc4006204c0d2816d5a7c921517c53383c Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 19 Jul 2013 16:45:16 +0000 Subject: Add a line that got missed off somehow. Sorry about that! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186692 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Target') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 7061ede..e10d2c7 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5176,6 +5176,7 @@ bool ARMAsmParser::shouldOmitPredicateOperand( .contains(static_cast(Operands[RegIdx])->getReg()))) return true; } + return false; } static bool isDataTypeToken(StringRef Tok) { -- cgit v1.1 From 9e8ba2b193088dc91e33edd223c0fcff57b15e83 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Fri, 19 Jul 2013 21:44:56 +0000 Subject: R600: Replace legacy debug code in AMDILCFGStructurizer.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186723 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILCFGStructurizer.cpp | 463 ++++++++++++++++--------------- 1 file changed, 235 insertions(+), 228 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index 20a94f1..6b61a03 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -8,11 +8,12 @@ /// \file //==-----------------------------------------------------------------------===// -#define DEBUGME 0 #define DEBUG_TYPE "structcfg" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -62,22 +63,22 @@ STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); //===----------------------------------------------------------------------===// namespace { #define SHOWNEWINSTR(i) \ - if (DEBUGME) errs() << "New instr: " << *i << "\n" + DEBUG(dbgs() << "New instr: " << *i << "\n"); #define SHOWNEWBLK(b, msg) \ -if (DEBUGME) { \ - errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - errs() << "\n"; \ -} +DEBUG( \ + dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + dbgs() << "\n"; \ +); #define SHOWBLK_DETAIL(b, msg) \ -if (DEBUGME) { \ +DEBUG( \ if (b) { \ - errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - b->print(errs()); \ - errs() << "\n"; \ + dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + b->print(dbgs()); \ + dbgs() << "\n"; \ } \ -} +); #define INVALIDSCCNUM -1 #define INVALIDREGNUM 0 @@ -332,21 +333,27 @@ bool CFGStructurizer::prepare(FuncT &func, PassT &pass, //FIXME: if not reducible flow graph, make it so ??? - if (DEBUGME) { - errs() << "AMDGPUCFGStructurizer::prepare\n"; - } + DEBUG( + dbgs() << "AMDGPUCFGStructurizer::prepare\n"; + ); loopInfo = CFGTraits::getLoopInfo(pass); - if (DEBUGME) { - errs() << "LoopInfo:\n"; - PrintLoopinfo(*loopInfo, errs()); - } + DEBUG( + dbgs() << "LoopInfo:\n"; + PrintLoopinfo(*loopInfo, dbgs()); + ); orderBlocks(); - if (DEBUGME) { - errs() << "Ordered blocks:\n"; - printOrderedBlocks(errs()); - } + DEBUG( + for (typename SmallVectorImpl::const_iterator + iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); + iterBlk != iterBlkEnd; + ++iterBlk) { + (*iterBlk)->dump(); + } + dbgs() << "Ordered blocks:\n"; + printOrderedBlocks(dbgs()); + ); SmallVector retBlks; @@ -396,26 +403,26 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, TRI = tri; //Assume reducible CFG... - if (DEBUGME) { - errs() << "AMDGPUCFGStructurizer::run\n"; + DEBUG( + dbgs() << "AMDGPUCFGStructurizer::run\n"; func.viewCFG(); - } + ); domTree = CFGTraits::getDominatorTree(pass); - if (DEBUGME) { - domTree->print(errs(), (const llvm::Module*)0); - } + DEBUG( + domTree->print(dbgs(), (const llvm::Module*)0); + ); postDomTree = CFGTraits::getPostDominatorTree(pass); - if (DEBUGME) { - postDomTree->print(errs()); - } + DEBUG( + postDomTree->print(dbgs()); + ); loopInfo = CFGTraits::getLoopInfo(pass); - if (DEBUGME) { - errs() << "LoopInfo:\n"; - PrintLoopinfo(*loopInfo, errs()); - } + DEBUG( + dbgs() << "LoopInfo:\n"; + PrintLoopinfo(*loopInfo, dbgs()); + ); orderBlocks(); #ifdef STRESSTEST @@ -423,10 +430,10 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, ReverseVector(orderedBlks); #endif - if (DEBUGME) { - errs() << "Ordered blocks:\n"; - printOrderedBlocks(errs()); - } + DEBUG( + dbgs() << "Ordered blocks:\n"; + printOrderedBlocks(dbgs()); + ); int numIter = 0; bool finish = false; BlockT *curBlk; @@ -436,10 +443,10 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, do { ++numIter; - if (DEBUGME) { - errs() << "numIter = " << numIter + DEBUG( + dbgs() << "numIter = " << numIter << ", numRemaintedBlk = " << numRemainedBlk << "\n"; - } + ); typename SmallVectorImpl::const_iterator iterBlk = orderedBlks.begin(); @@ -461,10 +468,10 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, sccBeginBlk = curBlk; sccNumIter = 0; sccNumBlk = numRemainedBlk; // Init to maximum possible number. - if (DEBUGME) { - errs() << "start processing SCC" << getSCCNum(sccBeginBlk); - errs() << "\n"; - } + DEBUG( + dbgs() << "start processing SCC" << getSCCNum(sccBeginBlk); + dbgs() << "\n"; + ); } if (!isRetiredBlock(curBlk)) { @@ -480,21 +487,21 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, ++sccNumIter; int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { - if (DEBUGME) { - errs() << "Can't reduce SCC " << getSCCNum(curBlk) + DEBUG( + dbgs() << "Can't reduce SCC " << getSCCNum(curBlk) << ", sccNumIter = " << sccNumIter; - errs() << "doesn't make any progress\n"; - } + dbgs() << "doesn't make any progress\n"; + ); contNextScc = true; } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { sccNumBlk = sccRemainedNumBlk; iterBlk = sccBeginIter; contNextScc = false; - if (DEBUGME) { - errs() << "repeat processing SCC" << getSCCNum(curBlk) + DEBUG( + dbgs() << "repeat processing SCC" << getSCCNum(curBlk) << "sccNumIter = " << sccNumIter << "\n"; func.viewCFG(); - } + ); } else { // Finish the current scc. contNextScc = true; @@ -512,9 +519,9 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, BlockT *entryBlk = FuncGTraits::nodes_begin(&func); if (entryBlk->succ_size() == 0) { finish = true; - if (DEBUGME) { - errs() << "Reduce to one block\n"; - } + DEBUG( + dbgs() << "Reduce to one block\n"; + ); } else { int newnumRemainedBlk = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); @@ -524,9 +531,9 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, numRemainedBlk = newnumRemainedBlk; } else { makeProgress = false; - if (DEBUGME) { - errs() << "No progress\n"; - } + DEBUG( + dbgs() << "No progress\n"; + ); } } } while (!finish && makeProgress); @@ -539,9 +546,9 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { if ((*iterMap).second && (*iterMap).second->isRetired) { assert(((*iterMap).first)->getNumber() != -1); - if (DEBUGME) { - errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; - } + DEBUG( + dbgs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; + ); (*iterMap).first->eraseFromParent(); //Remove from the parent Function. } delete (*iterMap).second; @@ -555,12 +562,12 @@ bool CFGStructurizer::run(FuncT &func, PassT &pass, } loopLandInfoMap.clear(); - if (DEBUGME) { + DEBUG( func.viewCFG(); - } + ); if (!finish) { - assert(!"IRREDUCIBL_CF"); + llvm_unreachable("IRREDUCIBL_CF"); } return true; @@ -609,7 +616,7 @@ template void CFGStructurizer::orderBlocks() { BlockT *bb = &(*blockIter1); sccNum = getSCCNum(bb); if (sccNum == INVALIDSCCNUM) { - errs() << "unreachable block BB" << bb->getNumber() << "\n"; + dbgs() << "unreachable block BB" << bb->getNumber() << "\n"; } } } //orderBlocks @@ -618,18 +625,18 @@ template int CFGStructurizer::patternMatch(BlockT *curBlk) { int numMatch = 0; int curMatch; - if (DEBUGME) { - errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; - } + DEBUG( + dbgs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; + ); while ((curMatch = patternMatchGroup(curBlk)) > 0) { numMatch += curMatch; } - if (DEBUGME) { - errs() << "End patternMatch BB" << curBlk->getNumber() + DEBUG( + dbgs() << "End patternMatch BB" << curBlk->getNumber() << ", numMatch = " << numMatch << "\n"; - } + ); return numMatch; } //patternMatch @@ -811,9 +818,9 @@ int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, BlockTSmallerVector exitingBlks; loopRep->getExitingBlocks(exitingBlks); - if (DEBUGME) { - errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; - } + DEBUG( + dbgs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; + ); if (exitingBlks.size() == 0) { setLoopLandBlock(loopRep); @@ -834,9 +841,9 @@ int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, assert(exitBlkSet.size() > 0); assert(exitBlks.size() == exitingBlks.size()); - if (DEBUGME) { - errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; - } + DEBUG( + dbgs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; + ); // Find exitLandBlk. BlockT *exitLandBlk = NULL; @@ -861,19 +868,19 @@ int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, BlockT *exitBlk = *iter; PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); - if (DEBUGME) { - errs() << "BB" << exitBlk->getNumber() + DEBUG( + dbgs() << "BB" << exitBlk->getNumber() << " to BB" << exitLandBlk->getNumber() << " PathToKind=" << pathKind << "\n"; - } + ); allInPath = allInPath && (pathKind == SinglePath_InPath); allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); if (!allInPath && !allNotInPath) { - if (DEBUGME) { - errs() << "singlePath check fail\n"; - } + DEBUG( + dbgs() << "singlePath check fail\n"; + ); return -1; } } // check all exit blocks @@ -891,19 +898,19 @@ int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, loopRep, exitBlkSet, exitLandBlk)) != NULL) { - if (DEBUGME) { - errs() << "relocateLoopcontBlock success\n"; - } + DEBUG( + dbgs() << "relocateLoopcontBlock success\n"; + ); } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, exitingBlks, exitBlks)) != NULL) { - if (DEBUGME) { - errs() << "insertEndbranchBlock success\n"; - } + DEBUG( + dbgs() << "insertEndbranchBlock success\n"; + ); } else { - if (DEBUGME) { - errs() << "loop exit fail\n"; - } + DEBUG( + dbgs() << "loop exit fail\n"; + ); return -1; } } @@ -1017,11 +1024,11 @@ bool CFGStructurizer::isSameloopDetachedContbreak(BlockT *src1Blk, if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; if (theEntry != NULL) { - if (DEBUGME) { - errs() << "isLoopContBreakBlock yes src1 = BB" + DEBUG( + dbgs() << "isLoopContBreakBlock yes src1 = BB" << src1Blk->getNumber() << " src2 = BB" << src2Blk->getNumber() << "\n"; - } + ); return true; } } @@ -1035,9 +1042,9 @@ int CFGStructurizer::handleJumpintoIf(BlockT *headBlk, BlockT *falseBlk) { int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); if (num == 0) { - if (DEBUGME) { - errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; - } + DEBUG( + dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; + ); num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); } return num; @@ -1053,22 +1060,22 @@ int CFGStructurizer::handleJumpintoIfImp(BlockT *headBlk, //trueBlk could be the common post dominator downBlk = trueBlk; - if (DEBUGME) { - errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() + DEBUG( + dbgs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() << " true = BB" << trueBlk->getNumber() << ", numSucc=" << trueBlk->succ_size() << " false = BB" << falseBlk->getNumber() << "\n"; - } + ); while (downBlk) { - if (DEBUGME) { - errs() << "check down = BB" << downBlk->getNumber(); - } + DEBUG( + dbgs() << "check down = BB" << downBlk->getNumber(); + ); if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { - if (DEBUGME) { - errs() << " working\n"; - } + DEBUG( + dbgs() << " working\n"; + ); num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); @@ -1081,9 +1088,9 @@ int CFGStructurizer::handleJumpintoIfImp(BlockT *headBlk, break; } - if (DEBUGME) { - errs() << " not working\n"; - } + DEBUG( + dbgs() << " not working\n"; + ); downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL; } // walk down the postDomTree @@ -1096,43 +1103,43 @@ void CFGStructurizer::showImproveSimpleJumpintoIf(BlockT *headBlk, BlockT *falseBlk, BlockT *landBlk, bool detail) { - errs() << "head = BB" << headBlk->getNumber() + dbgs() << "head = BB" << headBlk->getNumber() << " size = " << headBlk->size(); if (detail) { - errs() << "\n"; - headBlk->print(errs()); - errs() << "\n"; + dbgs() << "\n"; + headBlk->print(dbgs()); + dbgs() << "\n"; } if (trueBlk) { - errs() << ", true = BB" << trueBlk->getNumber() << " size = " + dbgs() << ", true = BB" << trueBlk->getNumber() << " size = " << trueBlk->size() << " numPred = " << trueBlk->pred_size(); if (detail) { - errs() << "\n"; - trueBlk->print(errs()); - errs() << "\n"; + dbgs() << "\n"; + trueBlk->print(dbgs()); + dbgs() << "\n"; } } if (falseBlk) { - errs() << ", false = BB" << falseBlk->getNumber() << " size = " + dbgs() << ", false = BB" << falseBlk->getNumber() << " size = " << falseBlk->size() << " numPred = " << falseBlk->pred_size(); if (detail) { - errs() << "\n"; - falseBlk->print(errs()); - errs() << "\n"; + dbgs() << "\n"; + falseBlk->print(dbgs()); + dbgs() << "\n"; } } if (landBlk) { - errs() << ", land = BB" << landBlk->getNumber() << " size = " + dbgs() << ", land = BB" << landBlk->getNumber() << " size = " << landBlk->size() << " numPred = " << landBlk->pred_size(); if (detail) { - errs() << "\n"; - landBlk->print(errs()); - errs() << "\n"; + dbgs() << "\n"; + landBlk->print(dbgs()); + dbgs() << "\n"; } } - errs() << "\n"; + dbgs() << "\n"; } //showImproveSimpleJumpintoIf template @@ -1169,10 +1176,10 @@ int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, migrateFalse = true; } - if (DEBUGME) { - errs() << "before improveSimpleJumpintoIf: "; + DEBUG( + dbgs() << "before improveSimpleJumpintoIf: "; showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); - } + ); // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk // @@ -1269,10 +1276,10 @@ int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, } } //for } - if (DEBUGME) { - errs() << "result from improveSimpleJumpintoIf: "; + DEBUG( + dbgs() << "result from improveSimpleJumpintoIf: "; showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); - } + ); // update landBlk *plandBlk = landBlk; @@ -1286,10 +1293,10 @@ void CFGStructurizer::handleLoopbreak(BlockT *exitingBlk, BlockT *exitBlk, LoopT *exitLoop, BlockT *landBlk) { - if (DEBUGME) { - errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) + DEBUG( + dbgs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; - } + ); const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); RegiT initReg = INVALIDREGNUM; @@ -1314,14 +1321,14 @@ void CFGStructurizer::handleLoopcontBlock(BlockT *contingBlk, LoopT *contingLoop, BlockT *contBlk, LoopT *contLoop) { - if (DEBUGME) { - errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() + DEBUG( + dbgs() << "loopcontPattern cont = BB" << contingBlk->getNumber() << " header = BB" << contBlk->getNumber() << "\n"; - errs() << "Trying to continue loop-depth = " + dbgs() << "Trying to continue loop-depth = " << getLoopDepth(contLoop) << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; - } + ); RegiT initReg = INVALIDREGNUM; const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); @@ -1343,10 +1350,10 @@ void CFGStructurizer::handleLoopcontBlock(BlockT *contingBlk, template void CFGStructurizer::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { - if (DEBUGME) { - errs() << "serialPattern BB" << dstBlk->getNumber() + DEBUG( + dbgs() << "serialPattern BB" << dstBlk->getNumber() << " <= BB" << srcBlk->getNumber() << "\n"; - } + ); dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end()); dstBlk->removeSuccessor(srcBlk); @@ -1362,26 +1369,26 @@ void CFGStructurizer::mergeIfthenelseBlock(InstrT *branchInstr, BlockT *trueBlk, BlockT *falseBlk, BlockT *landBlk) { - if (DEBUGME) { - errs() << "ifPattern BB" << curBlk->getNumber(); - errs() << "{ "; + DEBUG( + dbgs() << "ifPattern BB" << curBlk->getNumber(); + dbgs() << "{ "; if (trueBlk) { - errs() << "BB" << trueBlk->getNumber(); + dbgs() << "BB" << trueBlk->getNumber(); } - errs() << " } else "; - errs() << "{ "; + dbgs() << " } else "; + dbgs() << "{ "; if (falseBlk) { - errs() << "BB" << falseBlk->getNumber(); + dbgs() << "BB" << falseBlk->getNumber(); } - errs() << " }\n "; - errs() << "landBlock: "; + dbgs() << " }\n "; + dbgs() << "landBlock: "; if (landBlk == NULL) { - errs() << "NULL"; + dbgs() << "NULL"; } else { - errs() << "BB" << landBlk->getNumber(); + dbgs() << "BB" << landBlk->getNumber(); } - errs() << "\n"; - } + dbgs() << "\n"; + ); int oldOpcode = branchInstr->getOpcode(); DebugLoc branchDL = branchInstr->getDebugLoc(); @@ -1435,10 +1442,10 @@ void CFGStructurizer::mergeLooplandBlock(BlockT *dstBlk, LoopLandInfo *loopLand) { BlockT *landBlk = loopLand->landBlk; - if (DEBUGME) { - errs() << "loopPattern header = BB" << dstBlk->getNumber() + DEBUG( + dbgs() << "loopPattern header = BB" << dstBlk->getNumber() << " land = BB" << landBlk->getNumber() << "\n"; - } + ); // Loop contInitRegs are init at the beginning of the loop. for (typename std::set::const_iterator iter = @@ -1521,7 +1528,7 @@ void CFGStructurizer::reversePredicateSetter(typename BlockT::iterator I) static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO); return; default: - assert(0 && "PRED_X Opcode invalid!"); + llvm_unreachable("PRED_X Opcode invalid!"); } } } @@ -1532,11 +1539,11 @@ void CFGStructurizer::mergeLoopbreakBlock(BlockT *exitingBlk, BlockT *exitBlk, BlockT *exitLandBlk, RegiT setReg) { - if (DEBUGME) { - errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() + DEBUG( + dbgs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() << " exit = BB" << exitBlk->getNumber() << " land = BB" << exitLandBlk->getNumber() << "\n"; - } + ); InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); @@ -1596,11 +1603,11 @@ template void CFGStructurizer::settleLoopcontBlock(BlockT *contingBlk, BlockT *contBlk, RegiT setReg) { - if (DEBUGME) { - errs() << "settleLoopcontBlock conting = BB" + DEBUG( + dbgs() << "settleLoopcontBlock conting = BB" << contingBlk->getNumber() << ", cont = BB" << contBlk->getNumber() << "\n"; - } + ); InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); if (branchInstr) { @@ -1711,10 +1718,10 @@ CFGStructurizer::relocateLoopcontBlock(LoopT *parentLoopRep, contInstr->eraseFromParent(); } endBlk->addSuccessor(newBlk); - if (DEBUGME) { - errs() << "Add new continue Block to BB" + DEBUG( + dbgs() << "Add new continue Block to BB" << endBlk->getNumber() << " successors\n"; - } + ); } return newBlk; @@ -1927,10 +1934,10 @@ CFGStructurizer::cloneBlockForPredecessor(BlockT *curBlk, numClonedInstr += curBlk->size(); - if (DEBUGME) { - errs() << "Cloned block: " << "BB" + DEBUG( + dbgs() << "Cloned block: " << "BB" << curBlk->getNumber() << "size " << curBlk->size() << "\n"; - } + ); SHOWNEWBLK(cloneBlk, "result of Cloned block: "); @@ -1966,29 +1973,29 @@ void CFGStructurizer::migrateInstruction(BlockT *srcBlk, //look for the input branchinstr, not the AMDGPU branchinstr InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); if (branchInstr == NULL) { - if (DEBUGME) { - errs() << "migrateInstruction don't see branch instr\n" ; - } + DEBUG( + dbgs() << "migrateInstruction don't see branch instr\n" ; + ); spliceEnd = srcBlk->end(); } else { - if (DEBUGME) { - errs() << "migrateInstruction see branch instr\n" ; + DEBUG( + dbgs() << "migrateInstruction see branch instr\n" ; branchInstr->dump(); - } + ); spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); } - if (DEBUGME) { - errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() + DEBUG( + dbgs() << "migrateInstruction before splice dstSize = " << dstBlk->size() << "srcSize = " << srcBlk->size() << "\n"; - } + ); //splice insert before insertPos dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); - if (DEBUGME) { - errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() + DEBUG( + dbgs() << "migrateInstruction after splice dstSize = " << dstBlk->size() << "srcSize = " << srcBlk->size() << "\n"; - } + ); } //migrateInstruction // normalizeInfiniteLoopExit change @@ -2016,7 +2023,7 @@ CFGStructurizer::normalizeInfiniteLoopExit(LoopT* LoopRep) { funcRep->push_back(dummyExitBlk); //insert to function SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); - if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n"; + DEBUG(dbgs() << "Old branch instr: " << *branchInstr << "\n";); typename BlockT::iterator insertPos = CFGTraits::getInstrPos(loopLatch, branchInstr); @@ -2047,10 +2054,10 @@ void CFGStructurizer::removeUnconditionalBranch(BlockT *srcBlk) { // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) && CFGTraits::isUncondBranch(branchInstr)) { - if (DEBUGME) { - errs() << "Removing unconditional branch instruction" ; + DEBUG( + dbgs() << "Removing unconditional branch instruction" ; branchInstr->dump(); - } + ); branchInstr->eraseFromParent(); } } //removeUnconditionalBranch @@ -2064,10 +2071,10 @@ void CFGStructurizer::removeRedundantConditionalBranch(BlockT *srcBlk) { if (blk1 == blk2) { InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); - if (DEBUGME) { - errs() << "Removing unneeded conditional branch instruction" ; + DEBUG( + dbgs() << "Removing unneeded conditional branch instruction" ; branchInstr->dump(); - } + ); branchInstr->eraseFromParent(); SHOWNEWBLK(blk1, "Removing redundant successor"); srcBlk->removeSuccessor(blk1); @@ -2091,10 +2098,10 @@ void CFGStructurizer::addDummyExitBlock(SmallVectorImpl curInstr->eraseFromParent(); } curBlk->addSuccessor(dummyExitBlk); - if (DEBUGME) { - errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() + DEBUG( + dbgs() << "Add dummyExitBlock to BB" << curBlk->getNumber() << " successors\n"; - } + ); } //for SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); @@ -2126,9 +2133,9 @@ int CFGStructurizer::getSCCNum(BlockT *srcBlk) { template void CFGStructurizer::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { - if (DEBUGME) { - errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; - } + DEBUG( + dbgs() << "Retiring BB" << srcBlk->getNumber() << "\n"; + ); BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; @@ -2245,11 +2252,11 @@ void CFGStructurizer::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { theEntry->landBlk = blk; - if (DEBUGME) { - errs() << "setLoopLandBlock loop-header = BB" + DEBUG( + dbgs() << "setLoopLandBlock loop-header = BB" << loopRep->getHeader()->getNumber() << " landing-block = BB" << blk->getNumber() << "\n"; - } + ); } // setLoopLandBlock template @@ -2262,11 +2269,11 @@ void CFGStructurizer::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { theEntry->breakOnRegs.insert(regNum); - if (DEBUGME) { - errs() << "addLoopBreakOnReg loop-header = BB" + DEBUG( + dbgs() << "addLoopBreakOnReg loop-header = BB" << loopRep->getHeader()->getNumber() << " regNum = " << regNum << "\n"; - } + ); } // addLoopBreakOnReg template @@ -2278,11 +2285,11 @@ void CFGStructurizer::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { } theEntry->contOnRegs.insert(regNum); - if (DEBUGME) { - errs() << "addLoopContOnReg loop-header = BB" + DEBUG( + dbgs() << "addLoopContOnReg loop-header = BB" << loopRep->getHeader()->getNumber() << " regNum = " << regNum << "\n"; - } + ); } // addLoopContOnReg template @@ -2294,11 +2301,11 @@ void CFGStructurizer::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { } theEntry->breakInitRegs.insert(regNum); - if (DEBUGME) { - errs() << "addLoopBreakInitReg loop-header = BB" + DEBUG( + dbgs() << "addLoopBreakInitReg loop-header = BB" << loopRep->getHeader()->getNumber() << " regNum = " << regNum << "\n"; - } + ); } // addLoopBreakInitReg template @@ -2310,11 +2317,11 @@ void CFGStructurizer::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { } theEntry->contInitRegs.insert(regNum); - if (DEBUGME) { - errs() << "addLoopContInitReg loop-header = BB" + DEBUG( + dbgs() << "addLoopContInitReg loop-header = BB" << loopRep->getHeader()->getNumber() << " regNum = " << regNum << "\n"; - } + ); } // addLoopContInitReg template @@ -2327,11 +2334,11 @@ void CFGStructurizer::addLoopEndbranchInitReg(LoopT *loopRep, } theEntry->endbranchInitRegs.insert(regNum); - if (DEBUGME) { - errs() << "addLoopEndbranchInitReg loop-header = BB" + DEBUG( + dbgs() << "addLoopEndbranchInitReg loop-header = BB" << loopRep->getHeader()->getNumber() << " regNum = " << regNum << "\n"; - } + ); } // addLoopEndbranchInitReg template @@ -2437,14 +2444,14 @@ CFGStructurizer::findNearestCommonPostDom } } - if (DEBUGME) { - errs() << "Common post dominator for exit blocks is "; + DEBUG( + dbgs() << "Common post dominator for exit blocks is "; if (commonDom) { - errs() << "BB" << commonDom->getNumber() << "\n"; + dbgs() << "BB" << commonDom->getNumber() << "\n"; } else { - errs() << "NULL\n"; + dbgs() << "NULL\n"; } - } + ); return commonDom; } //findNearestCommonPostDom @@ -2591,7 +2598,7 @@ struct CFGStructTraits { case AMDGPU::BRANCH_COND_i32: case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; default: - assert(0 && "internal error"); + llvm_unreachable("internal error"); } return -1; } @@ -2603,7 +2610,7 @@ struct CFGStructTraits { case AMDGPU::BRANCH_COND_i32: case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; default: - assert(0 && "internal error"); + llvm_unreachable("internal error"); } return -1; } @@ -2613,7 +2620,7 @@ struct CFGStructTraits { case AMDGPU::JUMP_COND: case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; default: - assert(0 && "internal error"); + llvm_unreachable("internal error"); }; return -1; } @@ -2623,7 +2630,7 @@ struct CFGStructTraits { case AMDGPU::JUMP_COND: case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; default: - assert(0 && "internal error"); + llvm_unreachable("internal error"); } return -1; } @@ -2753,10 +2760,10 @@ struct CFGStructTraits { if (instr) { assert(isReturn); } else if (isReturn) { - if (DEBUGME) { - errs() << "BB" << blk->getNumber() + DEBUG( + dbgs() << "BB" << blk->getNumber() <<" is return block without RETURN instr\n"; - } + ); } return isReturn; -- cgit v1.1 From 12140450fa9c768f022946e2f355816ba8cca31d Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Fri, 19 Jul 2013 21:45:06 +0000 Subject: R600: Simplify AMDILCFGStructurize by removing templates and assuming single exit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186724 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 1 - lib/Target/R600/AMDGPUTargetMachine.cpp | 1 - lib/Target/R600/AMDILCFGStructurizer.cpp | 3850 +++++++++++------------------- 3 files changed, 1341 insertions(+), 2511 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index f284291..7621422 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -31,7 +31,6 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); FunctionPass *createR600Packetizer(TargetMachine &tm); FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); -FunctionPass *createAMDGPUCFGPreparationPass(TargetMachine &tm); FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm); // SI Passes diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 7a14e50..1dc1b6b 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -160,7 +160,6 @@ bool AMDGPUPassConfig::addPreSched2() { bool AMDGPUPassConfig::addPreEmitPass() { const AMDGPUSubtarget &ST = TM->getSubtarget(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); addPass(&FinalizeMachineBundlesID); diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index 6b61a03..85ac725 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -12,11 +12,13 @@ #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" +#include "R600InstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Analysis/DominatorInternals.h" #include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/MachineDominators.h" @@ -47,12 +49,8 @@ STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " "matched"); STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " "matched"); -STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " - "pattern matched"); STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " "pattern matched"); -STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " - "matched"); STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); @@ -81,16 +79,6 @@ DEBUG( \ ); #define INVALIDSCCNUM -1 -#define INVALIDREGNUM 0 - -template -void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { - for (typename LoopinfoT::iterator iter = LoopInfo.begin(), - iterEnd = LoopInfo.end(); - iter != iterEnd; ++iter) { - (*iter)->print(OS, 0); - } -} template void ReverseVector(SmallVectorImpl &Src) { @@ -110,40 +98,14 @@ void ReverseVector(SmallVectorImpl &Src) { // //===----------------------------------------------------------------------===// + namespace { -template -struct CFGStructTraits { -}; -template class BlockInformation { public: - bool isRetired; - int sccNum; - //SmallVector succInstr; - //Instructions defining the corresponding successor. - BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} -}; - -template -class LandInformation { -public: - BlockT *landBlk; - std::set breakInitRegs; //Registers that need to "reg = 0", before - //WHILELOOP(thisloop) init before entering - //thisloop. - std::set contInitRegs; //Registers that need to "reg = 0", after - //WHILELOOP(thisloop) init after entering - //thisloop. - std::set endbranchInitRegs; //Init before entering this loop, at loop - //land block, branch cond on this reg. - std::set breakOnRegs; //registers that need to "if (reg) break - //endif" after ENDLOOP(thisloop) break - //outerLoopOf(thisLoop). - std::set contOnRegs; //registers that need to "if (reg) continue - //endif" after ENDLOOP(thisloop) continue on - //outerLoopOf(thisLoop). - LandInformation() : landBlk(NULL) {} + bool IsRetired; + int SccNum; + BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {} }; } // end anonymous namespace @@ -155,1030 +117,1218 @@ public: //===----------------------------------------------------------------------===// namespace { -// bixia TODO: port it to BasicBlock, not just MachineBasicBlock. -template -class CFGStructurizer { +class AMDGPUCFGStructurizer : public MachineFunctionPass { public: - typedef enum { + typedef SmallVector MBBVector; + typedef std::map MBBInfoMap; + typedef std::map LoopLandInfoMap; + + enum PathToKind { Not_SinglePath = 0, SinglePath_InPath = 1, SinglePath_NotInPath = 2 - } PathToKind; + }; -public: - typedef typename PassT::InstructionType InstrT; - typedef typename PassT::FunctionType FuncT; - typedef typename PassT::DominatortreeType DomTreeT; - typedef typename PassT::PostDominatortreeType PostDomTreeT; - typedef typename PassT::DomTreeNodeType DomTreeNodeT; - typedef typename PassT::LoopinfoType LoopInfoT; - - typedef GraphTraits FuncGTraits; - //typedef FuncGTraits::nodes_iterator BlockIterator; - typedef typename FuncT::iterator BlockIterator; - - typedef typename FuncGTraits::NodeType BlockT; - typedef GraphTraits BlockGTraits; - typedef GraphTraits > InvBlockGTraits; - //typedef BlockGTraits::succ_iterator InstructionIterator; - typedef typename BlockT::iterator InstrIterator; - - typedef CFGStructTraits CFGTraits; - typedef BlockInformation BlockInfo; - typedef std::map BlockInfoMap; - - typedef int RegiT; - typedef typename PassT::LoopType LoopT; - typedef LandInformation LoopLandInfo; - typedef std::map LoopLandInfoMap; - //landing info for loop break - typedef SmallVector BlockTSmallerVector; + static char ID; -public: - CFGStructurizer(); - ~CFGStructurizer(); + AMDGPUCFGStructurizer(TargetMachine &tm) : + MachineFunctionPass(ID), TM(tm), + TII(static_cast(tm.getInstrInfo())), + TRI(&TII->getRegisterInfo()) { } + + const char *getPassName() const { + return "AMD IL Control Flow Graph structurizer Pass"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } /// Perform the CFG structurization - bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); + bool run(); /// Perform the CFG preparation - bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); + /// This step will remove every unconditionnal/dead jump instructions and make + /// sure all loops have an exit block + bool prepare(); + + bool runOnMachineFunction(MachineFunction &MF) { + DEBUG(MF.dump();); + OrderedBlks.clear(); + FuncRep = &MF; + MLI = &getAnalysis(); + DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); + MDT = &getAnalysis(); + DEBUG(MDT->print(dbgs(), (const llvm::Module*)0);); + PDT = &getAnalysis(); + DEBUG(PDT->print(dbgs());); + prepare(); + run(); + DEBUG(MF.dump();); + return true; + } -private: - void reversePredicateSetter(typename BlockT::iterator); - void orderBlocks(); - void printOrderedBlocks(llvm::raw_ostream &OS); - int patternMatch(BlockT *CurBlock); - int patternMatchGroup(BlockT *CurBlock); - - int serialPatternMatch(BlockT *CurBlock); - int ifPatternMatch(BlockT *CurBlock); - int switchPatternMatch(BlockT *CurBlock); - int loopendPatternMatch(BlockT *CurBlock); - int loopPatternMatch(BlockT *CurBlock); - - int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); - int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); - //int loopWithoutBreak(BlockT *); - - void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, - BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); - void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, - BlockT *ContBlock, LoopT *contLoop); - bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); - int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock); - int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock); - int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock, BlockT **LandBlockPtr); - void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, - BlockT *FalseBlock, BlockT *LandBlock, - bool Detail = false); - PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, - bool AllowSideEntry = true); - BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, - bool AllowSideEntry = true); - int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); - void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); - - void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, - BlockT *TrueBlock, BlockT *FalseBlock, - BlockT *LandBlock); - void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); - void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, - BlockT *ExitLandBlock, RegiT SetReg); - void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, - RegiT SetReg); - BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, - std::set &ExitBlockSet, - BlockT *ExitLandBlk); - BlockT *addLoopEndbranchBlock(LoopT *LoopRep, - BlockTSmallerVector &ExitingBlocks, - BlockTSmallerVector &ExitBlocks); - BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); - void removeUnconditionalBranch(BlockT *SrcBlock); - void removeRedundantConditionalBranch(BlockT *SrcBlock); - void addDummyExitBlock(SmallVectorImpl &RetBlocks); - - void removeSuccessor(BlockT *SrcBlock); - BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); - BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); - - void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, - InstrIterator InsertPos); - - void recordSccnum(BlockT *SrcBlock, int SCCNum); - int getSCCNum(BlockT *srcBlk); - - void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); - bool isRetiredBlock(BlockT *SrcBlock); - bool isActiveLoophead(BlockT *CurBlock); - bool needMigrateBlock(BlockT *Block); - - BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, - BlockTSmallerVector &exitBlocks, - std::set &ExitBlockSet); - void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); - BlockT *getLoopLandBlock(LoopT *LoopRep); - LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); - - void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); - void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); - void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); - void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); - void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); - - bool hasBackEdge(BlockT *curBlock); - unsigned getLoopDepth (LoopT *LoopRep); - int countActiveBlock( - typename SmallVectorImpl::const_iterator IterStart, - typename SmallVectorImpl::const_iterator IterEnd); - BlockT *findNearestCommonPostDom(std::set&); - BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); +protected: + TargetMachine &TM; + MachineDominatorTree *MDT; + MachinePostDominatorTree *PDT; + MachineLoopInfo *MLI; + const R600InstrInfo *TII; + const AMDGPURegisterInfo *TRI; + + // PRINT FUNCTIONS + /// Print the ordered Blocks. + void printOrderedBlocks() const { + size_t i = 0; + for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), + iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { + dbgs() << "BB" << (*iterBlk)->getNumber(); + dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; + if (i != 0 && i % 10 == 0) { + dbgs() << "\n"; + } else { + dbgs() << " "; + } + } + } + static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { + for (MachineLoop::iterator iter = LoopInfo.begin(), + iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { + (*iter)->print(dbgs(), 0); + } + } + + // UTILITY FUNCTIONS + int getSCCNum(MachineBasicBlock *MBB) const; + MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; + bool hasBackEdge(MachineBasicBlock *MBB) const; + static unsigned getLoopDepth(MachineLoop *LoopRep); + bool isRetiredBlock(MachineBasicBlock *MBB) const; + bool isActiveLoophead(MachineBasicBlock *MBB) const; + PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, + bool AllowSideEntry = true) const; + int countActiveBlock(MBBVector::const_iterator It, + MBBVector::const_iterator E) const; + bool needMigrateBlock(MachineBasicBlock *MBB) const; + + // Utility Functions + void reversePredicateSetter(MachineBasicBlock::iterator I); + /// Compute the reversed DFS post order of Blocks + void orderBlocks(MachineFunction *MF); + + // Function originaly from CFGStructTraits + void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, + DebugLoc DL = DebugLoc()); + MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, + DebugLoc DL = DebugLoc()); + MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); + void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, + DebugLoc DL); + void insertCondBranchBefore(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, int NewOpcode, int RegNum, + DebugLoc DL); + void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum); + static int getBranchNzeroOpcode(int OldOpcode); + static int getBranchZeroOpcode(int OldOpcode); + static int getContinueNzeroOpcode(int OldOpcode); + static int getContinueZeroOpcode(int OldOpcode); + static MachineBasicBlock *getTrueBranch(MachineInstr *MI); + static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); + static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, + MachineInstr *MI); + static bool isCondBranch(MachineInstr *MI); + static bool isUncondBranch(MachineInstr *MI); + static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); + static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); + /// The correct naming for this is getPossibleLoopendBlockBranchInstr. + /// + /// BB with backward-edge could have move instructions after the branch + /// instruction. Such move instruction "belong to" the loop backward-edge. + MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); + static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); + static MachineInstr *getContinueInstr(MachineBasicBlock *MBB); + static MachineInstr *getLoopBreakInstr(MachineBasicBlock *MBB); + static bool isReturnBlock(MachineBasicBlock *MBB); + static void cloneSuccessorList(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB) ; + static MachineBasicBlock *clone(MachineBasicBlock *MBB); + /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose + /// because the AMDGPU instruction is not recognized as terminator fix this + /// and retire this routine + void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, + MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); + static void wrapup(MachineBasicBlock *MBB); + + + int patternMatch(MachineBasicBlock *MBB); + int patternMatchGroup(MachineBasicBlock *MBB); + int serialPatternMatch(MachineBasicBlock *MBB); + int ifPatternMatch(MachineBasicBlock *MBB); + int loopendPatternMatch(); + int mergeLoop(MachineLoop *LoopRep); + int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader); + + void handleLoopcontBlock(MachineBasicBlock *ContingMBB, + MachineLoop *ContingLoop, MachineBasicBlock *ContMBB, + MachineLoop *ContLoop); + /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in + /// the same loop with LoopLandInfo without explicitly keeping track of + /// loopContBlks and loopBreakBlks, this is a method to get the information. + bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, + MachineBasicBlock *Src2MBB); + int handleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); + int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); + int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, + MachineBasicBlock **LandMBBPtr); + void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, + MachineBasicBlock *LandMBB, bool Detail = false); + int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); + void mergeSerialBlock(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB); + + void mergeIfthenelseBlock(MachineInstr *BranchMI, + MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); + void mergeLooplandBlock(MachineBasicBlock *DstMBB, + MachineBasicBlock *LandMBB); + void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, + MachineBasicBlock *LandMBB); + void settleLoopcontBlock(MachineBasicBlock *ContingMBB, + MachineBasicBlock *ContMBB); + /// normalizeInfiniteLoopExit change + /// B1: + /// uncond_br LoopHeader + /// + /// to + /// B1: + /// cond_br 1 LoopHeader dummyExit + /// and return the newly added dummy exit block + MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); + void removeUnconditionalBranch(MachineBasicBlock *MBB); + /// Remove duplicate branches instructions in a block. + /// For instance + /// B0: + /// cond_br X B1 B2 + /// cond_br X B1 B2 + /// is transformed to + /// B0: + /// cond_br X B1 B2 + void removeRedundantConditionalBranch(MachineBasicBlock *MBB); + void addDummyExitBlock(SmallVectorImpl &RetMBB); + void removeSuccessor(MachineBasicBlock *MBB); + MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, + MachineBasicBlock *PredMBB); + void migrateInstruction(MachineBasicBlock *SrcMBB, + MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); + void recordSccnum(MachineBasicBlock *MBB, int SCCNum); + void retireBlock(MachineBasicBlock *MBB); + void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL); + + MachineBasicBlock *findNearestCommonPostDom(std::set&); + /// This is work around solution for findNearestCommonDominator not avaiable + /// to post dom a proper fix should go to Dominators.h. + MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2); private: - DomTreeT *domTree; - PostDomTreeT *postDomTree; - LoopInfoT *loopInfo; - PassT *passRep; - FuncT *funcRep; - - BlockInfoMap blockInfoMap; - LoopLandInfoMap loopLandInfoMap; - SmallVector orderedBlks; - const AMDGPURegisterInfo *TRI; + MBBInfoMap BlockInfoMap; + LoopLandInfoMap LLInfoMap; + std::map Visited; + MachineFunction *FuncRep; + SmallVector OrderedBlks; +}; + +int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { + MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); + if (It == BlockInfoMap.end()) + return INVALIDSCCNUM; + return (*It).second->SccNum; +} + +MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) + const { + LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); + if (It == LLInfoMap.end()) + return NULL; + return (*It).second; +} -}; //template class CFGStructurizer +bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { + MachineLoop *LoopRep = MLI->getLoopFor(MBB); + if (!LoopRep) + return false; + MachineBasicBlock *LoopHeader = LoopRep->getHeader(); + return MBB->isSuccessor(LoopHeader); +} + +unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) { + return LoopRep ? LoopRep->getLoopDepth() : 0; +} -template CFGStructurizer::CFGStructurizer() - : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { +bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { + MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); + if (It == BlockInfoMap.end()) + return false; + return (*It).second->IsRetired; } -template CFGStructurizer::~CFGStructurizer() { - for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), - E = blockInfoMap.end(); I != E; ++I) { - delete I->second; +bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { + MachineLoop *LoopRep = MLI->getLoopFor(MBB); + while (LoopRep && LoopRep->getHeader() == MBB) { + MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); + if(!LoopLand) + return true; + if (!isRetiredBlock(LoopLand)) + return true; + LoopRep = LoopRep->getParentLoop(); + } + return false; +} +AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, + bool AllowSideEntry) const { + assert(DstMBB); + if (SrcMBB == DstMBB) + return SinglePath_InPath; + while (SrcMBB && SrcMBB->succ_size() == 1) { + SrcMBB = *SrcMBB->succ_begin(); + if (SrcMBB == DstMBB) + return SinglePath_InPath; + if (!AllowSideEntry && SrcMBB->pred_size() > 1) + return Not_SinglePath; } + if (SrcMBB && SrcMBB->succ_size()==0) + return SinglePath_NotInPath; + return Not_SinglePath; } -template -bool CFGStructurizer::prepare(FuncT &func, PassT &pass, - const AMDGPURegisterInfo * tri) { - passRep = &pass; - funcRep = &func; - TRI = tri; +int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, + MBBVector::const_iterator E) const { + int Count = 0; + while (It != E) { + if (!isRetiredBlock(*It)) + ++Count; + ++It; + } + return Count; +} - bool changed = false; +bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { + unsigned BlockSizeThreshold = 30; + unsigned CloneInstrThreshold = 100; + bool MultiplePreds = MBB && (MBB->pred_size() > 1); - //FIXME: if not reducible flow graph, make it so ??? + if(!MultiplePreds) + return false; + unsigned BlkSize = MBB->size(); + return ((BlkSize > BlockSizeThreshold) && + (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); +} - DEBUG( - dbgs() << "AMDGPUCFGStructurizer::prepare\n"; - ); +void AMDGPUCFGStructurizer::reversePredicateSetter( + MachineBasicBlock::iterator I) { + while (I--) { + if (I->getOpcode() == AMDGPU::PRED_X) { + switch (static_cast(I)->getOperand(2).getImm()) { + case OPCODE_IS_ZERO_INT: + static_cast(I)->getOperand(2) + .setImm(OPCODE_IS_NOT_ZERO_INT); + return; + case OPCODE_IS_NOT_ZERO_INT: + static_cast(I)->getOperand(2) + .setImm(OPCODE_IS_ZERO_INT); + return; + case OPCODE_IS_ZERO: + static_cast(I)->getOperand(2) + .setImm(OPCODE_IS_NOT_ZERO); + return; + case OPCODE_IS_NOT_ZERO: + static_cast(I)->getOperand(2) + .setImm(OPCODE_IS_ZERO); + return; + default: + llvm_unreachable("PRED_X Opcode invalid!"); + } + } + } +} - loopInfo = CFGTraits::getLoopInfo(pass); - DEBUG( - dbgs() << "LoopInfo:\n"; - PrintLoopinfo(*loopInfo, dbgs()); - ); +void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, + int NewOpcode, DebugLoc DL) { + MachineInstr *MI = MBB->getParent() + ->CreateMachineInstr(TII->get(NewOpcode), DL); + MBB->push_back(MI); + //assume the instruction doesn't take any reg operand ... + SHOWNEWINSTR(MI); +} - orderBlocks(); - DEBUG( - for (typename SmallVectorImpl::const_iterator - iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); - iterBlk != iterBlkEnd; - ++iterBlk) { - (*iterBlk)->dump(); +MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, + int NewOpcode, DebugLoc DL) { + MachineInstr *MI = + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); + if (MBB->begin() != MBB->end()) + MBB->insert(MBB->begin(), MI); + else + MBB->push_back(MI); + SHOWNEWINSTR(MI); + return MI; +} + +MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( + MachineBasicBlock::iterator I, int NewOpcode) { + MachineInstr *OldMI = &(*I); + MachineBasicBlock *MBB = OldMI->getParent(); + MachineInstr *NewMBB = + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); + MBB->insert(I, NewMBB); + //assume the instruction doesn't take any reg operand ... + SHOWNEWINSTR(NewMBB); + return NewMBB; +} + +void AMDGPUCFGStructurizer::insertCondBranchBefore( + MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) { + MachineInstr *OldMI = &(*I); + MachineBasicBlock *MBB = OldMI->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); + MBB->insert(I, NewMI); + MachineInstrBuilder MIB(*MF, NewMI); + MIB.addReg(OldMI->getOperand(1).getReg(), false); + SHOWNEWINSTR(NewMI); + //erase later oldInstr->eraseFromParent(); +} + +void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk, + MachineBasicBlock::iterator I, int NewOpcode, int RegNum, + DebugLoc DL) { + MachineFunction *MF = blk->getParent(); + MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); + //insert before + blk->insert(I, NewInstr); + MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); + SHOWNEWINSTR(NewInstr); +} + +void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB, + int NewOpcode, int RegNum) { + MachineFunction *MF = MBB->getParent(); + MachineInstr *NewInstr = + MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); + MBB->push_back(NewInstr); + MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); + SHOWNEWINSTR(NewInstr); +} + +int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case AMDGPU::JUMP_COND: + case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; + default: llvm_unreachable("internal error"); + } + return -1; +} + +int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case AMDGPU::JUMP_COND: + case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; + default: llvm_unreachable("internal error"); + } + return -1; +} + +int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case AMDGPU::JUMP_COND: + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; + default: llvm_unreachable("internal error"); + }; + return -1; +} + +int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { + switch(OldOpcode) { + case AMDGPU::JUMP_COND: + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; + default: llvm_unreachable("internal error"); + } + return -1; +} + +MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { + return MI->getOperand(0).getMBB(); +} + +void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, + MachineBasicBlock *MBB) { + MI->getOperand(0).setMBB(MBB); +} + +MachineBasicBlock * +AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, + MachineInstr *MI) { + assert(MBB->succ_size() == 2); + MachineBasicBlock *TrueBranch = getTrueBranch(MI); + MachineBasicBlock::succ_iterator It = MBB->succ_begin(); + MachineBasicBlock::succ_iterator Next = It; + ++Next; + return (*It == TrueBranch) ? *Next : *It; +} + +bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { + switch (MI->getOpcode()) { + case AMDGPU::JUMP_COND: + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: return true; + default: + return false; + } + return false; +} + +bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { + switch (MI->getOpcode()) { + case AMDGPU::JUMP: + case AMDGPU::BRANCH: + return true; + default: + return false; + } + return false; +} + +DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { + //get DebugLoc from the first MachineBasicBlock instruction with debug info + DebugLoc DL; + for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); + ++It) { + MachineInstr *instr = &(*It); + if (instr->getDebugLoc().isUnknown() == false) + DL = instr->getDebugLoc(); + } + return DL; +} + +MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( + MachineBasicBlock *MBB) { + MachineBasicBlock::reverse_iterator It = MBB->rbegin(); + MachineInstr *MI = &*It; + if (MI && (isCondBranch(MI) || isUncondBranch(MI))) + return MI; + return NULL; +} + +MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( + MachineBasicBlock *MBB) { + for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); + It != E; ++It) { + // FIXME: Simplify + MachineInstr *MI = &*It; + if (MI) { + if (isCondBranch(MI) || isUncondBranch(MI)) + return MI; + else if (!TII->isMov(MI->getOpcode())) + break; } - dbgs() << "Ordered blocks:\n"; - printOrderedBlocks(dbgs()); - ); + } + return NULL; +} + +MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { + MachineBasicBlock::reverse_iterator It = MBB->rbegin(); + if (It != MBB->rend()) { + MachineInstr *instr = &(*It); + if (instr->getOpcode() == AMDGPU::RETURN) + return instr; + } + return NULL; +} + +MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) { + MachineBasicBlock::reverse_iterator It = MBB->rbegin(); + if (It != MBB->rend()) { + MachineInstr *MI = &(*It); + if (MI->getOpcode() == AMDGPU::CONTINUE) + return MI; + } + return NULL; +} + +MachineInstr *AMDGPUCFGStructurizer::getLoopBreakInstr(MachineBasicBlock *MBB) { + for (MachineBasicBlock::iterator It = MBB->begin(); (It != MBB->end()); + ++It) { + MachineInstr *MI = &(*It); + if (MI->getOpcode() == AMDGPU::PREDICATED_BREAK) + return MI; + } + return NULL; +} + +bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { + MachineInstr *MI = getReturnInstr(MBB); + bool IsReturn = (MBB->succ_size() == 0); + if (MI) + assert(IsReturn); + else if (IsReturn) + DEBUG( + dbgs() << "BB" << MBB->getNumber() + <<" is return block without RETURN instr\n";); + return IsReturn; +} + +void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB) { + for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), + iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) + DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of +} + +MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { + MachineFunction *Func = MBB->getParent(); + MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); + Func->push_back(NewMBB); //insert to function + for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end(); + It != E; ++It) { + MachineInstr *MI = Func->CloneMachineInstr(It); + NewMBB->push_back(MI); + } + return NewMBB; +} + +void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( + MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, + MachineBasicBlock *NewBlk) { + MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); + if (BranchMI && isCondBranch(BranchMI) && + getTrueBranch(BranchMI) == OldMBB) + setTrueBranch(BranchMI, NewBlk); +} + +void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { + assert((!MBB->getParent()->getJumpTableInfo() + || MBB->getParent()->getJumpTableInfo()->isEmpty()) + && "found a jump table"); + + //collect continue right before endloop + SmallVector ContInstr; + MachineBasicBlock::iterator Pre = MBB->begin(); + MachineBasicBlock::iterator E = MBB->end(); + MachineBasicBlock::iterator It = Pre; + while (It != E) { + if (Pre->getOpcode() == AMDGPU::CONTINUE + && It->getOpcode() == AMDGPU::ENDLOOP) + ContInstr.push_back(Pre); + Pre = It; + ++It; + } + + //delete continue right before endloop + for (unsigned i = 0; i < ContInstr.size(); ++i) + ContInstr[i]->eraseFromParent(); + + // TODO to fix up jump table so later phase won't be confused. if + // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but + // there isn't such an interface yet. alternatively, replace all the other + // blocks in the jump table with the entryBlk //} + +} + + +bool AMDGPUCFGStructurizer::prepare() { + bool Changed = false; + + //FIXME: if not reducible flow graph, make it so ??? - SmallVector retBlks; - - for (typename LoopInfoT::iterator iter = loopInfo->begin(), - iterEnd = loopInfo->end(); - iter != iterEnd; ++iter) { - LoopT* loopRep = (*iter); - BlockTSmallerVector exitingBlks; - loopRep->getExitingBlocks(exitingBlks); - - if (exitingBlks.size() == 0) { - BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); - if (dummyExitBlk != NULL) - retBlks.push_back(dummyExitBlk); + DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); + + orderBlocks(FuncRep); + + SmallVector RetBlks; + + // Add an ExitBlk to loop that don't have one + for (MachineLoopInfo::iterator It = MLI->begin(), + E = MLI->end(); It != E; ++It) { + MachineLoop *LoopRep = (*It); + MBBVector ExitingMBBs; + LoopRep->getExitingBlocks(ExitingMBBs); + + if (ExitingMBBs.size() == 0) { + MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); + if (DummyExitBlk) + RetBlks.push_back(DummyExitBlk); } } // Remove unconditional branch instr. // Add dummy exit block iff there are multiple returns. - - for (typename SmallVectorImpl::const_iterator - iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); - iterBlk != iterEndBlk; - ++iterBlk) { - BlockT *curBlk = *iterBlk; - removeUnconditionalBranch(curBlk); - removeRedundantConditionalBranch(curBlk); - if (CFGTraits::isReturnBlock(curBlk)) { - retBlks.push_back(curBlk); + for (SmallVectorImpl::const_iterator + It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { + MachineBasicBlock *MBB = *It; + removeUnconditionalBranch(MBB); + removeRedundantConditionalBranch(MBB); + if (isReturnBlock(MBB)) { + RetBlks.push_back(MBB); } - assert(curBlk->succ_size() <= 2); - } //for + assert(MBB->succ_size() <= 2); + } - if (retBlks.size() >= 2) { - addDummyExitBlock(retBlks); - changed = true; + if (RetBlks.size() >= 2) { + addDummyExitBlock(RetBlks); + Changed = true; } - return changed; -} //CFGStructurizer::prepare + return Changed; +} -template -bool CFGStructurizer::run(FuncT &func, PassT &pass, - const AMDGPURegisterInfo * tri) { - passRep = &pass; - funcRep = &func; - TRI = tri; +bool AMDGPUCFGStructurizer::run() { //Assume reducible CFG... - DEBUG( - dbgs() << "AMDGPUCFGStructurizer::run\n"; - func.viewCFG(); - ); - - domTree = CFGTraits::getDominatorTree(pass); - DEBUG( - domTree->print(dbgs(), (const llvm::Module*)0); - ); - - postDomTree = CFGTraits::getPostDominatorTree(pass); - DEBUG( - postDomTree->print(dbgs()); - ); - - loopInfo = CFGTraits::getLoopInfo(pass); - DEBUG( - dbgs() << "LoopInfo:\n"; - PrintLoopinfo(*loopInfo, dbgs()); - ); + DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n";FuncRep->viewCFG();); - orderBlocks(); #ifdef STRESSTEST //Use the worse block ordering to test the algorithm. ReverseVector(orderedBlks); #endif - DEBUG( - dbgs() << "Ordered blocks:\n"; - printOrderedBlocks(dbgs()); - ); - int numIter = 0; - bool finish = false; - BlockT *curBlk; - bool makeProgress = false; - int numRemainedBlk = countActiveBlock(orderedBlks.begin(), - orderedBlks.end()); + DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); + int NumIter = 0; + bool Finish = false; + MachineBasicBlock *MBB; + bool MakeProgress = false; + int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), + OrderedBlks.end()); do { - ++numIter; + ++NumIter; DEBUG( - dbgs() << "numIter = " << numIter - << ", numRemaintedBlk = " << numRemainedBlk << "\n"; + dbgs() << "numIter = " << NumIter + << ", numRemaintedBlk = " << NumRemainedBlk << "\n"; ); - typename SmallVectorImpl::const_iterator - iterBlk = orderedBlks.begin(); - typename SmallVectorImpl::const_iterator - iterBlkEnd = orderedBlks.end(); + SmallVectorImpl::const_iterator It = + OrderedBlks.begin(); + SmallVectorImpl::const_iterator E = + OrderedBlks.end(); - typename SmallVectorImpl::const_iterator - sccBeginIter = iterBlk; - BlockT *sccBeginBlk = NULL; - int sccNumBlk = 0; // The number of active blocks, init to a + SmallVectorImpl::const_iterator SccBeginIter = + It; + MachineBasicBlock *SccBeginMBB = NULL; + int SccNumBlk = 0; // The number of active blocks, init to a // maximum possible number. - int sccNumIter; // Number of iteration in this SCC. + int SccNumIter; // Number of iteration in this SCC. - while (iterBlk != iterBlkEnd) { - curBlk = *iterBlk; + while (It != E) { + MBB = *It; - if (sccBeginBlk == NULL) { - sccBeginIter = iterBlk; - sccBeginBlk = curBlk; - sccNumIter = 0; - sccNumBlk = numRemainedBlk; // Init to maximum possible number. + if (!SccBeginMBB) { + SccBeginIter = It; + SccBeginMBB = MBB; + SccNumIter = 0; + SccNumBlk = NumRemainedBlk; // Init to maximum possible number. DEBUG( - dbgs() << "start processing SCC" << getSCCNum(sccBeginBlk); + dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); dbgs() << "\n"; ); } - if (!isRetiredBlock(curBlk)) { - patternMatch(curBlk); - } + if (!isRetiredBlock(MBB)) + patternMatch(MBB); - ++iterBlk; + ++It; - bool contNextScc = true; - if (iterBlk == iterBlkEnd - || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { + bool ContNextScc = true; + if (It == E + || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { // Just finish one scc. - ++sccNumIter; - int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); - if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { + ++SccNumIter; + int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); + if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { DEBUG( - dbgs() << "Can't reduce SCC " << getSCCNum(curBlk) - << ", sccNumIter = " << sccNumIter; + dbgs() << "Can't reduce SCC " << getSCCNum(MBB) + << ", sccNumIter = " << SccNumIter; dbgs() << "doesn't make any progress\n"; ); - contNextScc = true; - } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { - sccNumBlk = sccRemainedNumBlk; - iterBlk = sccBeginIter; - contNextScc = false; + ContNextScc = true; + } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { + SccNumBlk = sccRemainedNumBlk; + It = SccBeginIter; + ContNextScc = false; DEBUG( - dbgs() << "repeat processing SCC" << getSCCNum(curBlk) - << "sccNumIter = " << sccNumIter << "\n"; - func.viewCFG(); + dbgs() << "repeat processing SCC" << getSCCNum(MBB) + << "sccNumIter = " << SccNumIter << "\n"; + FuncRep->viewCFG(); ); } else { // Finish the current scc. - contNextScc = true; + ContNextScc = true; } } else { // Continue on next component in the current scc. - contNextScc = false; + ContNextScc = false; } - if (contNextScc) { - sccBeginBlk = NULL; - } + if (ContNextScc) + SccBeginMBB = NULL; } //while, "one iteration" over the function. - BlockT *entryBlk = FuncGTraits::nodes_begin(&func); - if (entryBlk->succ_size() == 0) { - finish = true; + MachineBasicBlock *EntryMBB = + GraphTraits::nodes_begin(FuncRep); + if (EntryMBB->succ_size() == 0) { + Finish = true; DEBUG( dbgs() << "Reduce to one block\n"; ); } else { - int newnumRemainedBlk - = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); + int NewnumRemainedBlk + = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); // consider cloned blocks ?? - if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { - makeProgress = true; - numRemainedBlk = newnumRemainedBlk; + if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { + MakeProgress = true; + NumRemainedBlk = NewnumRemainedBlk; } else { - makeProgress = false; + MakeProgress = false; DEBUG( dbgs() << "No progress\n"; ); } } - } while (!finish && makeProgress); + } while (!Finish && MakeProgress); // Misc wrap up to maintain the consistency of the Function representation. - CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); + wrapup(GraphTraits::nodes_begin(FuncRep)); // Detach retired Block, release memory. - for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), - iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { - if ((*iterMap).second && (*iterMap).second->isRetired) { - assert(((*iterMap).first)->getNumber() != -1); + for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); + It != E; ++It) { + if ((*It).second && (*It).second->IsRetired) { + assert(((*It).first)->getNumber() != -1); DEBUG( - dbgs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; + dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n"; ); - (*iterMap).first->eraseFromParent(); //Remove from the parent Function. + (*It).first->eraseFromParent(); //Remove from the parent Function. } - delete (*iterMap).second; - } - blockInfoMap.clear(); - - // clear loopLandInfoMap - for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), - iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { - delete (*iterMap).second; + delete (*It).second; } - loopLandInfoMap.clear(); + BlockInfoMap.clear(); + LLInfoMap.clear(); DEBUG( - func.viewCFG(); + FuncRep->viewCFG(); ); - if (!finish) { + if (!Finish) llvm_unreachable("IRREDUCIBL_CF"); - } return true; -} //CFGStructurizer::run - -/// Print the ordered Blocks. -/// -template -void CFGStructurizer::printOrderedBlocks(llvm::raw_ostream &os) { - size_t i = 0; - for (typename SmallVectorImpl::const_iterator - iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); - iterBlk != iterBlkEnd; - ++iterBlk, ++i) { - os << "BB" << (*iterBlk)->getNumber(); - os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; - if (i != 0 && i % 10 == 0) { - os << "\n"; - } else { - os << " "; - } - } -} //printOrderedBlocks - -/// Compute the reversed DFS post order of Blocks -/// -template void CFGStructurizer::orderBlocks() { - int sccNum = 0; - BlockT *bb; - for (scc_iterator sccIter = scc_begin(funcRep), - sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { - std::vector &sccNext = *sccIter; - for (typename std::vector::const_iterator - blockIter = sccNext.begin(), blockEnd = sccNext.end(); +} + + + +void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { + int SccNum = 0; + MachineBasicBlock *MBB; + for (scc_iterator It = scc_begin(MF), E = scc_end(MF); + It != E; ++It, ++SccNum) { + std::vector &SccNext = *It; + for (std::vector::const_iterator + blockIter = SccNext.begin(), blockEnd = SccNext.end(); blockIter != blockEnd; ++blockIter) { - bb = *blockIter; - orderedBlks.push_back(bb); - recordSccnum(bb, sccNum); + MBB = *blockIter; + OrderedBlks.push_back(MBB); + recordSccnum(MBB, SccNum); } } //walk through all the block in func to check for unreachable - for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), - blockEnd1 = FuncGTraits::nodes_end(funcRep); - blockIter1 != blockEnd1; ++blockIter1) { - BlockT *bb = &(*blockIter1); - sccNum = getSCCNum(bb); - if (sccNum == INVALIDSCCNUM) { - dbgs() << "unreachable block BB" << bb->getNumber() << "\n"; - } + typedef GraphTraits GTM; + MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF); + for (; It != E; ++It) { + MachineBasicBlock *MBB = &(*It); + SccNum = getSCCNum(MBB); + if (SccNum == INVALIDSCCNUM) + dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; } -} //orderBlocks +} -template int CFGStructurizer::patternMatch(BlockT *curBlk) { - int numMatch = 0; - int curMatch; +int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { + int NumMatch = 0; + int CurMatch; DEBUG( - dbgs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; + dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n"; ); - while ((curMatch = patternMatchGroup(curBlk)) > 0) { - numMatch += curMatch; - } + while ((CurMatch = patternMatchGroup(MBB)) > 0) + NumMatch += CurMatch; DEBUG( - dbgs() << "End patternMatch BB" << curBlk->getNumber() - << ", numMatch = " << numMatch << "\n"; + dbgs() << "End patternMatch BB" << MBB->getNumber() + << ", numMatch = " << NumMatch << "\n"; ); - return numMatch; -} //patternMatch - -template -int CFGStructurizer::patternMatchGroup(BlockT *curBlk) { - int numMatch = 0; - numMatch += serialPatternMatch(curBlk); - numMatch += ifPatternMatch(curBlk); - numMatch += loopendPatternMatch(curBlk); - numMatch += loopPatternMatch(curBlk); - return numMatch; -}//patternMatchGroup - -template -int CFGStructurizer::serialPatternMatch(BlockT *curBlk) { - if (curBlk->succ_size() != 1) { + return NumMatch; +} + +int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { + int NumMatch = 0; + NumMatch += loopendPatternMatch(); + NumMatch += serialPatternMatch(MBB); + NumMatch += ifPatternMatch(MBB); + return NumMatch; +} + + +int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { + if (MBB->succ_size() != 1) return 0; - } - BlockT *childBlk = *curBlk->succ_begin(); - if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { + MachineBasicBlock *childBlk = *MBB->succ_begin(); + if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) return 0; - } - mergeSerialBlock(curBlk, childBlk); + mergeSerialBlock(MBB, childBlk); ++numSerialPatternMatch; return 1; -} //serialPatternMatch +} -template -int CFGStructurizer::ifPatternMatch(BlockT *curBlk) { +int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { //two edges - if (curBlk->succ_size() != 2) { + if (MBB->succ_size() != 2) return 0; - } - - if (hasBackEdge(curBlk)) { + if (hasBackEdge(MBB)) return 0; - } - - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); - if (branchInstr == NULL) { + MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); + if (!BranchMI) return 0; - } - assert(CFGTraits::isCondBranch(branchInstr)); + assert(isCondBranch(BranchMI)); - BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); - BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); - BlockT *landBlk; - int cloned = 0; + MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); + serialPatternMatch(TrueMBB); + ifPatternMatch(TrueMBB); + MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); + serialPatternMatch(FalseMBB); + ifPatternMatch(FalseMBB); + MachineBasicBlock *LandBlk; + int Cloned = 0; + assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); // TODO: Simplify - if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 - && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { - landBlk = *trueBlk->succ_begin(); - } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { - landBlk = NULL; - } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { - landBlk = falseBlk; - falseBlk = NULL; - } else if (falseBlk->succ_size() == 1 - && *falseBlk->succ_begin() == trueBlk) { - landBlk = trueBlk; - trueBlk = NULL; - } else if (falseBlk->succ_size() == 1 - && isSameloopDetachedContbreak(trueBlk, falseBlk)) { - landBlk = *falseBlk->succ_begin(); - } else if (trueBlk->succ_size() == 1 - && isSameloopDetachedContbreak(falseBlk, trueBlk)) { - landBlk = *trueBlk->succ_begin(); + if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 + && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { + // Diamond pattern + LandBlk = *TrueMBB->succ_begin(); + } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { + // Triangle pattern, false is empty + LandBlk = FalseMBB; + FalseMBB = NULL; + } else if (FalseMBB->succ_size() == 1 + && *FalseMBB->succ_begin() == TrueMBB) { + // Triangle pattern, true is empty + LandBlk = TrueMBB; + TrueMBB = NULL; + } else if (FalseMBB->succ_size() == 1 + && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { + LandBlk = *FalseMBB->succ_begin(); + } else if (TrueMBB->succ_size() == 1 + && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { + LandBlk = *TrueMBB->succ_begin(); } else { - return handleJumpintoIf(curBlk, trueBlk, falseBlk); + return handleJumpintoIf(MBB, TrueMBB, FalseMBB); } // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the // new BB created for landBlk==NULL may introduce new challenge to the // reduction process. - if (landBlk != NULL && - ((trueBlk && trueBlk->pred_size() > 1) - || (falseBlk && falseBlk->pred_size() > 1))) { - cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); + if (LandBlk && + ((TrueMBB && TrueMBB->pred_size() > 1) + || (FalseMBB && FalseMBB->pred_size() > 1))) { + Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); } - if (trueBlk && trueBlk->pred_size() > 1) { - trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); - ++cloned; + if (TrueMBB && TrueMBB->pred_size() > 1) { + TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); + ++Cloned; } - if (falseBlk && falseBlk->pred_size() > 1) { - falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); - ++cloned; + if (FalseMBB && FalseMBB->pred_size() > 1) { + FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); + ++Cloned; } - mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); + mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); ++numIfPatternMatch; - numClonedBlock += cloned; + numClonedBlock += Cloned; - return 1 + cloned; -} //ifPatternMatch - -template -int CFGStructurizer::switchPatternMatch(BlockT *curBlk) { - return 0; -} //switchPatternMatch + return 1 + Cloned; +} -template -int CFGStructurizer::loopendPatternMatch(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - typename std::vector nestedLoops; - while (loopRep) { - nestedLoops.push_back(loopRep); - loopRep = loopRep->getParentLoop(); +int AMDGPUCFGStructurizer::loopendPatternMatch() { + std::vector NestedLoops; + for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); + It != E; ++It) { + df_iterator LpIt = df_begin(*It), + LpE = df_end(*It); + for (; LpIt != LpE; ++LpIt) + NestedLoops.push_back(*LpIt); } - - if (nestedLoops.size() == 0) { + if (NestedLoops.size() == 0) return 0; - } // Process nested loop outside->inside, so "continue" to a outside loop won't // be mistaken as "break" of the current loop. - int num = 0; - for (typename std::vector::reverse_iterator - iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); - iter != iterEnd; ++iter) { - loopRep = *iter; - - if (getLoopLandBlock(loopRep) != NULL) { + int Num = 0; + for (std::vector::reverse_iterator It = NestedLoops.rbegin(), + E = NestedLoops.rend(); It != E; ++It) { + MachineLoop *ExaminedLoop = *It; + if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) continue; - } - - BlockT *loopHeader = loopRep->getHeader(); - - int numBreak = loopbreakPatternMatch(loopRep, loopHeader); - - if (numBreak == -1) { + DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); + int NumBreak = mergeLoop(ExaminedLoop); + if (NumBreak == -1) break; - } - - int numCont = loopcontPatternMatch(loopRep, loopHeader); - num += numBreak + numCont; + Num += NumBreak; } + return Num; +} - return num; -} //loopendPatternMatch - -template -int CFGStructurizer::loopPatternMatch(BlockT *curBlk) { - if (curBlk->succ_size() != 0) { - return 0; - } +int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { + MachineBasicBlock *LoopHeader = LoopRep->getHeader(); + MBBVector ExitingMBBs; + LoopRep->getExitingBlocks(ExitingMBBs); + assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); + DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";); + // We assume a single ExitBlk + MBBVector ExitBlks; + LoopRep->getExitBlocks(ExitBlks); + SmallPtrSet ExitBlkSet; + for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) + ExitBlkSet.insert(ExitBlks[i]); + assert(ExitBlkSet.size() == 1); + MachineBasicBlock *ExitBlk = *ExitBlks.begin(); + assert(ExitBlk && "Loop has several exit block"); + MBBVector LatchBlks; + typedef GraphTraits > InvMBBTraits; + InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader), + PE = InvMBBTraits::child_end(LoopHeader); + for (; PI != PE; PI++) { + if (LoopRep->contains(*PI)) + LatchBlks.push_back(*PI); + } + + for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) + mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); + for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) + settleLoopcontBlock(LatchBlks[i], LoopHeader); + int Match = 0; + do { + Match = 0; + Match += serialPatternMatch(LoopHeader); + Match += ifPatternMatch(LoopHeader); + } while (Match > 0); + mergeLooplandBlock(LoopHeader, ExitBlk); + MachineLoop *ParentLoop = LoopRep->getParentLoop(); + if (ParentLoop) + MLI->changeLoopFor(LoopHeader, ParentLoop); + else + MLI->removeBlock(LoopHeader); + Visited[LoopRep] = true; + return 1; +} - int numLoop = 0; - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - while (loopRep && loopRep->getHeader() == curBlk) { - LoopLandInfo *loopLand = getLoopLandInfo(loopRep); - if (loopLand) { - BlockT *landBlk = loopLand->landBlk; - assert(landBlk); - if (!isRetiredBlock(landBlk)) { - mergeLooplandBlock(curBlk, loopLand); - ++numLoop; - } +int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep, + MachineBasicBlock *LoopHeader) { + int NumCont = 0; + SmallVector ContMBB; + typedef GraphTraits > GTIM; + GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader), + E = GTIM::child_end(LoopHeader); + for (; It != E; ++It) { + MachineBasicBlock *MBB = *It; + if (LoopRep->contains(MBB)) { + handleLoopcontBlock(MBB, MLI->getLoopFor(MBB), + LoopHeader, LoopRep); + ContMBB.push_back(MBB); + ++NumCont; } - loopRep = loopRep->getParentLoop(); } - numLoopPatternMatch += numLoop; + for (SmallVectorImpl::iterator It = ContMBB.begin(), + E = ContMBB.end(); It != E; ++It) { + (*It)->removeSuccessor(LoopHeader); + } - return numLoop; -} //loopPatternMatch + numLoopcontPatternMatch += NumCont; -template -int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, - BlockT *loopHeader) { - BlockTSmallerVector exitingBlks; - loopRep->getExitingBlocks(exitingBlks); + return NumCont; +} - DEBUG( - dbgs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; - ); - if (exitingBlks.size() == 0) { - setLoopLandBlock(loopRep); - return 0; +bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( + MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { + if (Src1MBB->succ_size() == 0) { + MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); + if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { + MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; + if (TheEntry) { + DEBUG( + dbgs() << "isLoopContBreakBlock yes src1 = BB" + << Src1MBB->getNumber() + << " src2 = BB" << Src2MBB->getNumber() << "\n"; + ); + return true; + } + } } + return false; +} - // Compute the corresponding exitBlks and exit block set. - BlockTSmallerVector exitBlks; - std::set exitBlkSet; - for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), - iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { - BlockT *exitingBlk = *iter; - BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); - exitBlks.push_back(exitBlk); - exitBlkSet.insert(exitBlk); //non-duplicate insert +int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { + int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); + if (Num == 0) { + DEBUG( + dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; + ); + Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); } + return Num; +} - assert(exitBlkSet.size() > 0); - assert(exitBlks.size() == exitingBlks.size()); +int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { + int Num = 0; + MachineBasicBlock *DownBlk; + + //trueBlk could be the common post dominator + DownBlk = TrueMBB; DEBUG( - dbgs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; + dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() + << " true = BB" << TrueMBB->getNumber() + << ", numSucc=" << TrueMBB->succ_size() + << " false = BB" << FalseMBB->getNumber() << "\n"; ); - // Find exitLandBlk. - BlockT *exitLandBlk = NULL; - int numCloned = 0; - int numSerial = 0; - - if (exitBlkSet.size() == 1) { - exitLandBlk = *exitBlkSet.begin(); - } else { - exitLandBlk = findNearestCommonPostDom(exitBlkSet); - - if (exitLandBlk == NULL) { - return -1; - } - - bool allInPath = true; - bool allNotInPath = true; - for (typename std::set::const_iterator - iter = exitBlkSet.begin(), - iterEnd = exitBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *exitBlk = *iter; + while (DownBlk) { + DEBUG( + dbgs() << "check down = BB" << DownBlk->getNumber(); + ); - PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); + if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { DEBUG( - dbgs() << "BB" << exitBlk->getNumber() - << " to BB" << exitLandBlk->getNumber() << " PathToKind=" - << pathKind << "\n"; + dbgs() << " working\n"; ); - allInPath = allInPath && (pathKind == SinglePath_InPath); - allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); + Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); + Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); - if (!allInPath && !allNotInPath) { - DEBUG( - dbgs() << "singlePath check fail\n"; - ); - return -1; - } - } // check all exit blocks - - if (allNotInPath) { - - // TODO: Simplify, maybe separate function? - LoopT *parentLoopRep = loopRep->getParentLoop(); - BlockT *parentLoopHeader = NULL; - if (parentLoopRep) - parentLoopHeader = parentLoopRep->getHeader(); - - if (exitLandBlk == parentLoopHeader && - (exitLandBlk = relocateLoopcontBlock(parentLoopRep, - loopRep, - exitBlkSet, - exitLandBlk)) != NULL) { - DEBUG( - dbgs() << "relocateLoopcontBlock success\n"; - ); - } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, - exitingBlks, - exitBlks)) != NULL) { - DEBUG( - dbgs() << "insertEndbranchBlock success\n"; - ); - } else { - DEBUG( - dbgs() << "loop exit fail\n"; - ); - return -1; - } - } - - // Handle side entry to exit path. - exitBlks.clear(); - exitBlkSet.clear(); - for (typename BlockTSmallerVector::iterator iterExiting = - exitingBlks.begin(), - iterExitingEnd = exitingBlks.end(); - iterExiting != iterExitingEnd; ++iterExiting) { - BlockT *exitingBlk = *iterExiting; - BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); - BlockT *newExitBlk = exitBlk; - - if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { - newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); - ++numCloned; - } - - numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); - - exitBlks.push_back(newExitBlk); - exitBlkSet.insert(newExitBlk); - } - - for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), - iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit) { - BlockT *exitBlk = *iterExit; - numSerial += serialPatternMatch(exitBlk); - } - - for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), - iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit) { - BlockT *exitBlk = *iterExit; - if (exitBlk->pred_size() > 1) { - if (exitBlk != exitLandBlk) { - return -1; - } - } else { - if (exitBlk != exitLandBlk && - (exitBlk->succ_size() != 1 || - *exitBlk->succ_begin() != exitLandBlk)) { - return -1; - } - } - } - } // else - - exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); - - // Fold break into the breaking block. Leverage across level breaks. - assert(exitingBlks.size() == exitBlks.size()); - for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), - iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); - iterExit != iterExitEnd; ++iterExit, ++iterExiting) { - BlockT *exitBlk = *iterExit; - BlockT *exitingBlk = *iterExiting; - assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); - LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); - handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); - } - - int numBreak = static_cast(exitingBlks.size()); - numLoopbreakPatternMatch += numBreak; - numClonedBlock += numCloned; - return numBreak + numSerial + numCloned; -} //loopbreakPatternMatch - -template -int CFGStructurizer::loopcontPatternMatch(LoopT *loopRep, - BlockT *loopHeader) { - int numCont = 0; - SmallVector contBlk; - for (typename InvBlockGTraits::ChildIteratorType iter = - InvBlockGTraits::child_begin(loopHeader), - iterEnd = InvBlockGTraits::child_end(loopHeader); - iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - if (loopRep->contains(curBlk)) { - handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), - loopHeader, loopRep); - contBlk.push_back(curBlk); - ++numCont; - } - } - - for (typename SmallVectorImpl::iterator - iter = contBlk.begin(), iterEnd = contBlk.end(); - iter != iterEnd; ++iter) { - (*iter)->removeSuccessor(loopHeader); - } - - numLoopcontPatternMatch += numCont; - - return numCont; -} //loopcontPatternMatch - - -template -bool CFGStructurizer::isSameloopDetachedContbreak(BlockT *src1Blk, - BlockT *src2Blk) { - // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the - // same loop with LoopLandInfo without explicitly keeping track of - // loopContBlks and loopBreakBlks, this is a method to get the information. - // - if (src1Blk->succ_size() == 0) { - LoopT *loopRep = loopInfo->getLoopFor(src1Blk); - if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - if (theEntry != NULL) { - DEBUG( - dbgs() << "isLoopContBreakBlock yes src1 = BB" - << src1Blk->getNumber() - << " src2 = BB" << src2Blk->getNumber() << "\n"; - ); - return true; - } - } - } - return false; -} //isSameloopDetachedContbreak - -template -int CFGStructurizer::handleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk) { - int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); - if (num == 0) { - DEBUG( - dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; - ); - num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); - } - return num; -} - -template -int CFGStructurizer::handleJumpintoIfImp(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk) { - int num = 0; - BlockT *downBlk; - - //trueBlk could be the common post dominator - downBlk = trueBlk; - - DEBUG( - dbgs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() - << " true = BB" << trueBlk->getNumber() - << ", numSucc=" << trueBlk->succ_size() - << " false = BB" << falseBlk->getNumber() << "\n"; - ); - - while (downBlk) { - DEBUG( - dbgs() << "check down = BB" << downBlk->getNumber(); - ); - - if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { - DEBUG( - dbgs() << " working\n"; - ); - - num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); - num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); - - numClonedBlock += num; - num += serialPatternMatch(*headBlk->succ_begin()); - num += serialPatternMatch(*(++headBlk->succ_begin())); - num += ifPatternMatch(headBlk); - assert(num > 0); + numClonedBlock += Num; + Num += serialPatternMatch(*HeadMBB->succ_begin()); + Num += serialPatternMatch(*(++HeadMBB->succ_begin())); + Num += ifPatternMatch(HeadMBB); + assert(Num > 0); break; } DEBUG( dbgs() << " not working\n"; ); - downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL; + DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL; } // walk down the postDomTree - return num; -} //handleJumpintoIf - -template -void CFGStructurizer::showImproveSimpleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT *landBlk, - bool detail) { - dbgs() << "head = BB" << headBlk->getNumber() - << " size = " << headBlk->size(); - if (detail) { + return Num; +} + +void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( + MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { + dbgs() << "head = BB" << HeadMBB->getNumber() + << " size = " << HeadMBB->size(); + if (Detail) { dbgs() << "\n"; - headBlk->print(dbgs()); + HeadMBB->print(dbgs()); dbgs() << "\n"; } - if (trueBlk) { - dbgs() << ", true = BB" << trueBlk->getNumber() << " size = " - << trueBlk->size() << " numPred = " << trueBlk->pred_size(); - if (detail) { + if (TrueMBB) { + dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " + << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); + if (Detail) { dbgs() << "\n"; - trueBlk->print(dbgs()); + TrueMBB->print(dbgs()); dbgs() << "\n"; } } - if (falseBlk) { - dbgs() << ", false = BB" << falseBlk->getNumber() << " size = " - << falseBlk->size() << " numPred = " << falseBlk->pred_size(); - if (detail) { + if (FalseMBB) { + dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " + << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); + if (Detail) { dbgs() << "\n"; - falseBlk->print(dbgs()); + FalseMBB->print(dbgs()); dbgs() << "\n"; } } - if (landBlk) { - dbgs() << ", land = BB" << landBlk->getNumber() << " size = " - << landBlk->size() << " numPred = " << landBlk->pred_size(); - if (detail) { + if (LandMBB) { + dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " + << LandMBB->size() << " numPred = " << LandMBB->pred_size(); + if (Detail) { dbgs() << "\n"; - landBlk->print(dbgs()); + LandMBB->print(dbgs()); dbgs() << "\n"; } } dbgs() << "\n"; -} //showImproveSimpleJumpintoIf +} -template -int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT **plandBlk) { - bool migrateTrue = false; - bool migrateFalse = false; +int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, + MachineBasicBlock **LandMBBPtr) { + bool MigrateTrue = false; + bool MigrateFalse = false; - BlockT *landBlk = *plandBlk; + MachineBasicBlock *LandBlk = *LandMBBPtr; - assert((trueBlk == NULL || trueBlk->succ_size() <= 1) - && (falseBlk == NULL || falseBlk->succ_size() <= 1)); + assert((!TrueMBB || TrueMBB->succ_size() <= 1) + && (!FalseMBB || FalseMBB->succ_size() <= 1)); - if (trueBlk == falseBlk) { + if (TrueMBB == FalseMBB) return 0; - } - migrateTrue = needMigrateBlock(trueBlk); - migrateFalse = needMigrateBlock(falseBlk); + MigrateTrue = needMigrateBlock(TrueMBB); + MigrateFalse = needMigrateBlock(FalseMBB); - if (!migrateTrue && !migrateFalse) { + if (!MigrateTrue && !MigrateFalse) return 0; - } // If we need to migrate either trueBlk and falseBlk, migrate the rest that // have more than one predecessors. without doing this, its predecessor // rather than headBlk will have undefined value in initReg. - if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { - migrateTrue = true; - } - if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { - migrateFalse = true; - } + if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) + MigrateTrue = true; + if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) + MigrateFalse = true; DEBUG( dbgs() << "before improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); ); // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk @@ -1193,205 +1343,142 @@ int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, // add initReg = initVal to headBlk const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - unsigned initReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - if (!migrateTrue || !migrateFalse) { - int initVal = migrateTrue ? 0 : 1; - CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); - } + unsigned InitReg = + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); + if (!MigrateTrue || !MigrateFalse) + llvm_unreachable("Extra register needed to handle CFG"); - int numNewBlk = 0; + int NumNewBlk = 0; - if (landBlk == NULL) { - landBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(landBlk); //insert to function + if (!LandBlk) { + LandBlk = HeadMBB->getParent()->CreateMachineBasicBlock(); + HeadMBB->getParent()->push_back(LandBlk); //insert to function - if (trueBlk) { - trueBlk->addSuccessor(landBlk); + if (TrueMBB) { + TrueMBB->addSuccessor(LandBlk); } else { - headBlk->addSuccessor(landBlk); + HeadMBB->addSuccessor(LandBlk); } - if (falseBlk) { - falseBlk->addSuccessor(landBlk); + if (FalseMBB) { + FalseMBB->addSuccessor(LandBlk); } else { - headBlk->addSuccessor(landBlk); + HeadMBB->addSuccessor(LandBlk); } - numNewBlk ++; + NumNewBlk ++; } - bool landBlkHasOtherPred = (landBlk->pred_size() > 2); + bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" - typename BlockT::iterator insertPos = - CFGTraits::getInstrPos - (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); - - if (landBlkHasOtherPred) { - unsigned immReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); - unsigned cmpResReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - - CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, - initReg, immReg); - CFGTraits::insertCondBranchBefore(landBlk, insertPos, - AMDGPU::IF_PREDICATE_SET, passRep, - cmpResReg, DebugLoc()); + MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); + + if (LandBlkHasOtherPred) { + llvm_unreachable("Extra register needed to handle CFG"); + unsigned CmpResReg = + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); + llvm_unreachable("Extra compare instruction needed to handle CFG"); + insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, + CmpResReg, DebugLoc()); } - CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET, - passRep, initReg, DebugLoc()); + insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, + DebugLoc()); - if (migrateTrue) { - migrateInstruction(trueBlk, landBlk, insertPos); + if (MigrateTrue) { + migrateInstruction(TrueMBB, LandBlk, I); // need to uncondionally insert the assignment to ensure a path from its // predecessor rather than headBlk has valid value in initReg if // (initVal != 1). - CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); + llvm_unreachable("Extra register needed to handle CFG"); } - CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); + insertInstrBefore(I, AMDGPU::ELSE); - if (migrateFalse) { - migrateInstruction(falseBlk, landBlk, insertPos); + if (MigrateFalse) { + migrateInstruction(FalseMBB, LandBlk, I); // need to uncondionally insert the assignment to ensure a path from its // predecessor rather than headBlk has valid value in initReg if // (initVal != 0) - CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); + llvm_unreachable("Extra register needed to handle CFG"); } - if (landBlkHasOtherPred) { + if (LandBlkHasOtherPred) { // add endif - CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); + insertInstrBefore(I, AMDGPU::ENDIF); // put initReg = 2 to other predecessors of landBlk - for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), - predIterEnd = landBlk->pred_end(); predIter != predIterEnd; - ++predIter) { - BlockT *curBlk = *predIter; - if (curBlk != trueBlk && curBlk != falseBlk) { - CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); - } - } //for + for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), + PE = LandBlk->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *MBB = *PI; + if (MBB != TrueMBB && MBB != FalseMBB) + llvm_unreachable("Extra register needed to handle CFG"); + } } DEBUG( dbgs() << "result from improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); ); // update landBlk - *plandBlk = landBlk; - - return numNewBlk; -} //improveSimpleJumpintoIf - -template -void CFGStructurizer::handleLoopbreak(BlockT *exitingBlk, - LoopT *exitingLoop, - BlockT *exitBlk, - LoopT *exitLoop, - BlockT *landBlk) { - DEBUG( - dbgs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) - << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; - ); - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - - RegiT initReg = INVALIDREGNUM; - if (exitingLoop != exitLoop) { - initReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(initReg != INVALIDREGNUM); - addLoopBreakInitReg(exitLoop, initReg); - while (exitingLoop != exitLoop && exitingLoop) { - addLoopBreakOnReg(exitingLoop, initReg); - exitingLoop = exitingLoop->getParentLoop(); - } - assert(exitingLoop == exitLoop); - } - - mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); - -} //handleLoopbreak - -template -void CFGStructurizer::handleLoopcontBlock(BlockT *contingBlk, - LoopT *contingLoop, - BlockT *contBlk, - LoopT *contLoop) { - DEBUG( - dbgs() << "loopcontPattern cont = BB" << contingBlk->getNumber() - << " header = BB" << contBlk->getNumber() << "\n"; + *LandMBBPtr = LandBlk; - dbgs() << "Trying to continue loop-depth = " - << getLoopDepth(contLoop) - << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; - ); - - RegiT initReg = INVALIDREGNUM; - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - if (contingLoop != contLoop) { - initReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(initReg != INVALIDREGNUM); - addLoopContInitReg(contLoop, initReg); - while (contingLoop && contingLoop->getParentLoop() != contLoop) { - addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg - contingLoop = contingLoop->getParentLoop(); - } - assert(contingLoop && contingLoop->getParentLoop() == contLoop); - addLoopContOnReg(contingLoop, initReg); - } + return NumNewBlk; +} - settleLoopcontBlock(contingBlk, contBlk, initReg); -} //handleLoopcontBlock +void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB, + MachineLoop *ContingLoop, MachineBasicBlock *ContMBB, + MachineLoop *ContLoop) { + DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber() + << " header = BB" << ContMBB->getNumber() << "\n"; + dbgs() << "Trying to continue loop-depth = " + << getLoopDepth(ContLoop) + << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";); + settleLoopcontBlock(ContingMBB, ContMBB); +} -template -void CFGStructurizer::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { +void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, + MachineBasicBlock *SrcMBB) { DEBUG( - dbgs() << "serialPattern BB" << dstBlk->getNumber() - << " <= BB" << srcBlk->getNumber() << "\n"; + dbgs() << "serialPattern BB" << DstMBB->getNumber() + << " <= BB" << SrcMBB->getNumber() << "\n"; ); - dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end()); + DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); - dstBlk->removeSuccessor(srcBlk); - CFGTraits::cloneSuccessorList(dstBlk, srcBlk); + DstMBB->removeSuccessor(SrcMBB); + cloneSuccessorList(DstMBB, SrcMBB); - removeSuccessor(srcBlk); - retireBlock(dstBlk, srcBlk); -} //mergeSerialBlock + removeSuccessor(SrcMBB); + MLI->removeBlock(SrcMBB); + retireBlock(SrcMBB); +} -template -void CFGStructurizer::mergeIfthenelseBlock(InstrT *branchInstr, - BlockT *curBlk, - BlockT *trueBlk, - BlockT *falseBlk, - BlockT *landBlk) { +void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, + MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { DEBUG( - dbgs() << "ifPattern BB" << curBlk->getNumber(); + dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ "; - if (trueBlk) { - dbgs() << "BB" << trueBlk->getNumber(); + if (TrueMBB) { + dbgs() << "BB" << TrueMBB->getNumber(); } dbgs() << " } else "; dbgs() << "{ "; - if (falseBlk) { - dbgs() << "BB" << falseBlk->getNumber(); + if (FalseMBB) { + dbgs() << "BB" << FalseMBB->getNumber(); } dbgs() << " }\n "; dbgs() << "landBlock: "; - if (landBlk == NULL) { + if (!LandMBB) { dbgs() << "NULL"; } else { - dbgs() << "BB" << landBlk->getNumber(); + dbgs() << "BB" << LandMBB->getNumber(); } dbgs() << "\n"; ); - int oldOpcode = branchInstr->getOpcode(); - DebugLoc branchDL = branchInstr->getDebugLoc(); + int OldOpcode = BranchMI->getOpcode(); + DebugLoc BranchDL = BranchMI->getDebugLoc(); // transform to // if cond @@ -1401,1645 +1488,390 @@ void CFGStructurizer::mergeIfthenelseBlock(InstrT *branchInstr, // endif // landBlk - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(curBlk, branchInstr); - CFGTraits::insertCondBranchBefore(branchInstrPos, - CFGTraits::getBranchNzeroOpcode(oldOpcode), - passRep, - branchDL); - - if (trueBlk) { - curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end()); - curBlk->removeSuccessor(trueBlk); - if (landBlk && trueBlk->succ_size()!=0) { - trueBlk->removeSuccessor(landBlk); - } - retireBlock(curBlk, trueBlk); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); - - if (falseBlk) { - curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(), - falseBlk->end()); - curBlk->removeSuccessor(falseBlk); - if (landBlk && falseBlk->succ_size() != 0) { - falseBlk->removeSuccessor(landBlk); - } - retireBlock(curBlk, falseBlk); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); + MachineBasicBlock::iterator I = BranchMI; + insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), + BranchDL); - branchInstr->eraseFromParent(); + if (TrueMBB) { + MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); + MBB->removeSuccessor(TrueMBB); + if (LandMBB && TrueMBB->succ_size()!=0) + TrueMBB->removeSuccessor(LandMBB); + retireBlock(TrueMBB); + MLI->removeBlock(TrueMBB); + } - if (landBlk && trueBlk && falseBlk) { - curBlk->addSuccessor(landBlk); + if (FalseMBB) { + insertInstrBefore(I, AMDGPU::ELSE); + MBB->splice(I, FalseMBB, FalseMBB->begin(), + FalseMBB->end()); + MBB->removeSuccessor(FalseMBB); + if (LandMBB && FalseMBB->succ_size() != 0) + FalseMBB->removeSuccessor(LandMBB); + retireBlock(FalseMBB); + MLI->removeBlock(FalseMBB); } + insertInstrBefore(I, AMDGPU::ENDIF); -} //mergeIfthenelseBlock + BranchMI->eraseFromParent(); -template -void CFGStructurizer::mergeLooplandBlock(BlockT *dstBlk, - LoopLandInfo *loopLand) { - BlockT *landBlk = loopLand->landBlk; + if (LandMBB && TrueMBB && FalseMBB) + MBB->addSuccessor(LandMBB); - DEBUG( - dbgs() << "loopPattern header = BB" << dstBlk->getNumber() - << " land = BB" << landBlk->getNumber() << "\n"; - ); +} - // Loop contInitRegs are init at the beginning of the loop. - for (typename std::set::const_iterator iter = - loopLand->contInitRegs.begin(), - iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } +void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, + MachineBasicBlock *LandMBB) { + DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() + << " land = BB" << LandMBB->getNumber() << "\n";); /* we last inserterd the DebugLoc in the - * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. + * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current + * dstBlk. * search for the DebugLoc in the that statement. * if not found, we have to insert the empty/default DebugLoc */ - InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); - DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); - - CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); - // Loop breakInitRegs are init before entering the loop. - for (typename std::set::const_iterator iter = - loopLand->breakInitRegs.begin(), - iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } - // Loop endbranchInitRegs are init before entering the loop. - for (typename std::set::const_iterator iter = - loopLand->endbranchInitRegs.begin(), - iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); - } + MachineInstr *LoopBreakInstr = getLoopBreakInstr(DstBlk); + DebugLoc DLBreak = (LoopBreakInstr) ? LoopBreakInstr->getDebugLoc() : + DebugLoc(); + + insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DLBreak); - /* we last inserterd the DebugLoc in the continue statement in the current dstBlk + /* we last inserterd the DebugLoc in the continue statement in the current + * dstBlk. * search for the DebugLoc in the continue statement. * if not found, we have to insert the empty/default DebugLoc */ - InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); - DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); - - CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); - // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this - // loop. - for (typename std::set::const_iterator iter = - loopLand->breakOnRegs.begin(), - iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep, - *iter); - } - - // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this - // loop. - for (std::set::const_iterator iter = loopLand->contOnRegs.begin(), - iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { - CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, - passRep, *iter); - } - - dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); + MachineInstr *ContinueInstr = getContinueInstr(DstBlk); + DebugLoc DLContinue = (ContinueInstr) ? ContinueInstr->getDebugLoc() : + DebugLoc(); - for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), - iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { - dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. - } - - removeSuccessor(landBlk); - retireBlock(dstBlk, landBlk); -} //mergeLooplandBlock - -template -void CFGStructurizer::reversePredicateSetter(typename BlockT::iterator I) { - while (I--) { - if (I->getOpcode() == AMDGPU::PRED_X) { - switch (static_cast(I)->getOperand(2).getImm()) { - case OPCODE_IS_ZERO_INT: - static_cast(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT); - return; - case OPCODE_IS_NOT_ZERO_INT: - static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT); - return; - case OPCODE_IS_ZERO: - static_cast(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO); - return; - case OPCODE_IS_NOT_ZERO: - static_cast(I)->getOperand(2).setImm(OPCODE_IS_ZERO); - return; - default: - llvm_unreachable("PRED_X Opcode invalid!"); - } - } - } + insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DLContinue); + DstBlk->addSuccessor(LandMBB); + DstBlk->removeSuccessor(DstBlk); } -template -void CFGStructurizer::mergeLoopbreakBlock(BlockT *exitingBlk, - BlockT *exitBlk, - BlockT *exitLandBlk, - RegiT setReg) { - DEBUG( - dbgs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() - << " exit = BB" << exitBlk->getNumber() - << " land = BB" << exitLandBlk->getNumber() << "\n"; - ); - - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); - assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); - - DebugLoc DL = branchInstr->getDebugLoc(); - - BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); - - // transform exitingBlk to - // if ( ) { - // exitBlk (if exitBlk != exitLandBlk) - // setReg = 1 - // break - // }endif - // successor = {orgSuccessor(exitingBlk) - exitBlk} - - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(exitingBlk, branchInstr); - - if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { - //break_logical - - if (trueBranch != exitBlk) { - reversePredicateSetter(branchInstrPos); - } - CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); - } else { - if (trueBranch != exitBlk) { - reversePredicateSetter(branchInstr); - } - CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); - if (exitBlk != exitLandBlk) { - //splice is insert-before ... - exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), - exitBlk->end()); - } - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); - } - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); - } //if_logical +void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, + MachineBasicBlock *LandMBB) { + DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() + << " land = BB" << LandMBB->getNumber() << "\n";); + MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); + assert(BranchMI && isCondBranch(BranchMI)); + DebugLoc DL = BranchMI->getDebugLoc(); + MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); + MachineBasicBlock::iterator I = BranchMI; + if (TrueBranch != LandMBB) + reversePredicateSetter(I); + insertCondBranchBefore(I, AMDGPU::PREDICATED_BREAK, DL); //now branchInst can be erase safely - branchInstr->eraseFromParent(); - + BranchMI->eraseFromParent(); //now take care of successors, retire blocks - exitingBlk->removeSuccessor(exitBlk); - if (exitBlk != exitLandBlk) { - //splice is insert-before ... - exitBlk->removeSuccessor(exitLandBlk); - retireBlock(exitingBlk, exitBlk); - } - -} //mergeLoopbreakBlock - -template -void CFGStructurizer::settleLoopcontBlock(BlockT *contingBlk, - BlockT *contBlk, - RegiT setReg) { - DEBUG( - dbgs() << "settleLoopcontBlock conting = BB" - << contingBlk->getNumber() - << ", cont = BB" << contBlk->getNumber() << "\n"; - ); - - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); - if (branchInstr) { - assert(CFGTraits::isCondBranch(branchInstr)); - typename BlockT::iterator branchInstrPos = - CFGTraits::getInstrPos(contingBlk, branchInstr); - BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); - int oldOpcode = branchInstr->getOpcode(); - DebugLoc DL = branchInstr->getDebugLoc(); - - // transform contingBlk to - // if () { - // move instr after branchInstr - // continue - // or - // setReg = 1 - // break - // }endif - // successor = {orgSuccessor(contingBlk) - loopHeader} - - bool useContinueLogical = - (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); - - if (useContinueLogical == false) { - int branchOpcode = - trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) - : CFGTraits::getBranchZeroOpcode(oldOpcode); - - CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); - - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); - } else { - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); - } + ExitingMBB->removeSuccessor(LandMBB); +} - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); +void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, + MachineBasicBlock *ContMBB) { + DEBUG(dbgs() << "settleLoopcontBlock conting = BB" + << ContingMBB->getNumber() + << ", cont = BB" << ContMBB->getNumber() << "\n";); + + MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); + if (MI) { + assert(isCondBranch(MI)); + MachineBasicBlock::iterator I = MI; + MachineBasicBlock *TrueBranch = getTrueBranch(MI); + int OldOpcode = MI->getOpcode(); + DebugLoc DL = MI->getDebugLoc(); + + bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); + + if (UseContinueLogical == false) { + int BranchOpcode = + TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : + getBranchZeroOpcode(OldOpcode); + insertCondBranchBefore(I, BranchOpcode, DL); + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); + insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); } else { - int branchOpcode = - trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) - : CFGTraits::getContinueZeroOpcode(oldOpcode); - - CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); + int BranchOpcode = + TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : + getContinueZeroOpcode(OldOpcode); + insertCondBranchBefore(I, BranchOpcode, DL); } - branchInstr->eraseFromParent(); + MI->eraseFromParent(); } else { // if we've arrived here then we've already erased the branch instruction - // travel back up the basic block to see the last reference of our debug location - // we've just inserted that reference here so it should be representative - if (setReg != INVALIDREGNUM) { - CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); - } else { - // insertEnd to ensure phi-moves, if exist, go before the continue-instr. - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); - } - } //else - -} //settleLoopcontBlock - -// BBs in exitBlkSet are determined as in break-path for loopRep, -// before we can put code for BBs as inside loop-body for loopRep -// check whether those BBs are determined as cont-BB for parentLoopRep -// earlier. -// If so, generate a new BB newBlk -// (1) set newBlk common successor of BBs in exitBlkSet -// (2) change the continue-instr in BBs in exitBlkSet to break-instr -// (3) generate continue-instr in newBlk -// -template -typename CFGStructurizer::BlockT * -CFGStructurizer::relocateLoopcontBlock(LoopT *parentLoopRep, - LoopT *loopRep, - std::set &exitBlkSet, - BlockT *exitLandBlk) { - std::set endBlkSet; - - - - for (typename std::set::const_iterator iter = exitBlkSet.begin(), - iterEnd = exitBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *exitBlk = *iter; - BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); - - if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) - return NULL; - - endBlkSet.insert(endBlk); - } - - BlockT *newBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(newBlk); //insert to function - CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); - SHOWNEWBLK(newBlk, "New continue block: "); - - for (typename std::set::const_iterator iter = endBlkSet.begin(), - iterEnd = endBlkSet.end(); - iter != iterEnd; ++iter) { - BlockT *endBlk = *iter; - InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); - if (contInstr) { - contInstr->eraseFromParent(); - } - endBlk->addSuccessor(newBlk); - DEBUG( - dbgs() << "Add new continue Block to BB" - << endBlk->getNumber() << " successors\n"; - ); - } - - return newBlk; -} //relocateLoopcontBlock - - -// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as -// LoopLandBlock. This BB branch on the loop endBranchInit register to the -// pathes corresponding to the loop exiting branches. - -template -typename CFGStructurizer::BlockT * -CFGStructurizer::addLoopEndbranchBlock(LoopT *loopRep, - BlockTSmallerVector &exitingBlks, - BlockTSmallerVector &exitBlks) { - const AMDGPUInstrInfo *tii = - static_cast(passRep->getTargetInstrInfo()); - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - - RegiT endBranchReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - assert(endBranchReg >= 0); - - // reg = 0 before entering the loop - addLoopEndbranchInitReg(loopRep, endBranchReg); - - uint32_t numBlks = static_cast(exitingBlks.size()); - assert(numBlks >=2 && numBlks == exitBlks.size()); - - BlockT *preExitingBlk = exitingBlks[0]; - BlockT *preExitBlk = exitBlks[0]; - BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(preBranchBlk); //insert to function - SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); - - BlockT *newLandBlk = preBranchBlk; - - CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, - newLandBlk); - preExitingBlk->removeSuccessor(preExitBlk); - preExitingBlk->addSuccessor(newLandBlk); - - //it is redundant to add reg = 0 to exitingBlks[0] - - // For 1..n th exiting path (the last iteration handles two pathes) create the - // branch to the previous path and the current path. - for (uint32_t i = 1; i < numBlks; ++i) { - BlockT *curExitingBlk = exitingBlks[i]; - BlockT *curExitBlk = exitBlks[i]; - BlockT *curBranchBlk; - - if (i == numBlks - 1) { - curBranchBlk = curExitBlk; - } else { - curBranchBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(curBranchBlk); //insert to function - SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); - } - - // Add reg = i to exitingBlks[i]. - CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, - endBranchReg, i); - - // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge - // (exitingBlks[i], newLandBlk). - CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, - newLandBlk); - curExitingBlk->removeSuccessor(curExitBlk); - curExitingBlk->addSuccessor(newLandBlk); - - // add to preBranchBlk the branch instruction: - // if (endBranchReg == preVal) - // preExitBlk - // else - // curBranchBlk - // - // preValReg = i - 1 - - DebugLoc DL; - RegiT preValReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - - preBranchBlk->insert(preBranchBlk->begin(), - tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, - i - 1)); - - // condResReg = (endBranchReg == preValReg) - RegiT condResReg = static_cast - (funcRep->getRegInfo().createVirtualRegister(I32RC)); - BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) - .addReg(endBranchReg).addReg(preValReg); - - BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) - .addMBB(preExitBlk).addReg(condResReg); - - preBranchBlk->addSuccessor(preExitBlk); - preBranchBlk->addSuccessor(curBranchBlk); - - // Update preExitingBlk, preExitBlk, preBranchBlk. - preExitingBlk = curExitingBlk; - preExitBlk = curExitBlk; - preBranchBlk = curBranchBlk; - - } //end for 1 .. n blocks - - return newLandBlk; -} //addLoopEndbranchBlock - -template -typename CFGStructurizer::PathToKind -CFGStructurizer::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, - bool allowSideEntry) { - assert(dstBlk); - - if (srcBlk == dstBlk) { - return SinglePath_InPath; - } - - while (srcBlk && srcBlk->succ_size() == 1) { - srcBlk = *srcBlk->succ_begin(); - if (srcBlk == dstBlk) { - return SinglePath_InPath; - } - - if (!allowSideEntry && srcBlk->pred_size() > 1) { - return Not_SinglePath; - } - } - - if (srcBlk && srcBlk->succ_size()==0) { - return SinglePath_NotInPath; - } - - return Not_SinglePath; -} //singlePathTo - -// If there is a single path from srcBlk to dstBlk, return the last block before -// dstBlk If there is a single path from srcBlk->end without dstBlk, return the -// last block in the path Otherwise, return NULL -template -typename CFGStructurizer::BlockT * -CFGStructurizer::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, - bool allowSideEntry) { - assert(dstBlk); - - if (srcBlk == dstBlk) { - return srcBlk; - } - - if (srcBlk->succ_size() == 0) { - return srcBlk; - } - - while (srcBlk && srcBlk->succ_size() == 1) { - BlockT *preBlk = srcBlk; - - srcBlk = *srcBlk->succ_begin(); - if (srcBlk == NULL) { - return preBlk; - } - - if (!allowSideEntry && srcBlk->pred_size() > 1) { - return NULL; - } - } - - if (srcBlk && srcBlk->succ_size()==0) { - return srcBlk; + // travel back up the basic block to see the last reference of our debug + // location we've just inserted that reference here so it should be + // representative insertEnd to ensure phi-moves, if exist, go before the + // continue-instr. + insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, + getLastDebugLocInBB(ContingMBB)); } +} - return NULL; - -} //singlePathEnd - -template -int CFGStructurizer::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, - BlockT *dstBlk) { - int cloned = 0; - assert(preBlk->isSuccessor(srcBlk)); - while (srcBlk && srcBlk != dstBlk) { - assert(srcBlk->succ_size() == 1); - if (srcBlk->pred_size() > 1) { - srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); - ++cloned; +int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { + int Cloned = 0; + assert(PreMBB->isSuccessor(SrcMBB)); + while (SrcMBB && SrcMBB != DstMBB) { + assert(SrcMBB->succ_size() == 1); + if (SrcMBB->pred_size() > 1) { + SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); + ++Cloned; } - preBlk = srcBlk; - srcBlk = *srcBlk->succ_begin(); + PreMBB = SrcMBB; + SrcMBB = *SrcMBB->succ_begin(); } - return cloned; -} //cloneOnSideEntryTo + return Cloned; +} -template -typename CFGStructurizer::BlockT * -CFGStructurizer::cloneBlockForPredecessor(BlockT *curBlk, - BlockT *predBlk) { - assert(predBlk->isSuccessor(curBlk) && +MachineBasicBlock * +AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, + MachineBasicBlock *PredMBB) { + assert(PredMBB->isSuccessor(MBB) && "succBlk is not a prececessor of curBlk"); - BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions - CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); + MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions + replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); //srcBlk, oldBlk, newBlk - predBlk->removeSuccessor(curBlk); - predBlk->addSuccessor(cloneBlk); + PredMBB->removeSuccessor(MBB); + PredMBB->addSuccessor(CloneMBB); // add all successor to cloneBlk - CFGTraits::cloneSuccessorList(cloneBlk, curBlk); + cloneSuccessorList(CloneMBB, MBB); - numClonedInstr += curBlk->size(); + numClonedInstr += MBB->size(); DEBUG( dbgs() << "Cloned block: " << "BB" - << curBlk->getNumber() << "size " << curBlk->size() << "\n"; + << MBB->getNumber() << "size " << MBB->size() << "\n"; ); - SHOWNEWBLK(cloneBlk, "result of Cloned block: "); - - return cloneBlk; -} //cloneBlockForPredecessor - -template -typename CFGStructurizer::BlockT * -CFGStructurizer::exitingBlock2ExitBlock(LoopT *loopRep, - BlockT *exitingBlk) { - BlockT *exitBlk = NULL; - - for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), - iterSuccEnd = exitingBlk->succ_end(); - iterSucc != iterSuccEnd; ++iterSucc) { - BlockT *curBlk = *iterSucc; - if (!loopRep->contains(curBlk)) { - assert(exitBlk == NULL); - exitBlk = curBlk; - } - } + SHOWNEWBLK(CloneMBB, "result of Cloned block: "); - assert(exitBlk != NULL); - - return exitBlk; -} //exitingBlock2ExitBlock + return CloneMBB; +} -template -void CFGStructurizer::migrateInstruction(BlockT *srcBlk, - BlockT *dstBlk, - InstrIterator insertPos) { - InstrIterator spliceEnd; +void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, + MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { + MachineBasicBlock::iterator SpliceEnd; //look for the input branchinstr, not the AMDGPU branchinstr - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); - if (branchInstr == NULL) { + MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); + if (!BranchMI) { DEBUG( dbgs() << "migrateInstruction don't see branch instr\n" ; ); - spliceEnd = srcBlk->end(); + SpliceEnd = SrcMBB->end(); } else { DEBUG( dbgs() << "migrateInstruction see branch instr\n" ; - branchInstr->dump(); + BranchMI->dump(); ); - spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); + SpliceEnd = BranchMI; } DEBUG( - dbgs() << "migrateInstruction before splice dstSize = " << dstBlk->size() - << "srcSize = " << srcBlk->size() << "\n"; + dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() + << "srcSize = " << SrcMBB->size() << "\n"; ); //splice insert before insertPos - dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); + DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); DEBUG( - dbgs() << "migrateInstruction after splice dstSize = " << dstBlk->size() - << "srcSize = " << srcBlk->size() << "\n"; + dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() + << "srcSize = " << SrcMBB->size() << "\n"; ); -} //migrateInstruction +} -// normalizeInfiniteLoopExit change -// B1: -// uncond_br LoopHeader -// -// to -// B1: -// cond_br 1 LoopHeader dummyExit -// and return the newly added dummy exit block -// -template -typename CFGStructurizer::BlockT * -CFGStructurizer::normalizeInfiniteLoopExit(LoopT* LoopRep) { - BlockT *loopHeader; - BlockT *loopLatch; - loopHeader = LoopRep->getHeader(); - loopLatch = LoopRep->getLoopLatch(); - BlockT *dummyExitBlk = NULL; +MachineBasicBlock * +AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { + MachineBasicBlock *LoopHeader = LoopRep->getHeader(); + MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); - if (loopHeader!=NULL && loopLatch!=NULL) { - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch); - if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) { - dummyExitBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(dummyExitBlk); //insert to function - SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); - - DEBUG(dbgs() << "Old branch instr: " << *branchInstr << "\n";); - - typename BlockT::iterator insertPos = - CFGTraits::getInstrPos(loopLatch, branchInstr); - unsigned immReg = - funcRep->getRegInfo().createVirtualRegister(I32RC); - CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1); - InstrT *newInstr = - CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep); - MachineInstrBuilder MIB(*funcRep, newInstr); - MIB.addMBB(loopHeader); - MIB.addReg(immReg, false); - - SHOWNEWINSTR(newInstr); - - branchInstr->eraseFromParent(); - loopLatch->addSuccessor(dummyExitBlk); - } - } - return dummyExitBlk; -} //normalizeInfiniteLoopExit + if (!LoopHeader || !LoopLatch) + return NULL; + MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); + // Is LoopRep an infinite loop ? + if (!BranchMI || !isUncondBranch(BranchMI)) + return NULL; + + MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); + FuncRep->push_back(DummyExitBlk); //insert to function + SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); + DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); + MachineBasicBlock::iterator I = BranchMI; + unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC); + llvm_unreachable("Extra register needed to handle CFG"); + MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32); + MachineInstrBuilder MIB(*FuncRep, NewMI); + MIB.addMBB(LoopHeader); + MIB.addReg(ImmReg, false); + SHOWNEWINSTR(NewMI); + BranchMI->eraseFromParent(); + LoopLatch->addSuccessor(DummyExitBlk); + + return DummyExitBlk; +} -template -void CFGStructurizer::removeUnconditionalBranch(BlockT *srcBlk) { - InstrT *branchInstr; +void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { + MachineInstr *BranchMI; // I saw two unconditional branch in one basic block in example // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. - while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) - && CFGTraits::isUncondBranch(branchInstr)) { - DEBUG( - dbgs() << "Removing unconditional branch instruction" ; - branchInstr->dump(); - ); - branchInstr->eraseFromParent(); + while ((BranchMI = getLoopendBlockBranchInstr(MBB)) + && isUncondBranch(BranchMI)) { + DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump();); + BranchMI->eraseFromParent(); } -} //removeUnconditionalBranch +} -template -void CFGStructurizer::removeRedundantConditionalBranch(BlockT *srcBlk) { - if (srcBlk->succ_size() == 2) { - BlockT *blk1 = *srcBlk->succ_begin(); - BlockT *blk2 = *(++srcBlk->succ_begin()); +void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( + MachineBasicBlock *MBB) { + if (MBB->succ_size() != 2) + return; + MachineBasicBlock *MBB1 = *MBB->succ_begin(); + MachineBasicBlock *MBB2 = *(++MBB->succ_begin()); + if (MBB1 != MBB2) + return; + + MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); + assert(BranchMI && isCondBranch(BranchMI)); + DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump();); + BranchMI->eraseFromParent(); + SHOWNEWBLK(MBB1, "Removing redundant successor"); + MBB->removeSuccessor(MBB1); +} - if (blk1 == blk2) { - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); - assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); - DEBUG( - dbgs() << "Removing unneeded conditional branch instruction" ; - branchInstr->dump(); - ); - branchInstr->eraseFromParent(); - SHOWNEWBLK(blk1, "Removing redundant successor"); - srcBlk->removeSuccessor(blk1); - } - } -} //removeRedundantConditionalBranch - -template -void CFGStructurizer::addDummyExitBlock(SmallVectorImpl - &retBlks) { - BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(dummyExitBlk); //insert to function - CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); - - for (typename SmallVectorImpl::iterator iter = - retBlks.begin(), - iterEnd = retBlks.end(); iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); - if (curInstr) { - curInstr->eraseFromParent(); - } - curBlk->addSuccessor(dummyExitBlk); +void AMDGPUCFGStructurizer::addDummyExitBlock( + SmallVectorImpl &RetMBB) { + MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); + FuncRep->push_back(DummyExitBlk); //insert to function + insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); + + for (SmallVectorImpl::iterator It = RetMBB.begin(), + E = RetMBB.end(); It != E; ++It) { + MachineBasicBlock *MBB = *It; + MachineInstr *MI = getReturnInstr(MBB); + if (MI) + MI->eraseFromParent(); + MBB->addSuccessor(DummyExitBlk); DEBUG( - dbgs() << "Add dummyExitBlock to BB" << curBlk->getNumber() + dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() << " successors\n"; ); - } //for - - SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); -} //addDummyExitBlock - -template -void CFGStructurizer::removeSuccessor(BlockT *srcBlk) { - while (srcBlk->succ_size()) { - srcBlk->removeSuccessor(*srcBlk->succ_begin()); } + SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); } -template -void CFGStructurizer::recordSccnum(BlockT *srcBlk, int sccNum) { - BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; +void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { + while (MBB->succ_size()) + MBB->removeSuccessor(*MBB->succ_begin()); +} - if (srcBlkInfo == NULL) { - srcBlkInfo = new BlockInfo(); - } - - srcBlkInfo->sccNum = sccNum; +void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, + int SccNum) { + BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; + if (!srcBlkInfo) + srcBlkInfo = new BlockInformation(); + srcBlkInfo->SccNum = SccNum; } -template -int CFGStructurizer::getSCCNum(BlockT *srcBlk) { - BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; - return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM; -} - -template -void CFGStructurizer::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { +void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { DEBUG( - dbgs() << "Retiring BB" << srcBlk->getNumber() << "\n"; + dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; ); - BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; + BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; - if (srcBlkInfo == NULL) { - srcBlkInfo = new BlockInfo(); - } + if (!SrcBlkInfo) + SrcBlkInfo = new BlockInformation(); - srcBlkInfo->isRetired = true; - assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 + SrcBlkInfo->IsRetired = true; + assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 && "can't retire block yet"); } -template -bool CFGStructurizer::isRetiredBlock(BlockT *srcBlk) { - BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; - return (srcBlkInfo && srcBlkInfo->isRetired); -} - -template -bool CFGStructurizer::isActiveLoophead(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - while (loopRep && loopRep->getHeader() == curBlk) { - LoopLandInfo *loopLand = getLoopLandInfo(loopRep); - - if(loopLand == NULL) - return true; - - BlockT *landBlk = loopLand->landBlk; - assert(landBlk); - if (!isRetiredBlock(landBlk)) { - return true; - } - - loopRep = loopRep->getParentLoop(); +void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep, + MachineBasicBlock *MBB) { + MachineBasicBlock *&TheEntry = LLInfoMap[loopRep]; + if (!MBB) { + MBB = FuncRep->CreateMachineBasicBlock(); + FuncRep->push_back(MBB); //insert to function + SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: "); } - - return false; -} //isActiveLoophead - -template -bool CFGStructurizer::needMigrateBlock(BlockT *blk) { - const unsigned blockSizeThreshold = 30; - const unsigned cloneInstrThreshold = 100; - - bool multiplePreds = blk && (blk->pred_size() > 1); - - if(!multiplePreds) - return false; - - unsigned blkSize = blk->size(); - return ((blkSize > blockSizeThreshold) - && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); -} //needMigrateBlock - -template -typename CFGStructurizer::BlockT * -CFGStructurizer::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, - BlockTSmallerVector &exitBlks, - std::set &exitBlkSet) { - SmallVector inpathBlks; //in exit path blocks - - for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), - predIterEnd = landBlk->pred_end(); - predIter != predIterEnd; ++predIter) { - BlockT *curBlk = *predIter; - if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { - inpathBlks.push_back(curBlk); - } - } //for - - //if landBlk has predecessors that are not in the given loop, - //create a new block - BlockT *newLandBlk = landBlk; - if (inpathBlks.size() != landBlk->pred_size()) { - newLandBlk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(newLandBlk); //insert to function - newLandBlk->addSuccessor(landBlk); - for (typename SmallVectorImpl::iterator iter = - inpathBlks.begin(), - iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { - BlockT *curBlk = *iter; - CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); - //srcBlk, oldBlk, newBlk - curBlk->removeSuccessor(landBlk); - curBlk->addSuccessor(newLandBlk); - } - for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { - if (exitBlks[i] == landBlk) { - exitBlks[i] = newLandBlk; - } - } - SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); - } - - setLoopLandBlock(loopRep, newLandBlk); - - return newLandBlk; -} // recordLoopbreakLand - -template -void CFGStructurizer::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - assert(theEntry->landBlk == NULL); - - if (blk == NULL) { - blk = funcRep->CreateMachineBasicBlock(); - funcRep->push_back(blk); //insert to function - SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); - } - - theEntry->landBlk = blk; - + TheEntry = MBB; DEBUG( dbgs() << "setLoopLandBlock loop-header = BB" << loopRep->getHeader()->getNumber() - << " landing-block = BB" << blk->getNumber() << "\n"; - ); -} // setLoopLandBlock - -template -void CFGStructurizer::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - - theEntry->breakOnRegs.insert(regNum); - - DEBUG( - dbgs() << "addLoopBreakOnReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - ); -} // addLoopBreakOnReg - -template -void CFGStructurizer::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->contOnRegs.insert(regNum); - - DEBUG( - dbgs() << "addLoopContOnReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - ); -} // addLoopContOnReg - -template -void CFGStructurizer::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->breakInitRegs.insert(regNum); - - DEBUG( - dbgs() << "addLoopBreakInitReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - ); -} // addLoopBreakInitReg - -template -void CFGStructurizer::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->contInitRegs.insert(regNum); - - DEBUG( - dbgs() << "addLoopContInitReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; - ); -} // addLoopContInitReg - -template -void CFGStructurizer::addLoopEndbranchInitReg(LoopT *loopRep, - RegiT regNum) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - if (theEntry == NULL) { - theEntry = new LoopLandInfo(); - } - theEntry->endbranchInitRegs.insert(regNum); - - DEBUG( - dbgs() << "addLoopEndbranchInitReg loop-header = BB" - << loopRep->getHeader()->getNumber() - << " regNum = " << regNum << "\n"; + << " landing-block = BB" << MBB->getNumber() << "\n"; ); -} // addLoopEndbranchInitReg - -template -typename CFGStructurizer::LoopLandInfo * -CFGStructurizer::getLoopLandInfo(LoopT *loopRep) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - return theEntry; -} // getLoopLandInfo - -template -typename CFGStructurizer::BlockT * -CFGStructurizer::getLoopLandBlock(LoopT *loopRep) { - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; - - return theEntry ? theEntry->landBlk : NULL; -} // getLoopLandBlock - - -template -bool CFGStructurizer::hasBackEdge(BlockT *curBlk) { - LoopT *loopRep = loopInfo->getLoopFor(curBlk); - if (loopRep == NULL) - return false; - - BlockT *loopHeader = loopRep->getHeader(); - - return curBlk->isSuccessor(loopHeader); - -} //hasBackEdge - -template -unsigned CFGStructurizer::getLoopDepth(LoopT *loopRep) { - return loopRep ? loopRep->getLoopDepth() : 0; -} //getLoopDepth - -template -int CFGStructurizer::countActiveBlock -(typename SmallVectorImpl::const_iterator iterStart, - typename SmallVectorImpl::const_iterator iterEnd) { - int count = 0; - while (iterStart != iterEnd) { - if (!isRetiredBlock(*iterStart)) { - ++count; - } - ++iterStart; - } - - return count; -} //countActiveBlock +} -// This is work around solution for findNearestCommonDominator not avaiable to -// post dom a proper fix should go to Dominators.h. +MachineBasicBlock * +AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2) { -template -typename CFGStructurizer::BlockT* -CFGStructurizer::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { + if (PDT->dominates(MBB1, MBB2)) + return MBB1; + if (PDT->dominates(MBB2, MBB1)) + return MBB2; - if (postDomTree->dominates(blk1, blk2)) { - return blk1; - } - if (postDomTree->dominates(blk2, blk1)) { - return blk2; - } - - DomTreeNodeT *node1 = postDomTree->getNode(blk1); - DomTreeNodeT *node2 = postDomTree->getNode(blk2); + MachineDomTreeNode *Node1 = PDT->getNode(MBB1); + MachineDomTreeNode *Node2 = PDT->getNode(MBB2); // Handle newly cloned node. - if (node1 == NULL && blk1->succ_size() == 1) { - return findNearestCommonPostDom(*blk1->succ_begin(), blk2); - } - if (node2 == NULL && blk2->succ_size() == 1) { - return findNearestCommonPostDom(blk1, *blk2->succ_begin()); - } + if (!Node1 && MBB1->succ_size() == 1) + return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2); + if (!Node2 && MBB2->succ_size() == 1) + return findNearestCommonPostDom(MBB1, *MBB2->succ_begin()); - if (node1 == NULL || node2 == NULL) { + if (!Node1 || !Node2) return NULL; - } - node1 = node1->getIDom(); - while (node1) { - if (postDomTree->dominates(node1, node2)) { - return node1->getBlock(); - } - node1 = node1->getIDom(); + Node1 = Node1->getIDom(); + while (Node1) { + if (PDT->dominates(Node1, Node2)) + return Node1->getBlock(); + Node1 = Node1->getIDom(); } return NULL; } -template -typename CFGStructurizer::BlockT * -CFGStructurizer::findNearestCommonPostDom -(typename std::set &blks) { - BlockT *commonDom; - typename std::set::const_iterator iter = blks.begin(); - typename std::set::const_iterator iterEnd = blks.end(); - for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { - BlockT *curBlk = *iter; - if (curBlk != commonDom) { - commonDom = findNearestCommonPostDom(curBlk, commonDom); - } +MachineBasicBlock * +AMDGPUCFGStructurizer::findNearestCommonPostDom( + std::set &MBBs) { + MachineBasicBlock *CommonDom; + std::set::const_iterator It = MBBs.begin(); + std::set::const_iterator E = MBBs.end(); + for (CommonDom = *It; It != E && CommonDom; ++It) { + MachineBasicBlock *MBB = *It; + if (MBB != CommonDom) + CommonDom = findNearestCommonPostDom(MBB, CommonDom); } DEBUG( dbgs() << "Common post dominator for exit blocks is "; - if (commonDom) { - dbgs() << "BB" << commonDom->getNumber() << "\n"; - } else { + if (CommonDom) + dbgs() << "BB" << CommonDom->getNumber() << "\n"; + else dbgs() << "NULL\n"; - } ); - return commonDom; -} //findNearestCommonPostDom - -} // end anonymous namespace - -//todo: move-end - - -//===----------------------------------------------------------------------===// -// -// CFGStructurizer for AMDGPU -// -//===----------------------------------------------------------------------===// - - -namespace { -class AMDGPUCFGStructurizer : public MachineFunctionPass { -public: - typedef MachineInstr InstructionType; - typedef MachineFunction FunctionType; - typedef MachineBasicBlock BlockType; - typedef MachineLoopInfo LoopinfoType; - typedef MachineDominatorTree DominatortreeType; - typedef MachinePostDominatorTree PostDominatortreeType; - typedef MachineDomTreeNode DomTreeNodeType; - typedef MachineLoop LoopType; - -protected: - TargetMachine &TM; - -public: - AMDGPUCFGStructurizer(char &pid, TargetMachine &tm); - const TargetInstrInfo *getTargetInstrInfo() const; - const AMDGPURegisterInfo *getTargetRegisterInfo() const; -}; - -} // end anonymous namespace -AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm) - : MachineFunctionPass(pid), TM(tm) { -} - -const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const { - return TM.getInstrInfo(); -} - -const AMDGPURegisterInfo *AMDGPUCFGStructurizer::getTargetRegisterInfo() const { - return static_cast(TM.getRegisterInfo()); -} - -//===----------------------------------------------------------------------===// -// -// CFGPrepare -// -//===----------------------------------------------------------------------===// - - -namespace { -class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer { -public: - static char ID; - -public: - AMDGPUCFGPrepare(TargetMachine &tm); - - virtual const char *getPassName() const; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - - bool runOnMachineFunction(MachineFunction &F); -}; - -char AMDGPUCFGPrepare::ID = 0; -} // end anonymous namespace - -AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm) - : AMDGPUCFGStructurizer(ID, tm ) { -} -const char *AMDGPUCFGPrepare::getPassName() const { - return "AMD IL Control Flow Graph Preparation Pass"; -} - -void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); + return CommonDom; } -//===----------------------------------------------------------------------===// -// -// CFGPerform -// -//===----------------------------------------------------------------------===// - - -namespace { -class AMDGPUCFGPerform : public AMDGPUCFGStructurizer { -public: - static char ID; - -public: - AMDGPUCFGPerform(TargetMachine &tm); - virtual const char *getPassName() const; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - bool runOnMachineFunction(MachineFunction &F); -}; - -char AMDGPUCFGPerform::ID = 0; -} // end anonymous namespace - - AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm) -: AMDGPUCFGStructurizer(ID, tm) { -} - -const char *AMDGPUCFGPerform::getPassName() const { - return "AMD IL Control Flow Graph structurizer Pass"; -} - -void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); -} - -//===----------------------------------------------------------------------===// -// -// CFGStructTraits -// -//===----------------------------------------------------------------------===// - -namespace { -// this class is tailor to the AMDGPU backend -template<> -struct CFGStructTraits { - typedef int RegiT; - - static int getBranchNzeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; - default: - llvm_unreachable("internal error"); - } - return -1; - } - - static int getBranchZeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; - default: - llvm_unreachable("internal error"); - } - return -1; - } - - static int getContinueNzeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; - default: - llvm_unreachable("internal error"); - }; - return -1; - } - - static int getContinueZeroOpcode(int oldOpcode) { - switch(oldOpcode) { - case AMDGPU::JUMP_COND: - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; - default: - llvm_unreachable("internal error"); - } - return -1; - } - - static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { - return instr->getOperand(0).getMBB(); - } - - static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { - instr->getOperand(0).setMBB(blk); - } - - static MachineBasicBlock * - getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { - assert(blk->succ_size() == 2); - MachineBasicBlock *trueBranch = getTrueBranch(instr); - MachineBasicBlock::succ_iterator iter = blk->succ_begin(); - MachineBasicBlock::succ_iterator iterNext = iter; - ++iterNext; - - return (*iter == trueBranch) ? *iterNext : *iter; - } - - static bool isCondBranch(MachineInstr *instr) { - switch (instr->getOpcode()) { - case AMDGPU::JUMP_COND: - case AMDGPU::BRANCH_COND_i32: - case AMDGPU::BRANCH_COND_f32: - break; - default: - return false; - } - return true; - } - - static bool isUncondBranch(MachineInstr *instr) { - switch (instr->getOpcode()) { - case AMDGPU::JUMP: - case AMDGPU::BRANCH: - return true; - default: - return false; - } - return true; - } - - static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { - //get DebugLoc from the first MachineBasicBlock instruction with debug info - DebugLoc DL; - for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { - MachineInstr *instr = &(*iter); - if (instr->getDebugLoc().isUnknown() == false) { - DL = instr->getDebugLoc(); - } - } - return DL; - } - - static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - MachineInstr *instr = &*iter; - if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { - return instr; - } - return NULL; - } - - // The correct naming for this is getPossibleLoopendBlockBranchInstr. - // - // BB with backward-edge could have move instructions after the branch - // instruction. Such move instruction "belong to" the loop backward-edge. - // - static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { - const AMDGPUInstrInfo * TII = static_cast( - blk->getParent()->getTarget().getInstrInfo()); - - for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), - iterEnd = blk->rend(); iter != iterEnd; ++iter) { - // FIXME: Simplify - MachineInstr *instr = &*iter; - if (instr) { - if (isCondBranch(instr) || isUncondBranch(instr)) { - return instr; - } else if (!TII->isMov(instr->getOpcode())) { - break; - } - } - } - return NULL; - } - - static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - if (iter != blk->rend()) { - MachineInstr *instr = &(*iter); - if (instr->getOpcode() == AMDGPU::RETURN) { - return instr; - } - } - return NULL; - } - - static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { - MachineBasicBlock::reverse_iterator iter = blk->rbegin(); - if (iter != blk->rend()) { - MachineInstr *instr = &(*iter); - if (instr->getOpcode() == AMDGPU::CONTINUE) { - return instr; - } - } - return NULL; - } - - static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { - for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { - MachineInstr *instr = &(*iter); - if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) { - return instr; - } - } - return NULL; - } - - static bool isReturnBlock(MachineBasicBlock *blk) { - MachineInstr *instr = getReturnInstr(blk); - bool isReturn = (blk->succ_size() == 0); - if (instr) { - assert(isReturn); - } else if (isReturn) { - DEBUG( - dbgs() << "BB" << blk->getNumber() - <<" is return block without RETURN instr\n"; - ); - } - - return isReturn; - } +char AMDGPUCFGStructurizer::ID = 0; - static MachineBasicBlock::iterator - getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { - assert(instr->getParent() == blk && "instruction doesn't belong to block"); - MachineBasicBlock::iterator iter = blk->begin(); - MachineBasicBlock::iterator iterEnd = blk->end(); - while (&(*iter) != instr && iter != iterEnd) { - ++iter; - } - - assert(iter != iterEnd); - return iter; - }//getInstrPos - - static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep) { - return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); - } //insertInstrBefore - - static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep, DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); - - MachineBasicBlock::iterator res; - if (blk->begin() != blk->end()) { - blk->insert(blk->begin(), newInstr); - } else { - blk->push_back(newInstr); - } - - SHOWNEWINSTR(newInstr); - - return newInstr; - } //insertInstrBefore - - static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep) { - insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); - } //insertInstrEnd - - static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, - AMDGPUCFGStructurizer *passRep, DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineInstr *newInstr = blk->getParent() - ->CreateMachineInstr(tii->get(newOpcode), DL); - - blk->push_back(newInstr); - //assume the instruction doesn't take any reg operand ... - - SHOWNEWINSTR(newInstr); - } //insertInstrEnd - - static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep) { - MachineInstr *oldInstr = &(*instrPos); - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineInstr *newInstr = - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), - DebugLoc()); - - blk->insert(instrPos, newInstr); - //assume the instruction doesn't take any reg operand ... - - SHOWNEWINSTR(newInstr); - return newInstr; - } //insertInstrBefore - - static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - DebugLoc DL) { - MachineInstr *oldInstr = &(*instrPos); - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineFunction *MF = blk->getParent(); - MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL); - - blk->insert(instrPos, newInstr); - MachineInstrBuilder MIB(*MF, newInstr); - MIB.addReg(oldInstr->getOperand(1).getReg(), false); - - SHOWNEWINSTR(newInstr); - //erase later oldInstr->eraseFromParent(); - } //insertCondBranchBefore - - static void insertCondBranchBefore(MachineBasicBlock *blk, - MachineBasicBlock::iterator insertPos, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, - DebugLoc DL) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineFunction *MF = blk->getParent(); - - MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL); - - //insert before - blk->insert(insertPos, newInstr); - MachineInstrBuilder(*MF, newInstr).addReg(regNum, false); - - SHOWNEWINSTR(newInstr); - } //insertCondBranchBefore - - static void insertCondBranchEnd(MachineBasicBlock *blk, - int newOpcode, - AMDGPUCFGStructurizer *passRep, - RegiT regNum) { - const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); - MachineFunction *MF = blk->getParent(); - MachineInstr *newInstr = - MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); - - blk->push_back(newInstr); - MachineInstrBuilder(*MF, newInstr).addReg(regNum, false); - - SHOWNEWINSTR(newInstr); - } //insertCondBranchEnd - - - static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, int regVal) { - MachineInstr *oldInstr = &(*instrPos); - const AMDGPUInstrInfo *tii = - static_cast(passRep->getTargetInstrInfo()); - MachineBasicBlock *blk = oldInstr->getParent(); - MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, - regVal); - blk->insert(instrPos, newInstr); - - SHOWNEWINSTR(newInstr); - } //insertAssignInstrBefore - - static void insertAssignInstrBefore(MachineBasicBlock *blk, - AMDGPUCFGStructurizer *passRep, - RegiT regNum, int regVal) { - const AMDGPUInstrInfo *tii = - static_cast(passRep->getTargetInstrInfo()); - - MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, - regVal); - if (blk->begin() != blk->end()) { - blk->insert(blk->begin(), newInstr); - } else { - blk->push_back(newInstr); - } - - SHOWNEWINSTR(newInstr); - - } //insertInstrBefore - - static void insertCompareInstrBefore(MachineBasicBlock *blk, - MachineBasicBlock::iterator instrPos, - AMDGPUCFGStructurizer *passRep, - RegiT dstReg, RegiT src1Reg, - RegiT src2Reg) { - const AMDGPUInstrInfo *tii = - static_cast(passRep->getTargetInstrInfo()); - MachineFunction *MF = blk->getParent(); - MachineInstr *newInstr = - MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); - - MachineInstrBuilder MIB(*MF, newInstr); - MIB.addReg(dstReg, RegState::Define); //set target - MIB.addReg(src1Reg); //set src value - MIB.addReg(src2Reg); //set src value - - blk->insert(instrPos, newInstr); - SHOWNEWINSTR(newInstr); - - } //insertCompareInstrBefore - - static void cloneSuccessorList(MachineBasicBlock *dstBlk, - MachineBasicBlock *srcBlk) { - for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), - iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { - dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of - } - } //cloneSuccessorList - - static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { - MachineFunction *func = srcBlk->getParent(); - MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); - func->push_back(newBlk); //insert to function - for (MachineBasicBlock::iterator iter = srcBlk->begin(), - iterEnd = srcBlk->end(); - iter != iterEnd; ++iter) { - MachineInstr *instr = func->CloneMachineInstr(iter); - newBlk->push_back(instr); - } - return newBlk; - } - - //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because - //the AMDGPU instruction is not recognized as terminator fix this and retire - //this routine - static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, - MachineBasicBlock *oldBlk, - MachineBasicBlock *newBlk) { - MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); - if (branchInstr && isCondBranch(branchInstr) && - getTrueBranch(branchInstr) == oldBlk) { - setTrueBranch(branchInstr, newBlk); - } - } - - static void wrapup(MachineBasicBlock *entryBlk) { - assert((!entryBlk->getParent()->getJumpTableInfo() - || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) - && "found a jump table"); - - //collect continue right before endloop - SmallVector contInstr; - MachineBasicBlock::iterator pre = entryBlk->begin(); - MachineBasicBlock::iterator iterEnd = entryBlk->end(); - MachineBasicBlock::iterator iter = pre; - while (iter != iterEnd) { - if (pre->getOpcode() == AMDGPU::CONTINUE - && iter->getOpcode() == AMDGPU::ENDLOOP) { - contInstr.push_back(pre); - } - pre = iter; - ++iter; - } //end while - - //delete continue right before endloop - for (unsigned i = 0; i < contInstr.size(); ++i) { - contInstr[i]->eraseFromParent(); - } - - // TODO to fix up jump table so later phase won't be confused. if - // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but - // there isn't such an interface yet. alternatively, replace all the other - // blocks in the jump table with the entryBlk //} - - } //wrapup - - static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis(); - } - - static MachinePostDominatorTree* - getPostDominatorTree(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis(); - } - - static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) { - return &pass.getAnalysis(); - } -}; // template class CFGStructTraits } // end anonymous namespace -// createAMDGPUCFGPreparationPass- Returns a pass -FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) { - return new AMDGPUCFGPrepare(tm); -} - -bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) { - return CFGStructurizer().prepare(func, *this, - getTargetRegisterInfo()); -} -// createAMDGPUCFGStructurizerPass- Returns a pass FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) { - return new AMDGPUCFGPerform(tm); -} - -bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) { - return CFGStructurizer().run(func, *this, - getTargetRegisterInfo()); + return new AMDGPUCFGStructurizer(tm); } -- cgit v1.1 From 272458bd06d0c6d09e9bf776fb60735b0cdc8cf1 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Fri, 19 Jul 2013 21:45:15 +0000 Subject: R600: Don't emit empty then clause and use alu_pop_after git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186725 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDILCFGStructurizer.cpp | 8 +++-- lib/Target/R600/R600ControlFlowFinalizer.cpp | 48 ++++++++++++++++++++++------ lib/Target/R600/R600Instructions.td | 1 + 3 files changed, 46 insertions(+), 11 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index 85ac725..fac56f0 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -1039,8 +1039,11 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { } else if (FalseMBB->succ_size() == 1 && *FalseMBB->succ_begin() == TrueMBB) { // Triangle pattern, true is empty - LandBlk = TrueMBB; - TrueMBB = NULL; + // We reverse the predicate to make a triangle, empty false pattern; + std::swap(TrueMBB, FalseMBB); + reversePredicateSetter(MBB->end()); + LandBlk = FalseMBB; + FalseMBB = NULL; } else if (FalseMBB->succ_size() == 1 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { LandBlk = *FalseMBB->succ_begin(); @@ -1456,6 +1459,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { + assert (TrueMBB); DEBUG( dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ "; diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 1cd0ac3..b69d38b 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -347,6 +347,9 @@ public: MaxStack = 1; } std::vector FetchClauses, AluClauses; + std::vector LastAlu(1); + std::vector ToPopAfter; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { @@ -357,6 +360,10 @@ public: } MachineBasicBlock::iterator MI = I; + if (MI->getOpcode() != AMDGPU::ENDIF) + LastAlu.back() = 0; + if (MI->getOpcode() == AMDGPU::CF_ALU) + LastAlu.back() = MI; I++; switch (MI->getOpcode()) { case AMDGPU::CF_ALU_PUSH_BEFORE: @@ -403,6 +410,7 @@ public: break; } case AMDGPU::IF_PREDICATE_SET: { + LastAlu.push_back(0); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) .addImm(0) @@ -420,7 +428,7 @@ public: MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_ELSE)) .addImm(0) - .addImm(1); + .addImm(0); DEBUG(dbgs() << CfCount << ":"; MIb->dump();); IfThenElseStack.push_back(MIb); MI->eraseFromParent(); @@ -429,17 +437,24 @@ public: } case AMDGPU::ENDIF: { CurrentStack--; + if (LastAlu.back()) { + ToPopAfter.push_back(LastAlu.back()); + } else { + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + getHWInstrDesc(CF_POP)) + .addImm(CfCount + 1) + .addImm(1); + (void)MIb; + DEBUG(dbgs() << CfCount << ":"; MIb->dump();); + CfCount++; + } + MachineInstr *IfOrElseInst = IfThenElseStack.back(); IfThenElseStack.pop_back(); - CounterPropagateAddr(IfOrElseInst, CfCount + 1); - MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - getHWInstrDesc(CF_POP)) - .addImm(CfCount + 1) - .addImm(1); - (void)MIb; - DEBUG(dbgs() << CfCount << ":"; MIb->dump();); + CounterPropagateAddr(IfOrElseInst, CfCount); + IfOrElseInst->getOperand(1).setImm(1); + LastAlu.pop_back(); MI->eraseFromParent(); - CfCount++; break; } case AMDGPU::PREDICATED_BREAK: { @@ -484,6 +499,21 @@ public: break; } } + for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { + MachineInstr *Alu = ToPopAfter[i]; + BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), + TII->get(AMDGPU::CF_ALU_POP_AFTER)) + .addImm(Alu->getOperand(0).getImm()) + .addImm(Alu->getOperand(1).getImm()) + .addImm(Alu->getOperand(2).getImm()) + .addImm(Alu->getOperand(3).getImm()) + .addImm(Alu->getOperand(4).getImm()) + .addImm(Alu->getOperand(5).getImm()) + .addImm(Alu->getOperand(6).getImm()) + .addImm(Alu->getOperand(7).getImm()) + .addImm(Alu->getOperand(8).getImm()); + Alu->eraseFromParent(); + } MFI->StackSize = getHWStackSize(MaxStack, HasPush); } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index df5c438..3652c89 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -624,6 +624,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { def CF_ALU : ALU_CLAUSE<8, "ALU">; def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; +def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">; def FETCH_CLAUSE : AMDGPUInst <(outs), (ins i32imm:$addr), "Fetch clause starting at $addr:", [] > { -- cgit v1.1 From 7b61a701932d850d2777fafda1fea5ec841d893b Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 19 Jul 2013 23:52:47 +0000 Subject: Refactor AnalyzeBranch on ARM. The previous version did not always analyze indirect branches correctly. Under some circumstances, this led to the deletion of basic blocks that were the destination of indirect branches. In that case it left indirect branches to nowhere in the code. This patch replaces, and is more general than either of the previous fixes for indirect-branch-analysis issues, r181161 and r186461. For other branches (not indirect) this refactor should have *almost* identical behavior to the previous version. There are some corner cases where this refactor is able to analyze blocks that the previous version could not (e.g. this necessitated the update to thumb2-ifcvt2.ll). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186735 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 155 ++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 88 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5d012fc..977d936 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -272,111 +272,90 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. + TBB = 0; + FBB = 0; + MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) - return false; + return false; // Empty blocks are easy. --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - // Get the last instruction in the block. - MachineInstr *LastInst = I; - unsigned LastOpc = LastInst->getOpcode(); + // Walk backwards from the end of the basic block until the branch is + // analyzed or we give up. + while (isPredicated(I) || I->isTerminator()) { - // Check if it's an indirect branch first, this should return 'unanalyzable' - // even if it's predicated. - if (isIndirectBranchOpcode(LastOpc)) - return true; + // Flag to be raised on unanalyzeable instructions. This is useful in cases + // where we want to clean up on the end of the basic block before we bail + // out. + bool CantAnalyze = false; - if (!isUnpredicatedTerminator(I)) - return false; + // Skip over DEBUG values and predicated nonterminators. + while (I->isDebugValue() || !I->isTerminator()) { + if (I == MBB.begin()) + return false; + --I; + } - // Check whether the second-to-last branch is indirect, return - // 'unanalyzeable' here too. - if (I != MBB.begin() && prior(I)->isIndirectBranch()) - return true; + if (isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode())) { + // Indirect branches and jump tables can't be analyzed, but we still want + // to clean up any instructions at the tail of the basic block. + CantAnalyze = true; + } else if (isUncondBranchOpcode(I->getOpcode())) { + TBB = I->getOperand(0).getMBB(); + } else if (isCondBranchOpcode(I->getOpcode())) { + // Bail out if we encounter multiple conditional branches. + if (!Cond.empty()) + return true; - // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(LastInst->getOperand(1)); - Cond.push_back(LastInst->getOperand(2)); - return false; + assert(!FBB && "FBB should have been null."); + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(I->getOperand(1)); + Cond.push_back(I->getOperand(2)); + } else if (I->isReturn()) { + // Returns can't be analyzed, but we should run cleanup. + CantAnalyze = !isPredicated(I); + } else { + // We encountered other unrecognized terminator. Bail out immediately. + return true; } - return true; // Can't handle indirect branch. - } - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I != MBB.begin() && prior(I)->isIndirectBranch()) - return true; // Indirect branches are unanalyzeable. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); + // Cleanup code - to be run for unpredicated unconditional branches and + // returns. + if (!isPredicated(I) && + (isUncondBranchOpcode(I->getOpcode()) || + isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode()) || + I->isReturn())) { + // Forget any previous condition branch information - it no longer applies. + Cond.clear(); + FBB = 0; + + // If we can modify the function, delete everything below this + // unconditional branch. + if (AllowModify) { + MachineBasicBlock::iterator DI = llvm::next(I); + while (DI != MBB.end()) { + MachineInstr *InstToDelete = DI; + ++DI; + InstToDelete->eraseFromParent(); + } } } - } - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - Cond.push_back(SecondLastInst->getOperand(1)); - Cond.push_back(SecondLastInst->getOperand(2)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } + if (CantAnalyze) + return true; - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } + if (I == MBB.begin()) + return false; - // ...likewise if it ends with a branch table followed by an unconditional - // branch. The branch folder can create these, and we must get rid of them for - // correctness of Thumb constant islands. - if ((isJumpTableBranchOpcode(SecondLastOpc) || - isIndirectBranchOpcode(SecondLastOpc)) && - isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; + --I; } - // Otherwise, can't handle this. - return true; + // We made it past the terminators without bailing out - we must have + // analyzed this branch successfully. + return false; } -- cgit v1.1 From 3cd645701a2948e3d423ca6f98f872e8dd40f403 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 21 Jul 2013 07:28:13 +0000 Subject: Mark that the _ftol2 function used by windows on x86 to handle fptoui modifies ECX. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186787 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FloatingPoint.cpp | 1 + lib/Target/X86/X86InstrCompiler.td | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index a4ea1a9..48470da 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1662,6 +1662,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32)) .addExternalSymbol("_ftol2") .addReg(X86::ST0, RegState::ImplicitKill) + .addReg(X86::ECX, RegState::ImplicitDefine) .addReg(X86::EAX, RegState::Define | RegState::Implicit) .addReg(X86::EDX, RegState::Define | RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 8a7ee7d..8969946 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -129,12 +129,13 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), // The MSVC runtime contains an _ftol2 routine for converting floating-point // to integer values. It has a strange calling convention: the input is -// popped from the x87 stack, and the return value is given in EDX:EAX. No -// other registers (aside from flags) are touched. +// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is +// used as a temporary register. No other registers (aside from flags) are +// touched. // Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80 // variant is unnecessary. -let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in { +let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in { def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src), "# win32 fptoui", [(X86WinFTOL RFP32:$src)]>, -- cgit v1.1 From 72c8331ec1437f8c33fff1dac1ea0ebb11009411 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Sun, 21 Jul 2013 23:11:42 +0000 Subject: Treat nothrow forms of ::operator delete and ::operator delete[] as deallocation functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186798 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetLibraryInfo.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Target') diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 99fff59..8696b57 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -27,7 +27,9 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "_IO_getc", "_IO_putc", "_ZdaPv", + "_ZdaPvRKSt9nothrow_t", "_ZdlPv", + "_ZdlPvRKSt9nothrow_t", "_Znaj", "_ZnajRKSt9nothrow_t", "_Znam", -- cgit v1.1 From 95343ef3cd68cb1d19e9455767f38498544c6d1b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 22 Jul 2013 07:47:51 +0000 Subject: Reverse operands for Intel syntax form of 'bt' alias. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186809 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f33ae2a..d58f4aa 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1969,7 +1969,8 @@ def : InstAlias<"aad", (AAD8i8 10)>; def : InstAlias<"aam", (AAM8i8 10)>; // Disambiguate the mem/imm form of bt-without-a-suffix as btl. -def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>; +def : InstAlias<"bt {$imm, $mem|$mem, $imm}", + (BT32mi8 i32mem:$mem, i32i8imm:$imm), 0>; // clr aliases. def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>; -- cgit v1.1 From 1c6e6ce10c61f8db656a04af36e2b374c0fe9566 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 22 Jul 2013 09:06:12 +0000 Subject: ARM: remove now unneeded custom Asm converters After Ulrich's r180677 (thanks!) TableGen is intelligent enough to handle tied constraints involving complex operands properly, so virtually all of the ARM custom converters are now unnecessary. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186810 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 12 - lib/Target/ARM/ARMInstrNEON.td | 28 --- lib/Target/ARM/ARMInstrThumb2.td | 42 ++-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 349 +----------------------------- 4 files changed, 16 insertions(+), 415 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 84c210f..f543e5d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -2293,7 +2293,6 @@ multiclass AI2_ldridx op, string opc, InstrItinClass itin> { let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3"; let DecoderMethod = "DecodeAddrMode3Instruction"; } def _POST : AI3ldstidx op, string opc> { let Inst{22} = 1; let Inst{11-8} = offset{7-4}; let Inst{3-0} = offset{3-0}; - let AsmMatchConverter = "cvtLdExtTWriteBackImm"; } def r : AI3ldstidxT op, string opc> { let Inst{11-8} = 0; let Unpredictable{11-8} = 0b1111; let Inst{3-0} = Rm{3-0}; - let AsmMatchConverter = "cvtLdExtTWriteBackReg"; let DecoderMethod = "DecodeLDR"; } } @@ -2553,7 +2547,6 @@ multiclass AI2_stridx op, string opc> { let Inst{22} = 1; let Inst{11-8} = offset{7-4}; let Inst{3-0} = offset{3-0}; - let AsmMatchConverter = "cvtStExtTWriteBackImm"; } def r : AI3ldstidxT op, string opc> { let Inst{22} = 0; let Inst{11-8} = 0; let Inst{3-0} = Rm{3-0}; - let AsmMatchConverter = "cvtStExtTWriteBackReg"; } } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3e2ab06..af4f4d1 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -656,7 +656,6 @@ multiclass VLD1DWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, @@ -664,7 +663,6 @@ multiclass VLD1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QWB op7_4, string Dt> { @@ -675,7 +673,6 @@ multiclass VLD1QWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, @@ -683,7 +680,6 @@ multiclass VLD1QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -713,7 +709,6 @@ multiclass VLD1D3WB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, @@ -721,7 +716,6 @@ multiclass VLD1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -754,7 +748,6 @@ multiclass VLD1D4WB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, @@ -762,7 +755,6 @@ multiclass VLD1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -811,7 +803,6 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), itin, @@ -819,7 +810,6 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1348,7 +1338,6 @@ multiclass VLD1DUPWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), @@ -1357,7 +1346,6 @@ multiclass VLD1DUPWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QDUPWB op7_4, string Dt> { @@ -1369,7 +1357,6 @@ multiclass VLD1QDUPWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), @@ -1378,7 +1365,6 @@ multiclass VLD1QDUPWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1419,7 +1405,6 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), @@ -1428,7 +1413,6 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1609,7 +1593,6 @@ multiclass VST1DWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), @@ -1618,7 +1601,6 @@ multiclass VST1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } multiclass VST1QWB op7_4, string Dt> { @@ -1629,7 +1611,6 @@ multiclass VST1QWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), @@ -1638,7 +1619,6 @@ multiclass VST1QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1669,7 +1649,6 @@ multiclass VST1D3WB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), @@ -1678,7 +1657,6 @@ multiclass VST1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1714,7 +1692,6 @@ multiclass VST1D4WB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1723,7 +1700,6 @@ multiclass VST1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1773,7 +1749,6 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, @@ -1781,7 +1756,6 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } multiclass VST2QWB op7_4, string Dt> { @@ -1792,7 +1766,6 @@ multiclass VST2QWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1801,7 +1774,6 @@ multiclass VST2QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index fdbf5ae..5a425c6 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1318,10 +1318,7 @@ let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, - "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), @@ -1331,10 +1328,8 @@ def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, @@ -1343,10 +1338,8 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, @@ -1356,9 +1349,8 @@ def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + []>; + def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, @@ -1368,9 +1360,8 @@ def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + []>; + def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, @@ -1426,24 +1417,19 @@ def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "strh", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; } // mayStore = 1, neverHasSideEffects = 1 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), @@ -1532,7 +1518,6 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { - let AsmMatchConverter = "cvtT2LdrdPre"; let DecoderMethod = "DecodeT2LDRDPreInstruction"; } @@ -1545,7 +1530,6 @@ def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { - let AsmMatchConverter = "cvtT2StrdPre"; let DecoderMethod = "DecodeT2STRDPreInstruction"; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e10d2c7..fc1faff 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -219,44 +219,8 @@ class ARMAsmParser : public MCTargetAsmParser { SMLoc &EndLoc); // Asm Match Converter Methods - void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl &); - void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl &); - void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl &); - void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl &); - void cvtLdWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl &); - void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl &); - void cvtStWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl &); - void cvtStWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl &); - void cvtStWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl &); - void cvtLdExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl &); - void cvtLdExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl &); - void cvtStExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl &); - void cvtStExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl &); - void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl &); - void cvtStrdPre(MCInst &Inst, const SmallVectorImpl &); - void cvtLdWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl &); void cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl &); - void cvtVLDwbFixed(MCInst &Inst, - const SmallVectorImpl &); - void cvtVLDwbRegister(MCInst &Inst, - const SmallVectorImpl &); - void cvtVSTwbFixed(MCInst &Inst, - const SmallVectorImpl &); - void cvtVSTwbRegister(MCInst &Inst, - const SmallVectorImpl &); bool validateInstruction(MCInst &Inst, const SmallVectorImpl &Ops); bool processInstruction(MCInst &Inst, @@ -4077,260 +4041,9 @@ parseAM3Offset(SmallVectorImpl &Operands) { return MatchOperand_Success; } -/// cvtT2LdrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtT2LdrdPre(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateReg(0)); - // addr - ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtT2StrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtT2StrdPre(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateReg(0)); - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - - -/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdrdPre(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStrdPre(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtThumbMultiply - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. +/// Convert parsed operands to MCInst. Needed here because this instruction +/// only has two register operands, but multiplication is commutative so +/// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN". void ARMAsmParser:: cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl &Operands) { @@ -4348,62 +4061,6 @@ cvtThumbMultiply(MCInst &Inst, ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); } -void ARMAsmParser:: -cvtVLDwbFixed(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Vd - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Vn - ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -void ARMAsmParser:: -cvtVLDwbRegister(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Vd - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Vn - ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); - // Vm - ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -void ARMAsmParser:: -cvtVSTwbFixed(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Vn - ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); - // Vt - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -void ARMAsmParser:: -cvtVSTwbRegister(MCInst &Inst, - const SmallVectorImpl &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Vn - ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); - // Vm - ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); - // Vt - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. bool ARMAsmParser:: -- cgit v1.1 From 950bbfd43aa142c86548b0f640bef837b9119bc5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 22 Jul 2013 09:18:43 +0000 Subject: Add Intel variants to aliases for some FP instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186811 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d58f4aa..2227988 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2021,9 +2021,9 @@ def : InstAlias<"fucompi", (UCOM_FIPr ST1)>; // instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with // gas. multiclass FpUnaryAlias { - def : InstAlias; - def : InstAlias; } @@ -2048,12 +2048,12 @@ defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; // Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they // commute. We also allow fdiv[r]p/fsubrp even though they don't commute, // solely because gas supports it. -def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>; -def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>; -def : InstAlias<"fsubp %st(0), $op", (SUBR_FPrST0 RST:$op)>; -def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>; -def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>; -def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>; +def : InstAlias<"faddp\t{%st(0), $op|$op, ST(0)}", (ADD_FPrST0 RST:$op), 0>; +def : InstAlias<"fmulp\t{%st(0), $op|$op, ST(0)}", (MUL_FPrST0 RST:$op)>; +def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, ST(0)}", (SUBR_FPrST0 RST:$op)>; +def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, ST(0)}", (SUB_FPrST0 RST:$op)>; +def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, ST(0)}", (DIVR_FPrST0 RST:$op)>; +def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, ST(0)}", (DIV_FPrST0 RST:$op)>; // We accept "fnstsw %eax" even though it only writes %ax. def : InstAlias<"fnstsw %eax", (FNSTSW16r)>; -- cgit v1.1 From 5e08ce288af4c58cb93989544dd6df0e77ba7d1e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 22 Jul 2013 09:22:49 +0000 Subject: Change %xmm0 to XMM0 in Intel side of asm strings for PBLENDVB. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186812 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 79b1ca3..8fdcdd4 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7016,17 +7016,17 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem, int_x86_sse41_pblendvb>; // Aliases with the implicit xmm0 argument -def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (BLENDVPDrr0 VR128:$dst, VR128:$src2)>; -def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>; -def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (BLENDVPSrr0 VR128:$dst, VR128:$src2)>; -def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>; -def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (PBLENDVBrr0 VR128:$dst, VR128:$src2)>; -def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>; let Predicates = [UseSSE41] in { -- cgit v1.1 From 0ff6cf7f7e491a7b8179e7c62a266575fb4d523c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 22 Jul 2013 09:42:31 +0000 Subject: More Intel syntax alias fixes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186813 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 56 +++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 2227988..34547de 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1979,22 +1979,22 @@ def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>; def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>; // div and idiv aliases for explicit A register. -def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>; -def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>; -def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>; -def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>; -def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>; -def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>; -def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>; -def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>; -def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>; -def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>; -def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>; -def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>; -def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>; -def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>; -def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>; -def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>; +def : InstAlias<"div{b}\t{$src, %al|AL, $src}", (DIV8r GR8 :$src)>; +def : InstAlias<"div{w}\t{$src, %ax|AX, $src}", (DIV16r GR16:$src)>; +def : InstAlias<"div{l}\t{$src, %eax|EAX, $src}", (DIV32r GR32:$src)>; +def : InstAlias<"div{q}\t{$src, %rax|RAX, $src}", (DIV64r GR64:$src)>; +def : InstAlias<"div{b}\t{$src, %al|AL, $src}", (DIV8m i8mem :$src)>; +def : InstAlias<"div{w}\t{$src, %ax|AX, $src}", (DIV16m i16mem:$src)>; +def : InstAlias<"div{l}\t{$src, %eax|EAX, $src}", (DIV32m i32mem:$src)>; +def : InstAlias<"div{q}\t{$src, %rax|RAX, $src}", (DIV64m i64mem:$src)>; +def : InstAlias<"idiv{b}\t{$src, %al|AL, $src}", (IDIV8r GR8 :$src)>; +def : InstAlias<"idiv{w}\t{$src, %ax|AX, $src}", (IDIV16r GR16:$src)>; +def : InstAlias<"idiv{l}\t{$src, %eax|EAX, $src}", (IDIV32r GR32:$src)>; +def : InstAlias<"idiv{q}\t{$src, %rax|RAX, $src}", (IDIV64r GR64:$src)>; +def : InstAlias<"idiv{b}\t{$src, %al|AL, $src}", (IDIV8m i8mem :$src)>; +def : InstAlias<"idiv{w}\t{$src, %ax|AX, $src}", (IDIV16m i16mem:$src)>; +def : InstAlias<"idiv{l}\t{$src, %eax|EAX, $src}", (IDIV32m i32mem:$src)>; +def : InstAlias<"idiv{q}\t{$src, %rax|RAX, $src}", (IDIV64m i64mem:$src)>; @@ -2076,12 +2076,12 @@ def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>; def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>; // inb %dx -> inb %al, %dx -def : InstAlias<"inb %dx", (IN8rr)>; -def : InstAlias<"inw %dx", (IN16rr)>; -def : InstAlias<"inl %dx", (IN32rr)>; -def : InstAlias<"inb $port", (IN8ri i8imm:$port)>; -def : InstAlias<"inw $port", (IN16ri i8imm:$port)>; -def : InstAlias<"inl $port", (IN32ri i8imm:$port)>; +def : InstAlias<"inb\t{%dx|DX}", (IN8rr)>; +def : InstAlias<"inw\t{%dx|DX}", (IN16rr)>; +def : InstAlias<"inl\t{%dx|DX}", (IN32rr)>; +def : InstAlias<"inb\t$port", (IN8ri i8imm:$port)>; +def : InstAlias<"inw\t$port", (IN16ri i8imm:$port)>; +def : InstAlias<"inl\t$port", (IN32ri i8imm:$port)>; // jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp @@ -2130,12 +2130,12 @@ def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>; // Note: No GR32->GR64 movzx form. // outb %dx -> outb %al, %dx -def : InstAlias<"outb %dx", (OUT8rr)>; -def : InstAlias<"outw %dx", (OUT16rr)>; -def : InstAlias<"outl %dx", (OUT32rr)>; -def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>; -def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>; -def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>; +def : InstAlias<"outb\t{%dx|DX}", (OUT8rr)>; +def : InstAlias<"outw\t{%dx|DX}", (OUT16rr)>; +def : InstAlias<"outl\t{%dx|DX}", (OUT32rr)>; +def : InstAlias<"outb\t$port", (OUT8ir i8imm:$port)>; +def : InstAlias<"outw\t$port", (OUT16ir i8imm:$port)>; +def : InstAlias<"outl\t$port", (OUT32ir i8imm:$port)>; // 'sldt ' can be encoded with either sldtw or sldtq with the same // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity -- cgit v1.1 From 1011c13f15339632d2edd669dcb3ce22303a26a6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 22 Jul 2013 09:58:07 +0000 Subject: More Intel syntax alias fixes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186814 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFPStack.td | 2 +- lib/Target/X86/X86InstrInfo.td | 28 ++++++++++++++-------------- lib/Target/X86/X86InstrSSE.td | 8 ++++---- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 2224a08..dfff537 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -578,7 +578,7 @@ def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), let SchedRW = [WriteALU] in { let Defs = [AX], Uses = [FPSW] in def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags - (outs), (ins), "fnstsw %ax", + (outs), (ins), "fnstsw\t{%ax|AX}", [(set AX, (X86fp_stsw FPSW))], IIC_FNSTSW>, DF; def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 34547de..3392a1a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2056,8 +2056,8 @@ def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, ST(0)}", (DIVR_FPrST0 RST:$op)>; def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, ST(0)}", (DIV_FPrST0 RST:$op)>; // We accept "fnstsw %eax" even though it only writes %ax. -def : InstAlias<"fnstsw %eax", (FNSTSW16r)>; -def : InstAlias<"fnstsw %al" , (FNSTSW16r)>; +def : InstAlias<"fnstsw\t{%eax|EAX}", (FNSTSW16r)>; +def : InstAlias<"fnstsw\t{%al|AL}" , (FNSTSW16r)>; def : InstAlias<"fnstsw" , (FNSTSW16r)>; // lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but @@ -2186,19 +2186,19 @@ defm : ShiftRotateByOneAlias<"ror", "ROR">; FIXME */ // test: We accept "testX , " and "testX , " as synonyms. -def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>; -def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>; -def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>; -def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>; +def : InstAlias<"test{b}\t{$val, $mem|$mem, $val}", (TEST8rm GR8 :$val, i8mem :$mem)>; +def : InstAlias<"test{w}\t{$val, $mem|$mem, $val}", (TEST16rm GR16:$val, i16mem:$mem)>; +def : InstAlias<"test{l}\t{$val, $mem|$mem, $val}", (TEST32rm GR32:$val, i32mem:$mem)>; +def : InstAlias<"test{q}\t{$val, $mem|$mem, $val}", (TEST64rm GR64:$val, i64mem:$mem)>; // xchg: We accept "xchgX , " and "xchgX , " as synonyms. -def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>; -def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>; -def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>; -def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>; +def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}", (XCHG8rm GR8 :$val, i8mem :$mem)>; +def : InstAlias<"xchg{w}\t{$mem, $val|$val, $mem}", (XCHG16rm GR16:$val, i16mem:$mem)>; +def : InstAlias<"xchg{l}\t{$mem, $val|$val, $mem}", (XCHG32rm GR32:$val, i32mem:$mem)>; +def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}", (XCHG64rm GR64:$val, i64mem:$mem)>; // xchg: We accept "xchgX , %eax" and "xchgX %eax, " as synonyms. -def : InstAlias<"xchgw %ax, $src", (XCHG16ar GR16:$src)>; -def : InstAlias<"xchgl %eax, $src", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>; -def : InstAlias<"xchgl %eax, $src", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>; -def : InstAlias<"xchgq %rax, $src", (XCHG64ar GR64:$src)>; +def : InstAlias<"xchg{w}\t{%ax, $src|$src, AX}", (XCHG16ar GR16:$src)>; +def : InstAlias<"xchg{l}\t{%eax, $src|$src, EAX}", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>; +def : InstAlias<"xchg{l}\t{%eax, $src|$src, EAX}", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>; +def : InstAlias<"xchg{q}\t{%rax, $src|$src, RAX}", (XCHG64ar GR64:$src)>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8fdcdd4..e14cb10 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5477,12 +5477,12 @@ def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", TB, Requires<[HasSSE3]>; } // SchedRW -def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; -def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; +def : InstAlias<"mwait\t{%eax, %ecx|ECX, EAX}", (MWAITrr)>, Requires<[In32BitMode]>; +def : InstAlias<"mwait\t{%rax, %rcx|RCX, RAX}", (MWAITrr)>, Requires<[In64BitMode]>; -def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>, +def : InstAlias<"monitor\t{%eax, %ecx, %edx|EDX, ECX, EAX}", (MONITORrrr)>, Requires<[In32BitMode]>; -def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>, +def : InstAlias<"monitor\t{%rax, %rcx, %rdx|RDX, RCX, RAX}", (MONITORrrr)>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// -- cgit v1.1 From e64863c8b5c852516191f5be5bfd2a55af843e3f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 22 Jul 2013 10:07:26 +0000 Subject: Fix typo. Change %cl to CL in Intel pattern. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186815 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrShiftRotate.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 89c1a689..59aa946 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -537,7 +537,7 @@ def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>; def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), - "rol{q}\t{%cl, $dst|$dst, %cl}", + "rol{q}\t{%cl, $dst|$dst, CL}", [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -- cgit v1.1 From 1abb7bc7e917771a80cd7788ee37ba7dab98f183 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 22 Jul 2013 11:02:32 +0000 Subject: Revert "More Intel syntax alias fixes." This reverts commit r186813, which broke the bots. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186818 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 56 +++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 3392a1a..53f95f0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1979,22 +1979,22 @@ def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>; def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>; // div and idiv aliases for explicit A register. -def : InstAlias<"div{b}\t{$src, %al|AL, $src}", (DIV8r GR8 :$src)>; -def : InstAlias<"div{w}\t{$src, %ax|AX, $src}", (DIV16r GR16:$src)>; -def : InstAlias<"div{l}\t{$src, %eax|EAX, $src}", (DIV32r GR32:$src)>; -def : InstAlias<"div{q}\t{$src, %rax|RAX, $src}", (DIV64r GR64:$src)>; -def : InstAlias<"div{b}\t{$src, %al|AL, $src}", (DIV8m i8mem :$src)>; -def : InstAlias<"div{w}\t{$src, %ax|AX, $src}", (DIV16m i16mem:$src)>; -def : InstAlias<"div{l}\t{$src, %eax|EAX, $src}", (DIV32m i32mem:$src)>; -def : InstAlias<"div{q}\t{$src, %rax|RAX, $src}", (DIV64m i64mem:$src)>; -def : InstAlias<"idiv{b}\t{$src, %al|AL, $src}", (IDIV8r GR8 :$src)>; -def : InstAlias<"idiv{w}\t{$src, %ax|AX, $src}", (IDIV16r GR16:$src)>; -def : InstAlias<"idiv{l}\t{$src, %eax|EAX, $src}", (IDIV32r GR32:$src)>; -def : InstAlias<"idiv{q}\t{$src, %rax|RAX, $src}", (IDIV64r GR64:$src)>; -def : InstAlias<"idiv{b}\t{$src, %al|AL, $src}", (IDIV8m i8mem :$src)>; -def : InstAlias<"idiv{w}\t{$src, %ax|AX, $src}", (IDIV16m i16mem:$src)>; -def : InstAlias<"idiv{l}\t{$src, %eax|EAX, $src}", (IDIV32m i32mem:$src)>; -def : InstAlias<"idiv{q}\t{$src, %rax|RAX, $src}", (IDIV64m i64mem:$src)>; +def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>; +def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>; +def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>; +def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>; +def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>; +def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>; +def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>; +def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>; +def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>; +def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>; +def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>; +def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>; +def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>; +def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>; +def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>; +def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>; @@ -2076,12 +2076,12 @@ def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>; def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>; // inb %dx -> inb %al, %dx -def : InstAlias<"inb\t{%dx|DX}", (IN8rr)>; -def : InstAlias<"inw\t{%dx|DX}", (IN16rr)>; -def : InstAlias<"inl\t{%dx|DX}", (IN32rr)>; -def : InstAlias<"inb\t$port", (IN8ri i8imm:$port)>; -def : InstAlias<"inw\t$port", (IN16ri i8imm:$port)>; -def : InstAlias<"inl\t$port", (IN32ri i8imm:$port)>; +def : InstAlias<"inb %dx", (IN8rr)>; +def : InstAlias<"inw %dx", (IN16rr)>; +def : InstAlias<"inl %dx", (IN32rr)>; +def : InstAlias<"inb $port", (IN8ri i8imm:$port)>; +def : InstAlias<"inw $port", (IN16ri i8imm:$port)>; +def : InstAlias<"inl $port", (IN32ri i8imm:$port)>; // jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp @@ -2130,12 +2130,12 @@ def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>; // Note: No GR32->GR64 movzx form. // outb %dx -> outb %al, %dx -def : InstAlias<"outb\t{%dx|DX}", (OUT8rr)>; -def : InstAlias<"outw\t{%dx|DX}", (OUT16rr)>; -def : InstAlias<"outl\t{%dx|DX}", (OUT32rr)>; -def : InstAlias<"outb\t$port", (OUT8ir i8imm:$port)>; -def : InstAlias<"outw\t$port", (OUT16ir i8imm:$port)>; -def : InstAlias<"outl\t$port", (OUT32ir i8imm:$port)>; +def : InstAlias<"outb %dx", (OUT8rr)>; +def : InstAlias<"outw %dx", (OUT16rr)>; +def : InstAlias<"outl %dx", (OUT32rr)>; +def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>; +def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>; +def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>; // 'sldt ' can be encoded with either sldtw or sldtq with the same // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity -- cgit v1.1 From 3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 22 Jul 2013 12:18:04 +0000 Subject: [NVPTX] Use approximate FP ops when unsafe-fp-math is used, and append .ftz to instructions if the nvptx-f32ftz attribute is set to "true" git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186820 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 67 ++++++++++++++++++++++++---------- lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 26 +++---------- lib/Target/NVPTX/NVPTXInstrInfo.td | 24 +++++------- 3 files changed, 61 insertions(+), 56 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index b613587..ba85e35 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -25,11 +25,6 @@ using namespace llvm; -static cl::opt UseFMADInstruction( - "nvptx-mad-enable", cl::ZeroOrMore, - cl::desc("NVPTX Specific: Enable generating FMAD instructions"), - cl::init(false)); - static cl::opt FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" @@ -47,6 +42,12 @@ UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true)); +static cl::opt +FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), + cl::init(false)); + + /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, @@ -58,12 +59,7 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), Subtarget(tm.getSubtarget()) { - // Always do fma.f32 fpcontract if the target supports the instruction. - // Always do fma.f64 fpcontract if the target supports the instruction. - // Do mad.f32 is nvptx-mad-enable is specified and the target does not - // support fma.f32. - doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32(); doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1); doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1); doFMAF32AGG = @@ -71,20 +67,51 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2); - allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction; - - UseF32FTZ = false; + allowFMA = (FMAContractLevel >= 1); doMulWide = (OptLevel > 0); +} - // Decide how to translate f32 div - do_DIVF32_PREC = UsePrecDivF32; - // Decide how to translate f32 sqrt - do_SQRTF32_PREC = UsePrecSqrtF32; - // sm less than sm_20 does not support div.rnd. Use div.full. - if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20()) - do_DIVF32_PREC = 1; +int NVPTXDAGToDAGISel::getDivF32Level() const { + if (UsePrecDivF32.getNumOccurrences() > 0) { + // If nvptx-prec-div32=N is used on the command-line, always honor it + return UsePrecDivF32; + } else { + // Otherwise, use div.approx if fast math is enabled + if (TM.Options.UnsafeFPMath) + return 0; + else + return 2; + } +} +bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { + if (UsePrecSqrtF32.getNumOccurrences() > 0) { + // If nvptx-prec-sqrtf32 is used on the command-line, always honor it + return UsePrecSqrtF32; + } else { + // Otherwise, use sqrt.approx if fast math is enabled + if (TM.Options.UnsafeFPMath) + return false; + else + return true; + } +} + +bool NVPTXDAGToDAGISel::useF32FTZ() const { + if (FtzEnabled.getNumOccurrences() > 0) { + // If nvptx-f32ftz is used on the command-line, always honor it + return FtzEnabled; + } else { + const Function *F = MF->getFunction(); + // Otherwise, check for an nvptx-f32ftz attribute on the function + if (F->hasFnAttribute("nvptx-f32ftz")) + return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex, + "nvptx-f32ftz") + .getValueAsString() == "true"); + else + return false; + } } /// Select - Select instructions not customized! Used for diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 428e7b2..d961e50 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -28,38 +28,22 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { // If true, generate corresponding FPCONTRACT. This is // language dependent (i.e. CUDA and OpenCL works differently). - bool doFMADF32; bool doFMAF64; bool doFMAF32; bool doFMAF64AGG; bool doFMAF32AGG; bool allowFMA; - // 0: use div.approx - // 1: use div.full - // 2: For sm_20 and later, ieee-compliant div.rnd.f32 can be generated; - // Otherwise, use div.full - int do_DIVF32_PREC; - - // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ - // is true, then generate the corresponding FTZ version. - bool do_SQRTF32_PREC; - - // If true, add .ftz to f32 instructions. - // This is only meaningful for sm_20 and later, as the default - // is not ftz. - // For sm earlier than sm_20, f32 denorms are always ftz by the - // hardware. - // We always add the .ftz modifier regardless of the sm value - // when Use32FTZ is true. - bool UseF32FTZ; - // If true, generate mul.wide from sext and mul bool doMulWide; + int getDivF32Level() const; + bool usePrecSqrtF32() const; + bool useF32FTZ() const; + public: explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel); // Pass Name virtual const char *getPassName() const { diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index e6335a0..8ce16e9 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -136,28 +136,26 @@ def hasLDG : Predicate<"Subtarget.hasLDG()">; def hasLDU : Predicate<"Subtarget.hasLDU()">; def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; -def doF32FTZ : Predicate<"UseF32FTZ==1">; -def doNoF32FTZ : Predicate<"UseF32FTZ==0">; +def doF32FTZ : Predicate<"useF32FTZ()">; +def doNoF32FTZ : Predicate<"!useF32FTZ()">; def doFMAF32 : Predicate<"doFMAF32">; -def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">; +def doFMAF32_ftz : Predicate<"(doFMAF32 && useF32FTZ())">; def doFMAF32AGG : Predicate<"doFMAF32AGG">; -def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">; +def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">; def doFMAF64 : Predicate<"doFMAF64">; def doFMAF64AGG : Predicate<"doFMAF64AGG">; -def doFMADF32 : Predicate<"doFMADF32">; -def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">; def doMulWide : Predicate<"doMulWide">; def allowFMA : Predicate<"allowFMA">; -def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">; +def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">; -def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">; -def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">; +def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; +def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; -def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">; -def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">; +def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; +def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; @@ -864,8 +862,6 @@ multiclass FPCONTRACT64 { // If we reverse the order of the following two lines, then rrr2 rule will be // generated for FMA32, but not for rrr. // Therefore, we manually write the rrr2 rule in FPCONTRACT32. -defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>; -defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>; defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>; defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>; defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>; @@ -904,8 +900,6 @@ multiclass FPCONTRACT64_SUB_PAT { defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT; defm FMAF32ext : FPCONTRACT32_SUB_PAT; -defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD; -defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD; defm FMAF64ext : FPCONTRACT64_SUB_PAT; def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), -- cgit v1.1 From 02265382929b0275d7b7b334eab5e2fd34e1b9fe Mon Sep 17 00:00:00 2001 From: Mihai Popa Date: Mon, 22 Jul 2013 15:49:36 +0000 Subject: This adds range checking for "ldr Rn, [pc, #imm]" Thumb instructions. With this patch: 1. ldr.n is recognized as mnemonic for the short encoding 2. ldr.w is recognized as menmonic for the long encoding 3. ldr will map to either short or long encodings depending on the size of the offset git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186831 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb.td | 43 +++++++++++++-------------- lib/Target/ARM/ARMInstrThumb2.td | 2 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 40 +++++++++++++++++++++++++ lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 2 +- 4 files changed, 62 insertions(+), 25 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index a0edaba..e7218c6 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -100,6 +100,13 @@ class OperandUnsignedOffset_b8s2 : AsmOperandClass { def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2; +// thumb style PC relative operand. signed, 8 bits magnitude, +// two bits shift. can be represented as either [pc, #imm], #imm, +// or relocatable expression... +def ThumbMemPC : AsmOperandClass { + let Name = "ThumbMemPC"; +} + let OperandType = "OPERAND_PCREL" in { def t_brtarget : Operand { let EncoderMethod = "getThumbBRTargetOpValue"; @@ -132,6 +139,15 @@ def t_blxtarget : Operand { let EncoderMethod = "getThumbBLXTargetOpValue"; let DecoderMethod = "DecodeThumbBLXOffset"; } + +// t_addrmode_pc :=