diff options
author | Christian Konig <christian.koenig@amd.com> | 2013-03-27 09:12:59 +0000 |
---|---|---|
committer | Christian Konig <christian.koenig@amd.com> | 2013-03-27 09:12:59 +0000 |
commit | e49230895d9c666b84beaa748259fbf1f6715122 (patch) | |
tree | b300cd1efed44d08c7e9fd3bbf93e1da1bb0130e /lib | |
parent | 45b14e341a8a85e877d001bbd43f5e2b25b61cb8 (diff) | |
download | external_llvm-e49230895d9c666b84beaa748259fbf1f6715122.zip external_llvm-e49230895d9c666b84beaa748259fbf1f6715122.tar.gz external_llvm-e49230895d9c666b84beaa748259fbf1f6715122.tar.bz2 |
R600/SI: add commuting of rev instructions
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178127 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 22 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.cpp | 22 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.h | 4 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.td | 46 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 27 |
5 files changed, 87 insertions, 34 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0940455..6f0c307 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -544,6 +544,13 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, unsigned NumDefs = Desc->getNumDefs(); unsigned NumOps = Desc->getNumOperands(); + // Commuted opcode if available + int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1; + const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev); + + assert(!DescRev || DescRev->getNumDefs() == NumDefs); + assert(!DescRev || DescRev->getNumOperands() == NumOps); + // e64 version if available, -1 otherwise int OpcodeE64 = AMDGPU::getVOPe64(Opcode); const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64); @@ -605,8 +612,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, continue; } - if (i == 1 && Desc->isCommutable() && - fitsRegClass(DAG, Ops[0], RegClass)) { + if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) { unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass; assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass)); @@ -620,6 +626,9 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SDValue Tmp = Ops[1]; Ops[1] = Ops[0]; Ops[0] = Tmp; + + Desc = DescRev; + DescRev = 0; continue; } } @@ -655,10 +664,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) Ops.push_back(Node->getOperand(i)); - // Either create a complete new or update the current instruction - if (Promote2e64) - return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(), - Node->getVTList(), Ops.data(), Ops.size()); - else - return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + // Create a complete new instruction + return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(), + Node->getVTList(), Ops.data(), Ops.size()); } diff --git 
a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index b9b25b5..0bfcef5 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -158,6 +158,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } +unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { + + int NewOpc; + + // Try to map original to commuted opcode + if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) + return NewOpc; + + // Try to map commuted to original opcode + if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) + return NewOpc; + + return Opcode; +} + MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { @@ -165,7 +180,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, !MI->getOperand(2).isReg()) return 0; - return TargetInstrInfo::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + + if (MI) + MI->setDesc(get(commuteOpcode(MI->getOpcode()))); + + return MI; } MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 5789af5..d4e60e5 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -35,6 +35,8 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + unsigned commuteOpcode(unsigned Opcode) const; + virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI=false) const; @@ -76,6 +78,8 @@ public: namespace AMDGPU { int getVOPe64(uint16_t Opcode); + int getCommuteRev(uint16_t Opcode); + int getCommuteOrig(uint16_t Opcode); } // End namespace AMDGPU diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 7af1a87..617f0b8 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -138,6 +138,11 @@ class VOP <string opName> { string OpName = opName; } +class VOP2_REV <string revOp, bit isOrig> { + string RevOp = revOp; + bit IsOrig = isOrig; +} + 
multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src, string opName, list<dag> pattern> { @@ -166,11 +171,11 @@ multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>; multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> { + string opName, list<dag> pattern, string revOp> { def _e32 : VOP2 < op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >, VOP <opName>; + >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; def _e64 : VOP3 < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -179,23 +184,26 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - >, VOP <opName> { + >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> { let SRC2 = SIOperand.ZERO; } } -multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> - : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern>; +multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> + : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>; -multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> - : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern>; +multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> + : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>; -multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> { +multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> { def _e32 : VOP2 < op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >, VOP <opName>; + >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; def _e64 : VOP3b < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, 
op{1}, op{0}}, @@ -204,7 +212,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> { i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - >, VOP <opName> { + >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> { let SRC2 = SIOperand.ZERO; /* the VOP2 variant puts the carry out into VCC, the VOP3 variant can write it into any SGPR. We currently don't use the carry out, @@ -327,4 +335,22 @@ def getVOPe64 : InstrMapping { let ValueCols = [["8"]]; } +// Maps an original opcode to its commuted version +def getCommuteRev : InstrMapping { + let FilterClass = "VOP2_REV"; + let RowFields = ["RevOp"]; + let ColFields = ["IsOrig"]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; +} + +// Maps an commuted opcode to its original version +def getCommuteOrig : InstrMapping { + let FilterClass = "VOP2_REV"; + let RowFields = ["RevOp"]; + let ColFields = ["IsOrig"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index b480521..40aa0e2 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -804,13 +804,13 @@ let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] >; -} // End isCommutable = 1 defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] >; +defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">; +} // End isCommutable = 1 -defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; let isCommutable = 1 in { @@ -848,22 +848,20 @@ defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; -} // End 
isCommutable = 1 - defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))] >; -defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; +defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; + defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))] >; -defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; +defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; + defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] >; -defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; - -let isCommutable = 1 in { +defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] @@ -884,25 +882,24 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; -let Defs = [VCC] in { // Carry-out goes to VCC -let isCommutable = 1 in { +let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; -} // End isCommutable = 1 defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; +defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; -defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>; let Uses = [VCC] in { // Carry-out comes from VCC defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; -defm V_SUBBREV_U32 : 
VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>; +defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; } // End Uses = [VCC] -} // End Defs = [VCC] +} // End isCommutable = 1, Defs = [VCC] + defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; |