diff options
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/R600/AMDGPUInstructions.td | 20 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 29 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 12 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 14 |
4 files changed, 59 insertions, 16 deletions
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 3227f94..dec6082 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -96,6 +96,10 @@ def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; }]>; +def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isGlobalLoad(dyn_cast<LoadSDNode>(N)); +}]>; + def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; @@ -108,8 +112,12 @@ def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); }]>; -def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ - return isGlobalLoad(dyn_cast<LoadSDNode>(N)); +def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isLocalLoad(dyn_cast<LoadSDNode>(N)); +}]>; + +def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ + return isLocalLoad(dyn_cast<LoadSDNode>(N)); }]>; def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ @@ -132,6 +140,14 @@ def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); }]>; +def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ + return isLocalLoad(dyn_cast<LoadSDNode>(N)); +}]>; + +def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return isLocalLoad(dyn_cast<LoadSDNode>(N)); +}]>; + def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; }]>; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index e846ff4..9bc8e8a 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -104,7 +104,21 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo()); switch (MI->getOpcode()) { - default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + default: + if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) { + MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), + TII->get(MI->getOpcode()), + AMDGPU::OQAP); + for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { + NewMI.addOperand(MI->getOperand(i)); + } + TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, + MI->getOperand(0).getReg(), + AMDGPU::OQAP); + } else { + return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + } + break; case AMDGPU::CLAMP_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, @@ -140,19 +154,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::LDS_READ_RET: { - MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), - TII->get(MI->getOpcode()), - AMDGPU::OQAP); - for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { - NewMI.addOperand(MI->getOperand(i)); - } - TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, - MI->getOperand(0).getReg(), - AMDGPU::OQAP); - break; - } - case AMDGPU::MOV_IMM_F32: TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), MI->getOperand(1).getFPImm()->getValueAPF() diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 3d92278..f5c0266 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1682,6 +1682,18 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE", def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] >; +def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET", + [(set i32:$dst, (sextloadi8_local i32:$src0))] +>; +def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET", + [(set i32:$dst, (az_extloadi8_local i32:$src0))] +>; +def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET", + [(set i32:$dst, (sextloadi16_local i32:$src0))] +>; +def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", + [(set i32:$dst, (az_extloadi16_local i32:$src0))] +>; // TRUNC is used for the FLT_TO_INT instructions to work around a // perceived problem where the rounding modes are applied differently diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 785dbf1..136f69c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -395,6 +395,10 @@ def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; +def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>; +def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>; +def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>; +def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>; //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; @@ -1747,6 +1751,16 @@ def : Pat < /********** Load/Store Patterns **********/ /********** ======================= **********/ +class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat < + (frag i32:$src0), + (vt (inst 0, $src0, $src0, $src0, 0, 0)) +>; + +def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>; +def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>; +def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>; +def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>; +def : DSReadPat <DS_READ_B32, i32, local_load>; def : Pat < (local_load i32:$src0), (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0)) |