diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-08-26 15:05:49 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-08-26 15:05:49 +0000 |
commit | 8e78012457682d335ee97cf2859dfe03b7e2ae93 (patch) | |
tree | 5eff0980c06803d29bb1687fe9fb04a64952ddbb /lib/Target/R600 | |
parent | 7a0282daeb214f14d75249cc2d90302c44586c4e (diff) | |
download | external_llvm-8e78012457682d335ee97cf2859dfe03b7e2ae93.zip external_llvm-8e78012457682d335ee97cf2859dfe03b7e2ae93.tar.gz external_llvm-8e78012457682d335ee97cf2859dfe03b7e2ae93.tar.bz2 |
R600: Add support for i8 and i16 local memory stores
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189223 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 4 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUInstrInfo.td | 7 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUInstructions.td | 20 | ||||
-rw-r--r-- | lib/Target/R600/R600Defines.h | 3 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrFormats.td | 2 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 3 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 24 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 12 |
8 files changed, 60 insertions, 15 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 9df835f..88867b6 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -495,9 +495,9 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, Store->getBasePtr(), DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT)); - Chains.push_back(DAG.getStore(Store->getChain(), SL, Val, Ptr, + Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr, MachinePointerInfo(Store->getMemOperand()->getValue()), - Store->isVolatile(), Store->isNonTemporal(), + MemEltVT, Store->isVolatile(), Store->isNonTemporal(), Store->getAlignment())); } return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index c61993a..c0d757e 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -73,6 +73,13 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE", SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, [SDNPHasChain, SDNPMayStore]>; +// MSKOR instructions are atomic memory instructions used mainly for storing +// 8-bit and 16-bit values. The definition is: +// +// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src) +// +// src0: vec4(src, 0, 0, mask) +// src1: dst - rat offset (aka pointer) in dwords def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR", SDTypeProfile<0, 2, []>, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index df0bade..3227f94 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -156,13 +156,23 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr), return isGlobalStore(dyn_cast<StoreSDNode>(N)); }]>; -def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isLocalLoad(dyn_cast<LoadSDNode>(N)); -}]>; - def local_store : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ - return isLocalStore(dyn_cast<StoreSDNode>(N)); + return isLocalStore(dyn_cast<StoreSDNode>(N)); +}]>; + +def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast<StoreSDNode>(N)); +}]>; + +def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast<StoreSDNode>(N)); +}]>; + +def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isLocalLoad(dyn_cast<LoadSDNode>(N)); }]>; def mskor_global : PatFrag<(ops node:$val, node:$ptr), diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index 8dc9ebb..1781f2a 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -45,7 +45,8 @@ namespace R600_InstFlag { ALU_INST = (1 << 14), LDS_1A = (1 << 15), LDS_1A1D = (1 << 16), - IS_EXPORT = (1 << 17) + IS_EXPORT = (1 << 17), + LDS_1A2D = (1 << 18) }; } diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td index 2ae3311..ae3046d 100644 --- a/lib/Target/R600/R600InstrFormats.td +++ b/lib/Target/R600/R600InstrFormats.td @@ -30,6 +30,7 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern, bit TEXInst = 0; bit ALUInst = 0; bit IsExport = 0; + bit LDS_1A2D = 0; let Namespace = "AMDGPU"; let OutOperandList = outs; @@ -55,6 +56,7 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern, let TSFlags{15} = LDS_1A; let TSFlags{16} = LDS_1A1D; let TSFlags{17} = IsExport; + let TSFlags{18} = LDS_1A2D; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 9548a34..4e0607f 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -149,7 +149,8 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { unsigned TargetFlags = get(Opcode).TSFlags; return ((TargetFlags & R600_InstFlag::LDS_1A) | - (TargetFlags & R600_InstFlag::LDS_1A1D)); + (TargetFlags & R600_InstFlag::LDS_1A1D) | + (TargetFlags & R600_InstFlag::LDS_1A2D)); } bool R600InstrInfo::isTransOnly(unsigned Opcode) const { diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index b059a81..3d92278 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1657,13 +1657,31 @@ class R600_LDS_1A1D <bits<6> lds_op, string name, list<dag> pattern> : let LDS_1A1D = 1; } -def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", - [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] ->; +class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> : + R600_LDS < + lds_op, + (outs), + (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, + R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel, + LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle), + " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel", + pattern> { + let LDS_1A2D = 1; +} def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE", [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] >; +def LDS_BYTE_WRITE : R600_LDS_1A1D<0x12, "LDS_BYTE_WRITE", + [(truncstorei8_local i32:$src1, i32:$src0)] +>; +def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE", + [(truncstorei16_local i32:$src1, i32:$src0)] +>; +def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", + [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] +>; // TRUNC is used for the FLT_TO_INT instructions to work around a // perceived problem where the rounding modes are applied differently diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 8c52a2e..785dbf1 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -392,6 +392,8 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1 def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; +def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; +def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; @@ -1750,11 +1752,15 @@ def : Pat < (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0)) >; -def : Pat < - (local_store i32:$src1, i32:$src0), - (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0) +class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat < + (frag i32:$src1, i32:$src0), + (inst 0, $src0, $src1, $src1, 0, 0) >; +def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>; +def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>; +def : DSWritePat <DS_WRITE_B32, i32, local_store>; + /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ |