diff options
author | Justin Holewinski <justin.holewinski@gmail.com> | 2011-09-26 16:20:28 +0000 |
---|---|---|
committer | Justin Holewinski <justin.holewinski@gmail.com> | 2011-09-26 16:20:28 +0000 |
commit | c1d8fbd41ac98829ef83fdd83ff5954e0cf03bdf (patch) | |
tree | 78177080e70971eddb7f8982bcfa8fb384ebdcc4 | |
parent | 6b8990df42c3e9814cc60c3072f85b5a38bbb410 (diff) | |
download | external_llvm-c1d8fbd41ac98829ef83fdd83ff5954e0cf03bdf.zip external_llvm-c1d8fbd41ac98829ef83fdd83ff5954e0cf03bdf.tar.gz external_llvm-c1d8fbd41ac98829ef83fdd83ff5954e0cf03bdf.tar.bz2 |
PTX: Unify handling of loads/stores
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140533 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/PTX/PTXAsmPrinter.cpp | 9 | ||||
-rw-r--r-- | lib/Target/PTX/PTXInstrInfo.td | 103 | ||||
-rw-r--r-- | lib/Target/PTX/PTXRegisterInfo.cpp | 2 | ||||
-rw-r--r-- | test/CodeGen/PTX/ld.ll | 65 | ||||
-rw-r--r-- | test/CodeGen/PTX/st.ll | 65 |
5 files changed, 38 insertions, 206 deletions
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index beabd77..d1b6653 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -68,6 +68,8 @@ public: const char *Modifier = 0); void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, const char *Modifier = 0); + void printLocalOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, const char *Modifier = 0); void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); @@ -297,7 +299,7 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { if (FrameInfo->getObjectSize(i) > 0) { std::string def = "\t.local .b"; def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits - def += " __local_"; + def += " __local"; def += utostr(i); def += ";"; OutStreamer.EmitRawText(Twine(def)); @@ -458,6 +460,11 @@ void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum, OS << "__ret"; } +void PTXAsmPrinter::printLocalOperand(const MachineInstr *MI, int opNum, + raw_ostream &OS, const char *Modifier) { + OS << "__local" << MI->getOperand(opNum).getImm(); +} + void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { // Check to see if this is a special global used by LLVM, if so, emit it. if (EmitSpecialLLVMGlobal(gv)) diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 50499a5..6b18f13 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -147,6 +147,14 @@ def MEMri64 : Operand<i64> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops RegI64, i64imm); } +def LOCALri32 : Operand<i32> { + let PrintMethod = "printLocalOperand"; + let MIOperandInfo = (ops RegI32, i32imm); +} +def LOCALri64 : Operand<i64> { + let PrintMethod = "printLocalOperand"; + let MIOperandInfo = (ops RegI64, i64imm); +} def MEMii32 : Operand<i32> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops i32imm, i32imm); @@ -602,6 +610,21 @@ multiclass PTX_LD<string opstr, string typestr, Requires<[Use64BitAddresses]>; } +multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> { + def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_local ADDRlocal32:$a))]>; + def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_local ADDRlocal64:$a))]>; + def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_local RC:$d, ADDRlocal32:$a)]>; + def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_local RC:$d, ADDRlocal64:$a)]>; +} + multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; @@ -960,86 +983,18 @@ let hasSideEffects = 1 in { [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>; */ -let hasSideEffects = 1 in { - def LDLOCALpiPred : InstPTX<(outs RegPred:$d), (ins MEMri32:$a), - "ld.local.pred\t$d, [__local_$a]", - [(set RegPred:$d, (load_local ADDRlocal32:$a))]>; - def LDLOCALpiU16 : InstPTX<(outs RegI16:$d), (ins MEMri32:$a), - "ld.local.u16\t$d, [__local_$a]", - [(set RegI16:$d, (load_local ADDRlocal32:$a))]>; - def LDLOCALpiU32 : InstPTX<(outs RegI32:$d), (ins MEMri32:$a), - "ld.local.u32\t$d, [__local_$a]", - [(set RegI32:$d, (load_local ADDRlocal32:$a))]>; - def LDLOCALpiU64 : InstPTX<(outs RegI64:$d), (ins MEMri32:$a), - "ld.local.u64\t$d, [__local_$a]", - [(set RegI64:$d, (load_local ADDRlocal32:$a))]>; - def LDLOCALpiF32 : InstPTX<(outs RegF32:$d), (ins MEMri32:$a), - "ld.local.f32\t$d, [__local_$a]", - [(set RegF32:$d, (load_local ADDRlocal32:$a))]>; - def LDLOCALpiF64 : InstPTX<(outs RegF64:$d), (ins MEMri32:$a), - "ld.local.f64\t$d, [__local_$a]", - [(set RegF64:$d, (load_local ADDRlocal32:$a))]>; - - def STLOCALpiPred : InstPTX<(outs), (ins RegPred:$d, MEMri32:$a), - "st.local.pred\t[__local_$a], $d", - [(store_local RegPred:$d, ADDRlocal32:$a)]>; - def STLOCALpiU16 : InstPTX<(outs), (ins RegI16:$d, MEMri32:$a), - "st.local.u16\t[__local_$a], $d", - [(store_local RegI16:$d, ADDRlocal32:$a)]>; - def STLOCALpiU32 : InstPTX<(outs), (ins RegI32:$d, MEMri32:$a), - "st.local.u32\t[__local_$a], $d", - [(store_local RegI32:$d, ADDRlocal32:$a)]>; - def STLOCALpiU64 : InstPTX<(outs), (ins RegI64:$d, MEMri32:$a), - "st.local.u64\t[__local_$a], $d", - [(store_local RegI64:$d, ADDRlocal32:$a)]>; - def STLOCALpiF32 : InstPTX<(outs), (ins RegF32:$d, MEMri32:$a), - "st.local.f32\t[__local_$a], $d", - [(store_local RegF32:$d, ADDRlocal32:$a)]>; - def STLOCALpiF64 : InstPTX<(outs), (ins RegF64:$d, MEMri32:$a), - "st.local.f64\t[__local_$a], $d", - [(store_local RegF64:$d, ADDRlocal32:$a)]>; - - /*def LDLOCALpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), - "ld.param.u16\t$d, [$a]", - [(set RegI16:$d, (PTXloadparam timm:$a))]>; - def LDLOCALpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), - "ld.param.u32\t$d, [$a]", - [(set RegI32:$d, (PTXloadparam timm:$a))]>; - def LDLOCALpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), - "ld.param.u64\t$d, [$a]", - [(set RegI64:$d, (PTXloadparam timm:$a))]>; - def LDLOCALpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), - "ld.param.f32\t$d, [$a]", - [(set RegF32:$d, (PTXloadparam timm:$a))]>; - def LDLOCALpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), - "ld.param.f64\t$d, [$a]", - [(set RegF64:$d, (PTXloadparam timm:$a))]>; - - def STLOCALpiPred : InstPTX<(outs), (ins MEMpi:$d, RegPred:$a), - "st.param.pred\t[$d], $a", - [(PTXstoreparam timm:$d, RegPred:$a)]>; - def STLOCALpiU16 : InstPTX<(outs), (ins MEMpi:$d, RegI16:$a), - "st.param.u16\t[$d], $a", - [(PTXstoreparam timm:$d, RegI16:$a)]>; - def STLOCALpiU32 : InstPTX<(outs), (ins MEMpi:$d, RegI32:$a), - "st.param.u32\t[$d], $a", - [(PTXstoreparam timm:$d, RegI32:$a)]>; - def STLOCALpiU64 : InstPTX<(outs), (ins MEMpi:$d, RegI64:$a), - "st.param.u64\t[$d], $a", - [(PTXstoreparam timm:$d, RegI64:$a)]>; - def STLOCALpiF32 : InstPTX<(outs), (ins MEMpi:$d, RegF32:$a), - "st.param.f32\t[$d], $a", - [(PTXstoreparam timm:$d, RegF32:$a)]>; - def STLOCALpiF64 : InstPTX<(outs), (ins MEMpi:$d, RegF64:$a), - "st.param.f64\t[$d], $a", - [(PTXstoreparam timm:$d, RegF64:$a)]>;*/ -} // Stores defm STg : PTX_ST_ALL<"st.global", store_global>; //defm STl : PTX_ST_ALL<"st.local", store_local>; defm STs : PTX_ST_ALL<"st.shared", store_shared>; +defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>; +defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>; +defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>; +defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>; +defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>; +defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>; // defm STp : PTX_ST_ALL<"st.param", store_parameter>; diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp index 6f2e876..acc74f3 100644 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -65,5 +65,5 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // This frame index is post stack slot re-use assignments //MI.getOperand(Index).ChangeToRegister(Reg, false); - MI.getOperand(Index).ChangeToImmediate(0); + MI.getOperand(Index).ChangeToImmediate(FrameIndex); } diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll index 95941dc..81fd33a 100644 --- a/test/CodeGen/PTX/ld.ll +++ b/test/CodeGen/PTX/ld.ll @@ -6,9 +6,6 @@ ;CHECK: .extern .const .b8 array_constant_i16[20]; @array_constant_i16 = external addrspace(1) constant [10 x i16] -;CHECK: .extern .local .b8 array_local_i16[20]; -@array_local_i16 = external addrspace(2) global [10 x i16] - ;CHECK: .extern .shared .b8 array_shared_i16[20]; @array_shared_i16 = external addrspace(4) global [10 x i16] @@ -18,9 +15,6 @@ ;CHECK: .extern .const .b8 array_constant_i32[40]; @array_constant_i32 = external addrspace(1) constant [10 x i32] -;CHECK: .extern .local .b8 array_local_i32[40]; -@array_local_i32 = external addrspace(2) global [10 x i32] - ;CHECK: .extern .shared .b8 array_shared_i32[40]; @array_shared_i32 = external addrspace(4) global [10 x i32] @@ -30,9 +24,6 @@ ;CHECK: .extern .const .b8 array_constant_i64[80]; @array_constant_i64 = external addrspace(1) constant [10 x i64] -;CHECK: .extern .local .b8 array_local_i64[80]; -@array_local_i64 = external addrspace(2) global [10 x i64] - ;CHECK: .extern .shared .b8 array_shared_i64[80]; @array_shared_i64 = external addrspace(4) global [10 x i64] @@ -42,9 +33,6 @@ ;CHECK: .extern .const .b8 array_constant_float[40]; @array_constant_float = external addrspace(1) constant [10 x float] -;CHECK: .extern .local .b8 array_local_float[40]; -@array_local_float = external addrspace(2) global [10 x float] - ;CHECK: .extern .shared .b8 array_shared_float[40]; @array_shared_float = external addrspace(4) global [10 x float] @@ -54,9 +42,6 @@ ;CHECK: .extern .const .b8 array_constant_double[80]; @array_constant_double = external addrspace(1) constant [10 x double] -;CHECK: .extern .local .b8 array_local_double[80]; -@array_local_double = external addrspace(2) global [10 x double] - ;CHECK: .extern .shared .b8 array_shared_double[80]; @array_shared_double = external addrspace(4) global [10 x double] @@ -296,56 +281,6 @@ entry: ret double %x } -define ptx_device i16 @t4_local_u16() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i16; -;CHECK: ld.local.u16 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 - %x = load i16 addrspace(2)* %i - ret i16 %x -} - -define ptx_device i32 @t4_local_u32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i32; -;CHECK: ld.local.u32 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 - %x = load i32 addrspace(2)* %i - ret i32 %x -} - -define ptx_device i64 @t4_local_u64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i64; -;CHECK: ld.local.u64 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 - %x = load i64 addrspace(2)* %i - ret i64 %x -} - -define ptx_device float @t4_local_f32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_float; -;CHECK: ld.local.f32 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 - %x = load float addrspace(2)* %i - ret float %x -} - -define ptx_device double @t4_local_f64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_double; -;CHECK: ld.local.f64 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 - %x = load double addrspace(2)* %i - ret double %x -} - define ptx_device i16 @t4_shared_u16() { entry: ;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16; diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll index beff0cf..63ef58c 100644 --- a/test/CodeGen/PTX/st.ll +++ b/test/CodeGen/PTX/st.ll @@ -6,9 +6,6 @@ ;CHECK: .extern .const .b8 array_constant_i16[20]; @array_constant_i16 = external addrspace(1) constant [10 x i16] -;CHECK: .extern .local .b8 array_local_i16[20]; -@array_local_i16 = external addrspace(2) global [10 x i16] - ;CHECK: .extern .shared .b8 array_shared_i16[20]; @array_shared_i16 = external addrspace(4) global [10 x i16] @@ -18,9 +15,6 @@ ;CHECK: .extern .const .b8 array_constant_i32[40]; @array_constant_i32 = external addrspace(1) constant [10 x i32] -;CHECK: .extern .local .b8 array_local_i32[40]; -@array_local_i32 = external addrspace(2) global [10 x i32] - ;CHECK: .extern .shared .b8 array_shared_i32[40]; @array_shared_i32 = external addrspace(4) global [10 x i32] @@ -30,9 +24,6 @@ ;CHECK: .extern .const .b8 array_constant_i64[80]; @array_constant_i64 = external addrspace(1) constant [10 x i64] -;CHECK: .extern .local .b8 array_local_i64[80]; -@array_local_i64 = external addrspace(2) global [10 x i64] - ;CHECK: .extern .shared .b8 array_shared_i64[80]; @array_shared_i64 = external addrspace(4) global [10 x i64] @@ -42,9 +33,6 @@ ;CHECK: .extern .const .b8 array_constant_float[40]; @array_constant_float = external addrspace(1) constant [10 x float] -;CHECK: .extern .local .b8 array_local_float[40]; -@array_local_float = external addrspace(2) global [10 x float] - ;CHECK: .extern .shared .b8 array_shared_float[40]; @array_shared_float = external addrspace(4) global [10 x float] @@ -54,9 +42,6 @@ ;CHECK: .extern .const .b8 array_constant_double[80]; @array_constant_double = external addrspace(1) constant [10 x double] -;CHECK: .extern .local .b8 array_local_double[80]; -@array_local_double = external addrspace(2) global [10 x double] - ;CHECK: .extern .shared .b8 array_shared_double[80]; @array_shared_double = external addrspace(4) global [10 x double] @@ -251,56 +236,6 @@ entry: ret void } -define ptx_device void @t4_local_u16(i16 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i16; -;CHECK: st.local.u16 [%r[[R0]]], %rh{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 - store i16 %x, i16 addrspace(2)* %i - ret void -} - -define ptx_device void @t4_local_u32(i32 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i32; -;CHECK: st.local.u32 [%r[[R0]]], %r{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 - store i32 %x, i32 addrspace(2)* %i - ret void -} - -define ptx_device void @t4_local_u64(i64 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_i64; -;CHECK: st.local.u64 [%r[[R0]]], %rd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 - store i64 %x, i64 addrspace(2)* %i - ret void -} - -define ptx_device void @t4_local_f32(float %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_float; -;CHECK: st.local.f32 [%r[[R0]]], %f{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 - store float %x, float addrspace(2)* %i - ret void -} - -define ptx_device void @t4_local_f64(double %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_local_double; -;CHECK: st.local.f64 [%r[[R0]]], %fd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 - store double %x, double addrspace(2)* %i - ret void -} - define ptx_device void @t4_shared_u16(i16 %x) { entry: ;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16; |