diff options
-rw-r--r-- | lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 30 | ||||
-rw-r--r-- | lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 1 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 4 | ||||
-rw-r--r-- | test/CodeGen/R600/wait.ll | 37 |
4 files changed, 71 insertions, 1 deletions
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index fac3c39..a777802 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -255,4 +255,34 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
   }
 }
 
+/// Print the S_WAITCNT immediate operand \p OpNo of \p MI as a list of named
+/// counters separated by single spaces, e.g. "expcnt(0) lgkmcnt(0)".  A counter
+/// is omitted when its field equals 0xF (vmcnt) or 0x7 (expcnt, lgkmcnt).
+void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
+                                      raw_ostream &O) {
+  // Note: the field positions and masks below follow SIInsertWaits.cpp rather
+  // than the ISA docs.  The two disagree, but the SIInsertWaits.cpp encoding
+  // works, so the docs may contain a misprint.
+  // NOTE(review): expcnt and lgkmcnt are extracted as 4-bit fields but
+  // compared against the 3-bit value 0x7 -- confirm the intended field widths.
+  unsigned SImm16 = MI->getOperand(OpNo).getImm();
+  unsigned Vmcnt = SImm16 & 0xF;
+  unsigned Expcnt = (SImm16 >> 4) & 0xF;
+  unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
+
+  // Emit a separator only between counters so the operand never ends in a
+  // trailing space.
+  const char *Separator = "";
+  if (Vmcnt != 0xF) {
+    O << "vmcnt(" << Vmcnt << ')';
+    Separator = " ";
+  }
+  if (Expcnt != 0x7) {
+    O << Separator << "expcnt(" << Expcnt << ')';
+    Separator = " ";
+  }
+  if (Lgkmcnt != 0x7)
+    O << Separator << "lgkmcnt(" << Lgkmcnt << ')';
+}
+
 #include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 4c1dfa6..3524b30 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -52,6 +52,7 @@ private:
   void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
 };
 
 } // End namespace llvm
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 8294fbd..94d42d5 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -25,6 +25,8 @@ def InterpSlot : Operand<i32> {
 def isSI : Predicate<"Subtarget.getGeneration() "
                       "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
 
+def WAIT_FLAG : InstFlag<"printWaitFlag">;
+
 let Predicates = [isSI] in {
 
 let neverHasSideEffects = 1 in {
@@ -815,7 +817,7 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
   let mayStore = 1;
 }
 
-def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
   []
 >;
 } // End hasSideEffects
diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll
new file mode 100644
index 0000000..2cf88fe
--- /dev/null
+++ b/test/CodeGen/R600/wait.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=r600 -mcpu=SI --verify-machineinstrs | FileCheck %s
+
+;CHECK-LABEL: @main
+;CHECK: S_WAITCNT lgkmcnt(0)
+;CHECK: S_WAITCNT vmcnt(0)
+;CHECK: S_WAITCNT expcnt(0) lgkmcnt(0)
+
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
+main_body:
+  %10 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
+  %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
+  %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6)
+  %13 = extractelement <4 x float> %12, i32 0
+  %14 = extractelement <4 x float> %12, i32 1
+  %15 = extractelement <4 x float> %12, i32 2
+  %16 = extractelement <4 x float> %12, i32 3
+  %17 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
+  %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0
+  %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6)
+  %20 = extractelement <4 x float> %19, i32 0
+  %21 = extractelement <4 x float> %19, i32 1
+  %22 = extractelement <4 x float> %19, i32 2
+  %23 = extractelement <4 x float> %19, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { nounwind readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}