summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau/codegen/nv50_ir.h
diff options
context:
space:
mode:
authorJohannes Obermayr <johannesobermayr@gmx.de>2013-08-20 20:14:00 +0200
committerChristoph Bumiller <e0425955@student.tuwien.ac.at>2013-09-11 21:47:07 +0200
commit5eb7ff1175a644ffe3b0f1a75cb235400355f9fb (patch)
tree613342591e12a96725df715853a5e579ba1ec8ea /src/gallium/drivers/nouveau/codegen/nv50_ir.h
parentebcdaa7bbc3a10fe59447ae77b508ee85eaa582f (diff)
downloadexternal_mesa3d-5eb7ff1175a644ffe3b0f1a75cb235400355f9fb.zip
external_mesa3d-5eb7ff1175a644ffe3b0f1a75cb235400355f9fb.tar.gz
external_mesa3d-5eb7ff1175a644ffe3b0f1a75cb235400355f9fb.tar.bz2
Move nv30, nv50 and nvc0 to nouveau.
It is planned to ship openSUSE 13.1 with -shared libs. nouveau.la, nv30.la, nv50.la and nvc0.la are currently LIBADDs in all nouveau related targets. This change makes it possible to easily build one shared libnouveau.so which is then LIBADDed. Also dlopen will be faster for one library instead of three and build time on -jX will be reduced. Whitespace fixes were requested by 'git am'. Signed-off-by: Johannes Obermayr <johannesobermayr@gmx.de> Acked-by: Christoph Bumiller <christoph.bumiller@speed.at> Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir.h')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.h1197
1 files changed, 1197 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
new file mode 100644
index 0000000..68c76e5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -0,0 +1,1197 @@
+/*
+ * Copyright 2011 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NV50_IR_H__
+#define __NV50_IR_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <deque>
+#include <list>
+#include <vector>
+
+#include "codegen/nv50_ir_util.h"
+#include "codegen/nv50_ir_graph.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+namespace nv50_ir {
+
+enum operation // opcode of an Instruction (stored in Instruction::op)
+{
+ OP_NOP = 0,
+ OP_PHI,
+ OP_UNION, // unify a new definition and several source values
+ OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
+ OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value
+ OP_CONSTRAINT, // copy values into consecutive registers
+ OP_MOV, // simple copy, no modifiers allowed; ops listed before this one are pseudo-ops (Instruction::isPseudo tests op < OP_MOV)
+ OP_LOAD,
+ OP_STORE,
+ OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds
+ OP_SUB,
+ OP_MUL,
+ OP_DIV,
+ OP_MOD,
+ OP_MAD,
+ OP_FMA,
+ OP_SAD, // abs(src0 - src1) + src2
+ OP_ABS,
+ OP_NEG,
+ OP_NOT,
+ OP_AND,
+ OP_OR,
+ OP_XOR,
+ OP_SHL,
+ OP_SHR,
+ OP_MAX,
+ OP_MIN,
+ OP_SAT, // CLAMP(f32, 0.0, 1.0)
+ OP_CEIL,
+ OP_FLOOR,
+ OP_TRUNC,
+ OP_CVT,
+ OP_SET_AND, // dst = (src0 CMP src1) & src2
+ OP_SET_OR,
+ OP_SET_XOR,
+ OP_SET,
+ OP_SELP, // dst = src2 ? src0 : src1
+ OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1
+ OP_RCP,
+ OP_RSQ,
+ OP_LG2,
+ OP_SIN,
+ OP_COS,
+ OP_EX2,
+ OP_EXP, // exponential (base M_E)
+ OP_LOG, // natural logarithm
+ OP_PRESIN,
+ OP_PREEX2,
+ OP_SQRT,
+ OP_POW,
+ OP_BRA,
+ OP_CALL,
+ OP_RET,
+ OP_CONT,
+ OP_BREAK,
+ OP_PRERET,
+ OP_PRECONT,
+ OP_PREBREAK,
+ OP_BRKPT, // breakpoint (not related to loops)
+ OP_JOINAT, // push control flow convergence point
+ OP_JOIN, // converge
+ OP_DISCARD,
+ OP_EXIT,
+ OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
+ OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
+ OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
+ OP_EXPORT,
+ OP_LINTERP,
+ OP_PINTERP,
+ OP_EMIT, // emit vertex
+ OP_RESTART, // restart primitive
+ OP_TEX,
+ OP_TXB, // texture bias
+ OP_TXL, // texture lod
+ OP_TXF, // texel fetch
+ OP_TXQ, // texture size query
+ OP_TXD, // texture derivatives
+ OP_TXG, // texture gather
+ OP_TEXCSAA, // texture op for coverage sampling
+ OP_TEXPREP, // turn cube map array into 2d array coordinates
+ OP_SULDB, // surface load (raw)
+ OP_SULDP, // surface load (formatted)
+ OP_SUSTB, // surface store (raw)
+ OP_SUSTP, // surface store (formatted)
+ OP_SUREDB,
+ OP_SUREDP, // surface reduction (atomic op)
+ OP_SULEA, // surface load effective address
+ OP_SUBFM, // surface bitfield manipulation
+ OP_SUCLAMP, // clamp surface coordinates
+ OP_SUEAU, // surface effective address
+ OP_MADSP, // special integer multiply-add
+ OP_TEXBAR, // texture dependency barrier
+ OP_DFDX,
+ OP_DFDY,
+ OP_RDSV, // read system value
+ OP_WRSV, // write system value
+ OP_QUADOP,
+ OP_QUADON,
+ OP_QUADPOP,
+ OP_POPCNT, // bitcount(src0 & src1)
+ OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
+ OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
+ OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
+ OP_ATOM,
+ OP_BAR, // execution barrier, sources = { id, thread count, predicate }
+ OP_VADD, // byte/word vector operations
+ OP_VAVG,
+ OP_VMIN,
+ OP_VMAX,
+ OP_VSAD,
+ OP_VSET,
+ OP_VSHR,
+ OP_VSHL,
+ OP_VSEL,
+ OP_CCTL, // cache control
+ OP_LAST // end marker (presumably the operation count) - keep as the final enumerator
+};
+
+// instruction-specific sub-operation values stored in Instruction::subOp (a uint16_t)
+// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)
+#define NV50_IR_SUBOP_MUL_HIGH 1
+#define NV50_IR_SUBOP_EMIT_RESTART 1
+#define NV50_IR_SUBOP_LDC_IL 1
+#define NV50_IR_SUBOP_LDC_IS 2
+#define NV50_IR_SUBOP_LDC_ISL 3
+#define NV50_IR_SUBOP_SHIFT_WRAP 1
+#define NV50_IR_SUBOP_EMU_PRERET 1
+#define NV50_IR_SUBOP_TEXBAR(n) (n) // parenthesized so expression arguments expand safely
+#define NV50_IR_SUBOP_MOV_FINAL 1
+#define NV50_IR_SUBOP_EXTBF_REV 1
+#define NV50_IR_SUBOP_PERMT_F4E 1
+#define NV50_IR_SUBOP_PERMT_B4E 2
+#define NV50_IR_SUBOP_PERMT_RC8 3
+#define NV50_IR_SUBOP_PERMT_ECL 4
+#define NV50_IR_SUBOP_PERMT_ECR 5
+#define NV50_IR_SUBOP_PERMT_RC16 6
+#define NV50_IR_SUBOP_BAR_SYNC 0
+#define NV50_IR_SUBOP_BAR_ARRIVE 1
+#define NV50_IR_SUBOP_BAR_RED_AND 2
+#define NV50_IR_SUBOP_BAR_RED_OR 3
+#define NV50_IR_SUBOP_BAR_RED_POPC 4
+#define NV50_IR_SUBOP_MEMBAR_L 1
+#define NV50_IR_SUBOP_MEMBAR_S 2
+#define NV50_IR_SUBOP_MEMBAR_M 3
+#define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2)
+#define NV50_IR_SUBOP_MEMBAR_GL (1 << 2)
+#define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2)
+#define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3) // low two bits: L/S/M direction
+#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3) // remaining bits: CTA/GL/SYS scope
+#define NV50_IR_SUBOP_MEMBAR(d,s) \
+ (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
+#define NV50_IR_SUBOP_ATOM_ADD 0
+#define NV50_IR_SUBOP_ATOM_MIN 1
+#define NV50_IR_SUBOP_ATOM_MAX 2
+#define NV50_IR_SUBOP_ATOM_INC 3
+#define NV50_IR_SUBOP_ATOM_DEC 4
+#define NV50_IR_SUBOP_ATOM_AND 5
+#define NV50_IR_SUBOP_ATOM_OR 6
+#define NV50_IR_SUBOP_ATOM_XOR 7
+#define NV50_IR_SUBOP_ATOM_CAS 8
+#define NV50_IR_SUBOP_ATOM_EXCH 9
+#define NV50_IR_SUBOP_CCTL_IV 5
+#define NV50_IR_SUBOP_CCTL_IVALL 6
+#define NV50_IR_SUBOP_SUST_IGN 0
+#define NV50_IR_SUBOP_SUST_TRAP 1
+#define NV50_IR_SUBOP_SUST_SDCL 3
+#define NV50_IR_SUBOP_SULD_ZERO 0
+#define NV50_IR_SUBOP_SULD_TRAP 1
+#define NV50_IR_SUBOP_SULD_SDCL 3
+#define NV50_IR_SUBOP_SUBFM_3D 1
+#define NV50_IR_SUBOP_SUCLAMP_2D 0x10
+#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | (((d) == 2) ? 0x10 : 0)) // (d) parenthesized: d may be an expression
+#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | (((d) == 2) ? 0x10 : 0))
+#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | (((d) == 2) ? 0x10 : 0))
+#define NV50_IR_SUBOP_MADSP_SD 0xffff
+// Yes, we could represent those with DataType.
+// Or put the type into operation and have a couple 1000 values in that enum.
+// This will have to do for now.
+// The bitfields are supposed to correspond to nve4 ISA.
+#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
+#define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000)
+#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)
+#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)
+#define NV50_IR_SUBOP_Vn(n) ((n) >> 14) // recovers the 0/1/2 selector that V1/V2/V4 place in bits 14 and up
+
+enum DataType
+{
+ TYPE_NONE,
+ TYPE_U8,
+ TYPE_S8,
+ TYPE_U16,
+ TYPE_S16,
+ TYPE_U32,
+ TYPE_S32,
+ TYPE_U64, // 64 bit operations are only lowered after register allocation
+ TYPE_S64,
+ TYPE_F16,
+ TYPE_F32,
+ TYPE_F64,
+ TYPE_B96,
+ TYPE_B128
+};
+
+enum CondCode
+{
+ CC_FL = 0,
+ CC_NEVER = CC_FL, // when used with FILE_FLAGS
+ CC_LT = 1,
+ CC_EQ = 2,
+ CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE
+ CC_LE = 3,
+ CC_GT = 4,
+ CC_NE = 5,
+ CC_P = CC_NE,
+ CC_GE = 6,
+ CC_TR = 7,
+ CC_ALWAYS = CC_TR,
+ CC_U = 8,
+ CC_LTU = 9,
+ CC_EQU = 10,
+ CC_LEU = 11,
+ CC_GTU = 12,
+ CC_NEU = 13,
+ CC_GEU = 14,
+ CC_NO = 0x10,
+ CC_NC = 0x11,
+ CC_NS = 0x12,
+ CC_NA = 0x13,
+ CC_A = 0x14,
+ CC_S = 0x15,
+ CC_C = 0x16,
+ CC_O = 0x17
+};
+
+enum RoundMode // rounding mode of an Instruction (stored in Instruction::rnd)
+{
+ ROUND_N, // nearest
+ ROUND_M, // towards -inf
+ ROUND_Z, // towards 0
+ ROUND_P, // towards +inf
+ ROUND_NI, // nearest integer
+ ROUND_MI, // to integer towards -inf
+ ROUND_ZI, // to integer towards 0
+ ROUND_PI // to integer towards +inf (no trailing comma: not valid in C++98/03, and matches the other enums here)
+};
+
+enum CacheMode
+{
+ CACHE_CA, // cache at all levels
+ CACHE_WB = CACHE_CA, // cache write back
+ CACHE_CG, // cache at global level
+ CACHE_CS, // cache streaming
+ CACHE_CV, // cache as volatile
+ CACHE_WT = CACHE_CV // cache write-through
+};
+
+enum DataFile
+{
+ FILE_NULL = 0,
+ FILE_GPR,
+ FILE_PREDICATE, // boolean predicate
+ FILE_FLAGS, // zero/sign/carry/overflow bits
+ FILE_ADDRESS,
+ LAST_REGISTER_FILE = FILE_ADDRESS,
+ FILE_IMMEDIATE,
+ FILE_MEMORY_CONST,
+ FILE_SHADER_INPUT,
+ FILE_SHADER_OUTPUT,
+ FILE_MEMORY_GLOBAL,
+ FILE_MEMORY_SHARED,
+ FILE_MEMORY_LOCAL,
+ FILE_SYSTEM_VALUE,
+ DATA_FILE_COUNT
+};
+
+enum TexTarget
+{
+ TEX_TARGET_1D,
+ TEX_TARGET_2D,
+ TEX_TARGET_2D_MS,
+ TEX_TARGET_3D,
+ TEX_TARGET_CUBE,
+ TEX_TARGET_1D_SHADOW,
+ TEX_TARGET_2D_SHADOW,
+ TEX_TARGET_CUBE_SHADOW,
+ TEX_TARGET_1D_ARRAY,
+ TEX_TARGET_2D_ARRAY,
+ TEX_TARGET_2D_MS_ARRAY,
+ TEX_TARGET_CUBE_ARRAY,
+ TEX_TARGET_1D_ARRAY_SHADOW,
+ TEX_TARGET_2D_ARRAY_SHADOW,
+ TEX_TARGET_RECT,
+ TEX_TARGET_RECT_SHADOW,
+ TEX_TARGET_CUBE_ARRAY_SHADOW,
+ TEX_TARGET_BUFFER,
+ TEX_TARGET_COUNT
+};
+
+enum SVSemantic
+{
+ SV_POSITION, // WPOS
+ SV_VERTEX_ID,
+ SV_INSTANCE_ID,
+ SV_INVOCATION_ID,
+ SV_PRIMITIVE_ID,
+ SV_VERTEX_COUNT, // gl_PatchVerticesIn
+ SV_LAYER,
+ SV_VIEWPORT_INDEX,
+ SV_YDIR,
+ SV_FACE,
+ SV_POINT_SIZE,
+ SV_POINT_COORD,
+ SV_CLIP_DISTANCE,
+ SV_SAMPLE_INDEX,
+ SV_TESS_FACTOR,
+ SV_TESS_COORD,
+ SV_TID,
+ SV_CTAID,
+ SV_NTID,
+ SV_GRIDID,
+ SV_NCTAID,
+ SV_LANEID,
+ SV_PHYSID,
+ SV_NPHYSID,
+ SV_CLOCK,
+ SV_LBASE,
+ SV_SBASE,
+ SV_UNDEFINED,
+ SV_LAST
+};
+
+class Program;
+class Function;
+class BasicBlock;
+
+class Target;
+
+class Instruction;
+class CmpInstruction;
+class TexInstruction;
+class FlowInstruction;
+
+class Value;
+class LValue;
+class Symbol;
+class ImmediateValue;
+
+struct Storage // location and type description of a Value (held as Value::reg)
+{
+ DataFile file;
+ int8_t fileIndex; // signed, may be indirect for CONST[]
+ uint8_t size; // this should match the Instruction type's size
+ DataType type; // mainly for pretty printing
+ union { // all members share storage; presumably 'file'/'type' tell which one is meaningful - TODO confirm
+ uint64_t u64; // immediate values
+ uint32_t u32;
+ uint16_t u16;
+ uint8_t u8;
+ int64_t s64;
+ int32_t s32;
+ int16_t s16;
+ int8_t s8;
+ float f32;
+ double f64;
+ int32_t offset; // offset from 0 (base of address space)
+ int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4)
+ struct {
+ SVSemantic sv;
+ int index;
+ } sv; // system value semantic plus index (cf. Symbol::setSV)
+ } data;
+};
+
+// precedence: NOT after SAT after NEG after ABS
+#define NV50_IR_MOD_ABS (1 << 0)
+#define NV50_IR_MOD_NEG (1 << 1)
+#define NV50_IR_MOD_SAT (1 << 2)
+#define NV50_IR_MOD_NOT (1 << 3)
+#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)
+
+#define NV50_IR_INTERP_MODE_MASK 0x3
+#define NV50_IR_INTERP_LINEAR (0 << 0)
+#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)
+#define NV50_IR_INTERP_FLAT (2 << 0)
+#define NV50_IR_INTERP_SC (3 << 0) // what exactly is that ?
+#define NV50_IR_INTERP_SAMPLE_MASK 0xc
+#define NV50_IR_INTERP_DEFAULT (0 << 2)
+#define NV50_IR_INTERP_CENTROID (1 << 2)
+#define NV50_IR_INTERP_OFFSET (2 << 2)
+#define NV50_IR_INTERP_SAMPLEID (3 << 2)
+
+// do we really want this to be a class ?
+class Modifier // small set of NV50_IR_MOD_* bits; each ValueRef carries one as ValueRef::mod
+{
+public:
+ Modifier() : bits(0) { }
+ Modifier(unsigned int m) : bits(m) { } // NOTE: m is narrowed to the 8-bit 'bits' field
+ Modifier(operation op);
+
+ // @return new Modifier applying a after b (asserts if unrepresentable)
+ Modifier operator*(const Modifier) const;
+ Modifier operator*=(const Modifier m) { *this = *this * m; return *this; }
+ bool operator==(const Modifier m) const { return m.bits == bits; } // exact bit equality; returns bool, not a Modifier wrapping 0/1
+ bool operator!=(const Modifier m) const { return m.bits != bits; }
+
+ inline Modifier operator&(const Modifier m) const { return bits & m.bits; }
+ inline Modifier operator|(const Modifier m) const { return bits | m.bits; }
+ inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; }
+
+ operation getOp() const;
+
+ inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; } // 1 if the NEG bit is set
+ inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; } // 1 if the ABS bit is set
+
+ inline operator bool() const { return bits ? true : false; } // true if any modifier bit is set
+
+ void applyTo(ImmediateValue &imm) const;
+
+ int print(char *buf, size_t size) const;
+
+private:
+ uint8_t bits;
+};
+
+class ValueRef // reference to (use of) a Value; Instruction stores its sources as ValueRefs
+{
+public:
+ ValueRef(Value * = NULL);
+ ValueRef(const ValueRef&);
+ ~ValueRef();
+
+ inline bool exists() const { return value != NULL; } // true if a Value is bound
+
+ void set(Value *);
+ void set(const ValueRef&);
+ inline Value *get() const { return value; }
+ inline Value *rep() const;
+
+ inline Instruction *getInsn() const { return insn; }
+ inline void setInsn(Instruction *inst) { insn = inst; }
+
+ inline bool isIndirect(int dim) const { return indirect[dim] >= 0; } // dim indexes indirect[2]; not range-checked
+ inline const ValueRef *getIndirect(int dim) const;
+
+ inline DataFile getFile() const;
+ inline unsigned getSize() const;
+
+ // SSA: return eventual (traverse MOVs) literal value, if it exists
+ bool getImmediate(ImmediateValue&) const;
+
+public:
+ Modifier mod;
+ int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i])
+ uint8_t swizzle;
+
+ bool usedAsPtr; // for printing
+
+private:
+ Value *value; // referenced value, NULL when unset (see exists())
+ Instruction *insn; // instruction recorded via setInsn()
+};
+
+class ValueDef // definition slot for a Value; Instruction stores its results as ValueDefs
+{
+public:
+ ValueDef(Value * = NULL);
+ ValueDef(const ValueDef&);
+ ~ValueDef();
+
+ inline bool exists() const { return value != NULL; } // true if a Value is bound
+
+ inline Value *get() const { return value; }
+ inline Value *rep() const;
+ void set(Value *);
+ bool mayReplace(const ValueRef &);
+ void replace(const ValueRef &, bool doSet); // replace all uses of the old value
+
+ inline Instruction *getInsn() const { return insn; }
+ inline void setInsn(Instruction *inst) { insn = inst; }
+
+ inline DataFile getFile() const;
+ inline unsigned getSize() const;
+
+ inline void setSSA(LValue *);
+ inline const LValue *preSSA() const;
+
+private:
+ Value *value; // should make this LValue * ...
+ LValue *origin; // pre SSA value
+ Instruction *insn; // instruction recorded via setInsn()
+};
+
+class Value // abstract base of LValue, Symbol and ImmediateValue (clone/print are pure virtual)
+{
+public:
+ Value();
+ virtual ~Value() { }
+
+ virtual Value *clone(ClonePolicy<Function>&) const = 0;
+
+ virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0;
+
+ virtual bool equals(const Value *, bool strict = false) const;
+ virtual bool interfers(const Value *) const;
+ virtual bool isUniform() const { return true; }
+
+ inline Value *rep() const { return join; }
+
+ inline Instruction *getUniqueInsn() const;
+ inline Instruction *getInsn() const; // use when uniqueness is certain
+
+ inline int refCount() const { return uses.size(); } // number of ValueRefs in 'uses'; const so it works on const Value *
+
+ inline LValue *asLValue();
+ inline Symbol *asSym();
+ inline ImmediateValue *asImm();
+ inline const Symbol *asSym() const;
+ inline const ImmediateValue *asImm() const;
+
+ inline bool inFile(DataFile f) const { return reg.file == f; } // compares reg.file only; const so it works on const Value *
+
+ static inline Value *get(Iterator&);
+
+ std::list<ValueRef *> uses;
+ std::list<ValueDef *> defs;
+ typedef std::list<ValueRef *>::iterator UseIterator;
+ typedef std::list<ValueRef *>::const_iterator UseCIterator;
+ typedef std::list<ValueDef *>::iterator DefIterator;
+ typedef std::list<ValueDef *>::const_iterator DefCIterator;
+
+ int id;
+ Storage reg;
+
+ // TODO: these should be in LValue:
+ Interval livei;
+ Value *join; // returned by rep()
+};
+
+class LValue : public Value // Value subclass created per Function; carries the register-allocation flags below
+{
+public:
+ LValue(Function *, DataFile file);
+ LValue(Function *, LValue *);
+ ~LValue() { }
+
+ virtual bool isUniform() const;
+
+ virtual LValue *clone(ClonePolicy<Function>&) const;
+
+ virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
+
+public:
+ unsigned compMask : 8; // compound/component mask
+ unsigned compound : 1; // used by RA, value involved in split/merge
+ unsigned ssa : 1;
+ unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0)
+ unsigned noSpill : 1; // do not spill (e.g. if spill temporary already)
+};
+
+class Symbol : public Value // Value subclass created per Program; defaults to FILE_MEMORY_CONST, index 0
+{
+public:
+ Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0);
+ ~Symbol() { }
+
+ virtual Symbol *clone(ClonePolicy<Function>&) const;
+
+ virtual bool equals(const Value *that, bool strict) const; // NOTE: the default for 'strict' is declared only on Value::equals
+
+ virtual bool isUniform() const;
+
+ virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
+
+ // print with indirect values
+ int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const;
+
+ inline void setFile(DataFile file, ubyte fileIndex = 0)
+ {
+ reg.file = file;
+ reg.fileIndex = fileIndex; // ubyte stored into the signed int8_t Storage::fileIndex
+ }
+
+ inline void setOffset(int32_t offset);
+ inline void setAddress(Symbol *base, int32_t offset);
+ inline void setSV(SVSemantic sv, uint32_t idx = 0);
+
+ inline const Symbol *getBase() const { return baseSym; }
+
+private:
+ Symbol *baseSym; // array base for Symbols representing array elements
+};
+
+class ImmediateValue : public Value // Value subclass for literals, with constant-folding helpers
+{
+public:
+ ImmediateValue() { }
+ ImmediateValue(Program *, uint32_t);
+ ImmediateValue(Program *, float);
+ ImmediateValue(Program *, double);
+ // NOTE: presumably the result is not added to any Program (this constructor takes no Program *) - TODO confirm
+ ImmediateValue(const ImmediateValue *, DataType ty);
+ ~ImmediateValue() { };
+
+ virtual ImmediateValue *clone(ClonePolicy<Function>&) const;
+
+ virtual bool equals(const Value *that, bool strict) const; // NOTE: the default for 'strict' is declared only on Value::equals
+
+ // these only work if 'type' is valid (we mostly use untyped literals):
+ bool isInteger(const int ival) const; // ival is cast to this' type
+ bool isNegative() const;
+ bool isPow2() const;
+
+ void applyLog2();
+
+ // for constant folding:
+ ImmediateValue operator+(const ImmediateValue&) const;
+ ImmediateValue operator-(const ImmediateValue&) const;
+ ImmediateValue operator*(const ImmediateValue&) const;
+ ImmediateValue operator/(const ImmediateValue&) const;
+
+ ImmediateValue& operator=(const ImmediateValue&); // only sets value !
+
+ bool compare(CondCode cc, float fval) const;
+
+ virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
+};
+
+class Instruction // one IR instruction: opcode, types, flags, plus its defs and srcs; base of Cmp/Tex/FlowInstruction
+{
+public:
+ Instruction();
+ Instruction(Function *, operation, DataType);
+ virtual ~Instruction();
+
+ virtual Instruction *clone(ClonePolicy<Function>&,
+ Instruction * = NULL) const;
+
+ void setDef(int i, Value *);
+ void setSrc(int s, Value *);
+ void setSrc(int s, const ValueRef&);
+ void swapSources(int a, int b);
+ void moveSources(int s, int delta);
+ bool setIndirect(int s, int dim, Value *);
+
+ inline ValueRef& src(int s) { return srcs[s]; } // unchecked index; see srcExists()
+ inline ValueDef& def(int s) { return defs[s]; } // unchecked index; see defExists()
+ inline const ValueRef& src(int s) const { return srcs[s]; }
+ inline const ValueDef& def(int s) const { return defs[s]; }
+
+ inline Value *getDef(int d) const { return defs[d].get(); }
+ inline Value *getSrc(int s) const { return srcs[s].get(); }
+ inline Value *getIndirect(int s, int dim) const;
+
+ inline bool defExists(unsigned d) const // bounds-checked: slot d is in range and has a value
+ {
+ return d < defs.size() && defs[d].exists();
+ }
+ inline bool srcExists(unsigned s) const // bounds-checked: slot s is in range and has a value
+ {
+ return s < srcs.size() && srcs[s].exists();
+ }
+
+ inline bool constrainedDefs() const;
+
+ bool setPredicate(CondCode ccode, Value *);
+ inline Value *getPredicate() const;
+ bool writesPredicate() const;
+ inline bool isPredicated() const { return predSrc >= 0; } // predSrc < 0 means no predicate
+
+ inline void setFlagsSrc(int s, Value *);
+ inline void setFlagsDef(int d, Value *);
+ inline bool usesFlags() const { return flagsSrc >= 0; } // flagsSrc < 0 means no flags source
+
+ unsigned int defCount() const { return defs.size(); } // number of def slots, whether set or not
+ unsigned int defCount(unsigned int mask, bool singleFile = false) const;
+ unsigned int srcCount() const { return srcs.size(); } // number of src slots, whether set or not
+ unsigned int srcCount(unsigned int mask, bool singleFile = false) const;
+
+ // save & remove / set indirect[0,1] and predicate source
+ void takeExtraSources(int s, Value *[3]);
+ void putExtraSources(int s, Value *[3]);
+
+ inline void setType(DataType type) { dType = sType = type; } // same type for destination and source
+
+ inline void setType(DataType dtype, DataType stype)
+ {
+ dType = dtype;
+ sType = stype;
+ }
+
+ inline bool isPseudo() const { return op < OP_MOV; } // relies on the ordering of enum operation
+ bool isDead() const;
+ bool isNop() const;
+ bool isCommutationLegal(const Instruction *) const; // must be adjacent !
+ bool isActionEqual(const Instruction *) const;
+ bool isResultEqual(const Instruction *) const;
+
+ void print() const;
+
+ inline CmpInstruction *asCmp();
+ inline TexInstruction *asTex();
+ inline FlowInstruction *asFlow();
+ inline const TexInstruction *asTex() const;
+ inline const CmpInstruction *asCmp() const;
+ inline const FlowInstruction *asFlow() const;
+
+public:
+ Instruction *next;
+ Instruction *prev;
+ int id;
+ int serial; // CFG order
+
+ operation op;
+ DataType dType; // destination or defining type
+ DataType sType; // source or secondary type
+ CondCode cc;
+ RoundMode rnd;
+ CacheMode cache;
+
+ uint16_t subOp; // quadop, 1 for mul-high, etc.
+
+ unsigned encSize : 4; // encoding size in bytes
+ unsigned saturate : 1; // to [0.0f, 1.0f]
+ unsigned join : 1; // converge control flow (use OP_JOIN until end)
+ unsigned fixed : 1; // prevent dead code elimination
+ unsigned terminator : 1; // end of basic block
+ unsigned ftz : 1; // flush denormal to zero
+ unsigned dnz : 1; // denormals, NaN are zero
+ unsigned ipa : 4; // interpolation mode
+ unsigned lanes : 4;
+ unsigned perPatch : 1;
+ unsigned exit : 1; // terminate program after insn
+ unsigned mask : 4; // for vector ops
+
+ int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
+
+ int8_t predSrc;
+ int8_t flagsDef;
+ int8_t flagsSrc;
+
+ uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
+
+ BasicBlock *bb;
+
+protected:
+ std::deque<ValueDef> defs; // no gaps !
+ std::deque<ValueRef> srcs; // no gaps !
+
+ // instruction specific methods:
+ // (don't want to subclass, would need more constructors and memory pools)
+public:
+ inline void setInterpolate(unsigned int mode) { ipa = mode; } // mode is narrowed to the 4-bit 'ipa' field
+
+ unsigned int getInterpMode() const { return ipa & NV50_IR_INTERP_MODE_MASK; } // one of the NV50_IR_INTERP_{LINEAR,PERSPECTIVE,FLAT,SC} values
+ unsigned int getSampleMode() const { return ipa & NV50_IR_INTERP_SAMPLE_MASK; } // one of the NV50_IR_INTERP_{DEFAULT,CENTROID,OFFSET,SAMPLEID} values
+
+private:
+ void init();
+};
+
+enum TexQuery
+{
+ TXQ_DIMS,
+ TXQ_TYPE,
+ TXQ_SAMPLE_POSITION,
+ TXQ_FILTER,
+ TXQ_LOD,
+ TXQ_WRAP,
+ TXQ_BORDER_COLOUR
+};
+
+class TexInstruction : public Instruction // Instruction subclass carrying texture state ('tex') and derivative refs
+{
+public:
+ class Target // wraps a TexTarget and looks up its properties in descTable
+ {
+ public:
+ Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { } // NOTE(review): unlike operator=, no range assert here
+
+ const char *getName() const { return descTable[target].name; }
+ unsigned int getArgCount() const { return descTable[target].argc; }
+ unsigned int getDim() const { return descTable[target].dim; }
+ int isArray() const { return descTable[target].array ? 1 : 0; }
+ int isCube() const { return descTable[target].cube ? 1 : 0; }
+ int isShadow() const { return descTable[target].shadow ? 1 : 0; }
+ int isMS() const {
+ return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; }
+
+ Target& operator=(TexTarget targ)
+ {
+ assert(targ < TEX_TARGET_COUNT); // NOTE(review): <assert.h> is not included directly by this header - presumably pulled in by an earlier include
+ target = targ;
+ return *this;
+ }
+
+ inline bool operator==(TexTarget targ) const { return target == targ; }
+ inline bool operator!=(TexTarget targ) const { return target != targ; }
+
+ enum TexTarget getEnum() const { return target; }
+
+ private:
+ struct Desc // one descTable row per TexTarget
+ {
+ char name[19];
+ uint8_t dim;
+ uint8_t argc;
+ bool array;
+ bool cube;
+ bool shadow;
+ };
+
+ static const struct Desc descTable[TEX_TARGET_COUNT]; // indexed by 'target'; defined outside this header
+
+ private:
+ enum TexTarget target;
+ };
+
+public:
+ TexInstruction(Function *, operation);
+ virtual ~TexInstruction();
+
+ virtual TexInstruction *clone(ClonePolicy<Function>&,
+ Instruction * = NULL) const;
+
+ inline void setTexture(Target targ, uint8_t r, uint8_t s) // r/s are 8-bit here although tex.r/tex.s are 16-bit
+ {
+ tex.r = r;
+ tex.s = s;
+ tex.target = targ;
+ }
+
+ void setIndirectR(Value *);
+ void setIndirectS(Value *);
+ inline Value *getIndirectR() const;
+ inline Value *getIndirectS() const;
+
+public:
+ struct {
+ Target target;
+
+ uint16_t r;
+ uint16_t s;
+ int8_t rIndirectSrc;
+ int8_t sIndirectSrc;
+
+ uint8_t mask;
+ uint8_t gatherComp;
+
+ bool liveOnly; // only execute on live pixels of a quad (optimization)
+ bool levelZero;
+ bool derivAll;
+
+ int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
+ int8_t offset[4][3];
+
+ enum TexQuery query;
+ } tex;
+
+ ValueRef dPdx[3];
+ ValueRef dPdy[3];
+};
+
+class CmpInstruction : public Instruction // Instruction subclass that adds a comparison condition code
+{
+public:
+ CmpInstruction(Function *, operation);
+
+ virtual CmpInstruction *clone(ClonePolicy<Function>&,
+ Instruction * = NULL) const;
+
+ void setCondition(CondCode cond) { setCond = cond; }
+ CondCode getCondition() const { return setCond; }
+
+public:
+ CondCode setCond; // separate from the inherited Instruction::cc field
+};
+
+class FlowInstruction : public Instruction // Instruction subclass that adds a control-flow target
+{
+public:
+ FlowInstruction(Function *, operation, void *target);
+
+ virtual FlowInstruction *clone(ClonePolicy<Function>&,
+ Instruction * = NULL) const;
+
+public:
+ unsigned allWarp : 1;
+ unsigned absolute : 1;
+ unsigned limit : 1;
+ unsigned builtin : 1; // true for calls to emulation code
+ unsigned indirect : 1; // target in src(0)
+
+ union { // only one member is valid at a time; presumably selected by op and the 'builtin' flag - TODO confirm
+ BasicBlock *bb;
+ int builtin; // distinct from the 'builtin' bit-field above
+ Function *fn;
+ } target;
+};
+
+class BasicBlock // holds the phi/entry/exit instruction pointers of a block plus its CFG and dominator-tree nodes
+{
+public:
+ BasicBlock(Function *);
+ ~BasicBlock();
+
+ BasicBlock *clone(ClonePolicy<Function>&) const;
+
+ inline int getId() const { return id; }
+ inline unsigned int getInsnCount() const { return numInsns; }
+ inline bool isTerminated() const { return exit && exit->terminator; } // exit insn exists and has its terminator bit set
+
+ bool dominatedBy(BasicBlock *bb);
+ inline bool reachableBy(const BasicBlock *by, const BasicBlock *term);
+
+ // returns mask of conditional out blocks
+ // e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF
+ unsigned int initiatesSimpleConditional() const;
+
+public:
+ Function *getFunction() const { return func; }
+ Program *getProgram() const { return program; }
+
+ Instruction *getEntry() const { return entry; } // first non-phi instruction
+ Instruction *getPhi() const { return phi; }
+ Instruction *getFirst() const { return phi ? phi : entry; } // phi if there is one, otherwise entry
+ Instruction *getExit() const { return exit; }
+
+ void insertHead(Instruction *);
+ void insertTail(Instruction *);
+ void insertBefore(Instruction *, Instruction *);
+ void insertAfter(Instruction *, Instruction *);
+ void remove(Instruction *);
+ void permuteAdjacent(Instruction *, Instruction *);
+
+ BasicBlock *idom() const;
+
+ // NOTE: currently does not rebuild the dominator tree
+ BasicBlock *splitBefore(Instruction *, bool attach = true);
+ BasicBlock *splitAfter(Instruction *, bool attach = true);
+
+ DLList& getDF() { return df; }
+ DLList::Iterator iterDF() { return df.iterator(); }
+
+ static inline BasicBlock *get(Iterator&);
+ static inline BasicBlock *get(Graph::Node *);
+
+public:
+ Graph::Node cfg; // first edge is branch *taken* (the ELSE branch)
+ Graph::Node dom;
+
+ BitSet liveSet;
+ BitSet defSet;
+
+ uint32_t binPos;
+ uint32_t binSize;
+
+ Instruction *joinAt; // for quick reference
+
+ bool explicitCont; // loop headers: true if loop contains continue stmts
+
+private:
+ int id;
+ DLList df; // exposed through getDF()/iterDF()
+
+ Instruction *phi;
+ Instruction *entry;
+ Instruction *exit;
+
+ unsigned int numInsns;
+
+private:
+ Function *func;
+ Program *program;
+
+ void splitCommon(Instruction *, BasicBlock *, bool attach);
+};
+
+class Function // a function of a Program: CFG, dominator tree and the lists of its blocks, instructions and LValues
+{
+public:
+ Function(Program *, const char *name, uint32_t label);
+ ~Function();
+
+ static inline Function *get(Graph::Node *node);
+
+ inline Program *getProgram() const { return prog; }
+ inline const char *getName() const { return name; }
+ inline int getId() const { return id; }
+ inline uint32_t getLabel() const { return label; }
+
+ void print();
+ void printLiveIntervals() const;
+ void printCFGraph(const char *filePath);
+
+ bool setEntry(BasicBlock *);
+ bool setExit(BasicBlock *);
+
+ unsigned int orderInstructions(ArrayList&);
+
+ inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); } // id is passed by reference to ArrayList::insert
+ inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); }
+ inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); }
+
+ inline LValue *getLValue(int id);
+
+ void buildLiveSets();
+ void buildDefSets();
+ bool convertToSSA();
+
+public:
+ std::deque<ValueDef> ins;
+ std::deque<ValueRef> outs;
+ std::deque<Value *> clobbers;
+
+ Graph cfg;
+ Graph::Node *cfgExit;
+ Graph *domTree;
+ Graph::Node call; // node in the call graph
+
+ BasicBlock **bbArray; // BBs in emission order
+ int bbCount;
+
+ unsigned int loopNestingBound;
+ int regClobberMax;
+
+ uint32_t binPos;
+ uint32_t binSize;
+
+ Value *stackPtr;
+
+ uint32_t tlsBase; // base address for l[] space (if no stack pointer is used)
+ uint32_t tlsSize;
+
+ ArrayList allBBlocks;
+ ArrayList allInsns;
+ ArrayList allLValues;
+
+private:
+ void buildLiveSetsPreSSA(BasicBlock *, const int sequence);
+ void buildDefSetsPreSSA(BasicBlock *bb, const int seq);
+
+private:
+ uint32_t label;
+ int id;
+ const char *const name; // pointer fixed at construction; cannot be reseated
+ Program *prog;
+};
+
+enum CGStage
+{
+ CG_STAGE_PRE_SSA,
+ CG_STAGE_SSA, // expected directly before register allocation
+ CG_STAGE_POST_RA
+};
+
+class Program // top-level container: functions, values, memory pools and the emitted code buffer
+{
+public:
+ enum Type // returned by getType()
+ {
+ TYPE_VERTEX,
+ TYPE_TESSELLATION_CONTROL,
+ TYPE_TESSELLATION_EVAL,
+ TYPE_GEOMETRY,
+ TYPE_FRAGMENT,
+ TYPE_COMPUTE
+ };
+
+ Program(Type type, Target *targ);
+ ~Program();
+
+ void print();
+
+ Type getType() const { return progType; }
+
+ inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); }
+ inline void del(Function *fn, int& id) { allFuncs.remove(id); } // fn is unused; removal is by id only
+ inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
+
+ bool makeFromTGSI(struct nv50_ir_prog_info *);
+ bool makeFromSM4(struct nv50_ir_prog_info *);
+ bool convertToSSA();
+ bool optimizeSSA(int level);
+ bool optimizePostRA(int level);
+ bool registerAllocation();
+ bool emitBinary(struct nv50_ir_prog_info *);
+
+ const Target *getTarget() const { return target; }
+
+private:
+ void emitSymbolTable(struct nv50_ir_prog_info *);
+
+ Type progType;
+ Target *target;
+
+public:
+ Function *main;
+ Graph calls;
+
+ ArrayList allFuncs;
+ ArrayList allRValues;
+
+ uint32_t *code;
+ uint32_t binSize;
+ uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
+
+ int maxGPR;
+
+ MemoryPool mem_Instruction; // one pool per class named in the suffix
+ MemoryPool mem_CmpInstruction;
+ MemoryPool mem_TexInstruction;
+ MemoryPool mem_FlowInstruction;
+ MemoryPool mem_LValue;
+ MemoryPool mem_Symbol;
+ MemoryPool mem_ImmediateValue;
+
+ uint32_t dbgFlags;
+ uint8_t optLevel;
+
+ void *targetPriv; // e.g. to carry information between passes
+
+ const struct nv50_ir_prog_info *driver; // for driver configuration
+
+ void releaseInstruction(Instruction *);
+ void releaseValue(Value *);
+};
+
+// TODO: add const version
+class Pass // base class for passes: subclasses override visit(); callers invoke run()
+{
+public:
+ virtual ~Pass() { } // polymorphic base (virtual visit()): makes destruction through a Pass * well-defined
+ bool run(Program *, bool ordered = false, bool skipPhi = false);
+ bool run(Function *, bool ordered = false, bool skipPhi = false);
+private:
+ // return false to continue with next entity on next higher level
+ virtual bool visit(Function *) { return true; }
+ virtual bool visit(BasicBlock *) { return true; }
+ virtual bool visit(Instruction *) { return false; }
+
+ bool doRun(Program *, bool ordered, bool skipPhi);
+ bool doRun(Function *, bool ordered, bool skipPhi);
+
+protected:
+ bool err; // NOTE(review): no constructor here initializes err/func/prog - presumably set by run(); confirm
+ Function *func;
+ Program *prog;
+};
+
+// =============================================================================
+
+#include "codegen/nv50_ir_inlines.h"
+
+} // namespace nv50_ir
+
+#endif // __NV50_IR_H__