summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/Makefile1
-rw-r--r--src/gallium/auxiliary/tgsi/SConscript1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c5
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.h6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c275
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump_c.c134
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c317
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h29
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c161
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.h53
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c17
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c284
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c164
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.h1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.c1
18 files changed, 846 insertions, 612 deletions
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index bbeff13..806a2bd 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -7,6 +7,7 @@ C_SOURCES = \
tgsi_build.c \
tgsi_dump.c \
tgsi_exec.c \
+ tgsi_info.c \
tgsi_iterate.c \
tgsi_parse.c \
tgsi_scan.c \
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 03982e2..45bf3f6 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -7,6 +7,7 @@ tgsi = env.ConvenienceLibrary(
'tgsi_dump.c',
'tgsi_dump_c.c',
'tgsi_exec.c',
+ 'tgsi_info.c',
'tgsi_iterate.c',
'tgsi_parse.c',
'tgsi_sanity.c',
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 742ef14..74614d3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -26,7 +26,6 @@
**************************************************************************/
#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_build.h"
#include "tgsi_parse.h"
@@ -820,7 +819,7 @@ tgsi_build_instruction_ext_nv(
unsigned cond_swizzle_z,
unsigned cond_swizzle_w,
unsigned cond_dst_update,
- unsigned cond_flow_update,
+ unsigned cond_flow_enable,
struct tgsi_token *prev_token,
struct tgsi_instruction *instruction,
struct tgsi_header *header )
@@ -837,7 +836,7 @@ tgsi_build_instruction_ext_nv(
instruction_ext_nv.CondSwizzleZ = cond_swizzle_z;
instruction_ext_nv.CondSwizzleW = cond_swizzle_w;
instruction_ext_nv.CondDstUpdate = cond_dst_update;
- instruction_ext_nv.CondFlowEnable = cond_flow_update;
+ instruction_ext_nv.CondFlowEnable = cond_flow_enable;
prev_token->Extended = 1;
instruction_grow( instruction, header );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index ed25830..7d62347 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -28,6 +28,10 @@
#ifndef TGSI_BUILD_H
#define TGSI_BUILD_H
+
+struct tgsi_token;
+
+
#if defined __cplusplus
extern "C" {
#endif
@@ -170,7 +174,7 @@ tgsi_build_instruction_ext_nv(
unsigned cond_swizzle_z,
unsigned cond_swizzle_w,
unsigned cond_dst_update,
- unsigned cond_flow_update,
+ unsigned cond_flow_enable,
struct tgsi_token *prev_token,
struct tgsi_instruction *instruction,
struct tgsi_header *header );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 29bb530..afc8ffa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -28,6 +28,7 @@
#include "pipe/p_debug.h"
#include "util/u_string.h"
#include "tgsi_dump.h"
+#include "tgsi_info.h"
#include "tgsi_iterate.h"
struct dump_ctx
@@ -36,30 +37,40 @@ struct dump_ctx
uint instno;
- struct util_strbuf *sbuf;
+ void (*printf)(struct dump_ctx *ctx, const char *format, ...);
};
+static void
+dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
+{
+ va_list ap;
+ (void)ctx;
+ va_start(ap, format);
+ debug_vprintf(format, ap);
+ va_end(ap);
+}
+
static void
dump_enum(
- struct util_strbuf *sbuf,
+ struct dump_ctx *ctx,
uint e,
const char **enums,
uint enum_count )
{
if (e >= enum_count)
- util_strbuf_printf( sbuf, "%u", e );
+ ctx->printf( ctx, "%u", e );
else
- util_strbuf_printf( sbuf, "%s", enums[e] );
+ ctx->printf( ctx, "%s", enums[e] );
}
-#define EOL() util_strbuf_printf( sbuf, "\n" )
-#define TXT(S) util_strbuf_printf( sbuf, "%s", S )
-#define CHR(C) util_strbuf_printf( sbuf, "%c", C )
-#define UIX(I) util_strbuf_printf( sbuf, "0x%x", I )
-#define UID(I) util_strbuf_printf( sbuf, "%u", I )
-#define SID(I) util_strbuf_printf( sbuf, "%d", I )
-#define FLT(F) util_strbuf_printf( sbuf, "%10.4f", F )
-#define ENM(E,ENUMS) dump_enum( sbuf, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+#define EOL() ctx->printf( ctx, "\n" )
+#define TXT(S) ctx->printf( ctx, "%s", S )
+#define CHR(C) ctx->printf( ctx, "%c", C )
+#define UIX(I) ctx->printf( ctx, "0x%x", I )
+#define UID(I) ctx->printf( ctx, "%u", I )
+#define SID(I) ctx->printf( ctx, "%d", I )
+#define FLT(F) ctx->printf( ctx, "%10.4f", F )
+#define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
static const char *processor_type_names[] =
{
@@ -103,129 +114,6 @@ static const char *immediate_type_names[] =
"FLT32"
};
-static const char *opcode_names[TGSI_OPCODE_LAST] =
-{
- "ARL",
- "MOV",
- "LIT",
- "RCP",
- "RSQ",
- "EXP",
- "LOG",
- "MUL",
- "ADD",
- "DP3",
- "DP4",
- "DST",
- "MIN",
- "MAX",
- "SLT",
- "SGE",
- "MAD",
- "SUB",
- "LERP",
- "CND",
- "CND0",
- "DOT2ADD",
- "INDEX",
- "NEGATE",
- "FRAC",
- "CLAMP",
- "FLOOR",
- "ROUND",
- "EXPBASE2",
- "LOGBASE2",
- "POWER",
- "CROSSPRODUCT",
- "MULTIPLYMATRIX",
- "ABS",
- "RCC",
- "DPH",
- "COS",
- "DDX",
- "DDY",
- "KILP",
- "PK2H",
- "PK2US",
- "PK4B",
- "PK4UB",
- "RFL",
- "SEQ",
- "SFL",
- "SGT",
- "SIN",
- "SLE",
- "SNE",
- "STR",
- "TEX",
- "TXD",
- "TXP",
- "UP2H",
- "UP2US",
- "UP4B",
- "UP4UB",
- "X2D",
- "ARA",
- "ARR",
- "BRA",
- "CAL",
- "RET",
- "SSG",
- "CMP",
- "SCS",
- "TXB",
- "NRM",
- "DIV",
- "DP2",
- "TXL",
- "BRK",
- "IF",
- "LOOP",
- "REP",
- "ELSE",
- "ENDIF",
- "ENDLOOP",
- "ENDREP",
- "PUSHA",
- "POPA",
- "CEIL",
- "I2F",
- "NOT",
- "TRUNC",
- "SHL",
- "SHR",
- "AND",
- "OR",
- "MOD",
- "XOR",
- "SAD",
- "TXF",
- "TXQ",
- "CONT",
- "EMIT",
- "ENDPRIM",
- "BGNLOOP2",
- "BGNSUB",
- "ENDLOOP2",
- "ENDSUB",
- "NOISE1",
- "NOISE2",
- "NOISE3",
- "NOISE4",
- "NOP",
- "M4X3",
- "M3X4",
- "M3X3",
- "M3X2",
- "NRM4",
- "CALLNZ",
- "IFC",
- "BREAKC",
- "KIL",
- "END",
- "SWZ"
-};
-
static const char *swizzle_names[] =
{
"x",
@@ -270,7 +158,7 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] =
static void
_dump_register(
- struct util_strbuf *sbuf,
+ struct dump_ctx *ctx,
uint file,
int first,
int last )
@@ -287,7 +175,7 @@ _dump_register(
static void
_dump_register_ind(
- struct util_strbuf *sbuf,
+ struct dump_ctx *ctx,
uint file,
int index,
uint ind_file,
@@ -309,7 +197,7 @@ _dump_register_ind(
static void
_dump_writemask(
- struct util_strbuf *sbuf,
+ struct dump_ctx *ctx,
uint writemask )
{
if (writemask != TGSI_WRITEMASK_XYZW) {
@@ -330,18 +218,17 @@ iter_declaration(
struct tgsi_iterate_context *iter,
struct tgsi_full_declaration *decl )
{
- struct dump_ctx *ctx = (struct dump_ctx *) iter;
- struct util_strbuf *sbuf = ctx->sbuf;
+ struct dump_ctx *ctx = (struct dump_ctx *)iter;
TXT( "DCL " );
_dump_register(
- sbuf,
+ ctx,
decl->Declaration.File,
decl->DeclarationRange.First,
decl->DeclarationRange.Last );
_dump_writemask(
- sbuf,
+ ctx,
decl->Declaration.UsageMask );
if (decl->Declaration.Semantic) {
@@ -367,17 +254,11 @@ void
tgsi_dump_declaration(
const struct tgsi_full_declaration *decl )
{
- static char str[1024];
- struct util_strbuf sbuf;
struct dump_ctx ctx;
- util_strbuf_init(&sbuf, str, sizeof(str));
-
- ctx.sbuf = &sbuf;
+ ctx.printf = dump_ctx_printf;
iter_declaration( &ctx.iter, (struct tgsi_full_declaration *)decl );
-
- debug_printf("%s", str);
}
static boolean
@@ -386,7 +267,6 @@ iter_immediate(
struct tgsi_full_immediate *imm )
{
struct dump_ctx *ctx = (struct dump_ctx *) iter;
- struct util_strbuf *sbuf = ctx->sbuf;
uint i;
@@ -417,17 +297,11 @@ void
tgsi_dump_immediate(
const struct tgsi_full_immediate *imm )
{
- static char str[1024];
- struct util_strbuf sbuf;
struct dump_ctx ctx;
- util_strbuf_init(&sbuf, str, sizeof(str));
-
- ctx.sbuf = &sbuf;
+ ctx.printf = dump_ctx_printf;
iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm );
-
- debug_printf("%s", str);
}
static boolean
@@ -436,7 +310,6 @@ iter_instruction(
struct tgsi_full_instruction *inst )
{
struct dump_ctx *ctx = (struct dump_ctx *) iter;
- struct util_strbuf *sbuf = ctx->sbuf;
uint instno = ctx->instno++;
uint i;
@@ -444,7 +317,7 @@ iter_instruction(
UID( instno );
CHR( ':' );
- ENM( inst->Instruction.Opcode, opcode_names );
+ TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic );
switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
@@ -467,12 +340,12 @@ iter_instruction(
CHR( ' ' );
_dump_register(
- sbuf,
+ ctx,
dst->DstRegister.File,
dst->DstRegister.Index,
dst->DstRegister.Index );
ENM( dst->DstRegisterExtModulate.Modulate, modulate_names );
- _dump_writemask( sbuf, dst->DstRegister.WriteMask );
+ _dump_writemask( ctx, dst->DstRegister.WriteMask );
first_reg = FALSE;
}
@@ -499,7 +372,7 @@ iter_instruction(
if (src->SrcRegister.Indirect) {
_dump_register_ind(
- sbuf,
+ ctx,
src->SrcRegister.File,
src->SrcRegister.Index,
src->SrcRegisterInd.File,
@@ -507,7 +380,7 @@ iter_instruction(
}
else {
_dump_register(
- sbuf,
+ ctx,
src->SrcRegister.File,
src->SrcRegister.Index,
src->SrcRegister.Index );
@@ -582,18 +455,12 @@ tgsi_dump_instruction(
const struct tgsi_full_instruction *inst,
uint instno )
{
- static char str[1024];
- struct util_strbuf sbuf;
struct dump_ctx ctx;
- util_strbuf_init(&sbuf, str, sizeof(str));
-
ctx.instno = instno;
- ctx.sbuf = &sbuf;
+ ctx.printf = dump_ctx_printf;
iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst );
-
- debug_printf("%s", str);
}
static boolean
@@ -601,7 +468,6 @@ prolog(
struct tgsi_iterate_context *iter )
{
struct dump_ctx *ctx = (struct dump_ctx *) iter;
- struct util_strbuf *sbuf = ctx->sbuf;
ENM( iter->processor.Processor, processor_type_names );
UID( iter->version.MajorVersion );
CHR( '.' );
@@ -611,21 +477,12 @@ prolog(
}
void
-tgsi_dump_str(
+tgsi_dump(
const struct tgsi_token *tokens,
- uint flags,
- char *str,
- size_t size)
+ uint flags )
{
- struct util_strbuf sbuf;
struct dump_ctx ctx;
- util_strbuf_init(&sbuf, str, size);
-
- /* sanity checks */
- assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 );
- assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 );
-
ctx.iter.prolog = prolog;
ctx.iter.iterate_instruction = iter_instruction;
ctx.iter.iterate_declaration = iter_declaration;
@@ -633,19 +490,57 @@ tgsi_dump_str(
ctx.iter.epilog = NULL;
ctx.instno = 0;
- ctx.sbuf = &sbuf;
+ ctx.printf = dump_ctx_printf;
tgsi_iterate_shader( tokens, &ctx.iter );
}
+struct str_dump_ctx
+{
+ struct dump_ctx base;
+ char *str;
+ char *ptr;
+ size_t left;
+};
+
+static void
+str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
+{
+ struct str_dump_ctx *sctx = (struct str_dump_ctx *)ctx;
+
+ if(sctx->left > 1) {
+ size_t written;
+ va_list ap;
+ va_start(ap, format);
+ written = util_vsnprintf(sctx->ptr, sctx->left, format, ap);
+ va_end(ap);
+ sctx->ptr += written;
+ sctx->left -= written;
+ }
+}
+
void
-tgsi_dump(
+tgsi_dump_str(
const struct tgsi_token *tokens,
- uint flags )
+ uint flags,
+ char *str,
+ size_t size)
{
- static char str[4096];
-
- tgsi_dump_str(tokens, flags, str, sizeof(str));
-
- debug_printf("%s", str);
+ struct str_dump_ctx ctx;
+
+ ctx.base.iter.prolog = prolog;
+ ctx.base.iter.iterate_instruction = iter_instruction;
+ ctx.base.iter.iterate_declaration = iter_declaration;
+ ctx.base.iter.iterate_immediate = iter_immediate;
+ ctx.base.iter.epilog = NULL;
+
+ ctx.base.instno = 0;
+ ctx.base.printf = &str_dump_ctx_printf;
+
+ ctx.str = str;
+ ctx.str[0] = 0;
+ ctx.ptr = str;
+ ctx.left = size;
+
+ tgsi_iterate_shader( tokens, &ctx.base.iter );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index eabd74b..be25cb4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -26,11 +26,11 @@
**************************************************************************/
#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
#include "util/u_string.h"
#include "tgsi_dump_c.h"
-#include "tgsi_parse.h"
#include "tgsi_build.h"
+#include "tgsi_info.h"
+#include "tgsi_parse.h"
static void
dump_enum(
@@ -104,128 +104,6 @@ static const char *TGSI_IMMS[] =
"IMM_FLOAT32"
};
-static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] =
-{
- "OPCODE_ARL",
- "OPCODE_MOV",
- "OPCODE_LIT",
- "OPCODE_RCP",
- "OPCODE_RSQ",
- "OPCODE_EXP",
- "OPCODE_LOG",
- "OPCODE_MUL",
- "OPCODE_ADD",
- "OPCODE_DP3",
- "OPCODE_DP4",
- "OPCODE_DST",
- "OPCODE_MIN",
- "OPCODE_MAX",
- "OPCODE_SLT",
- "OPCODE_SGE",
- "OPCODE_MAD",
- "OPCODE_SUB",
- "OPCODE_LERP",
- "OPCODE_CND",
- "OPCODE_CND0",
- "OPCODE_DOT2ADD",
- "OPCODE_INDEX",
- "OPCODE_NEGATE",
- "OPCODE_FRAC",
- "OPCODE_CLAMP",
- "OPCODE_FLOOR",
- "OPCODE_ROUND",
- "OPCODE_EXPBASE2",
- "OPCODE_LOGBASE2",
- "OPCODE_POWER",
- "OPCODE_CROSSPRODUCT",
- "OPCODE_MULTIPLYMATRIX",
- "OPCODE_ABS",
- "OPCODE_RCC",
- "OPCODE_DPH",
- "OPCODE_COS",
- "OPCODE_DDX",
- "OPCODE_DDY",
- "OPCODE_KILP",
- "OPCODE_PK2H",
- "OPCODE_PK2US",
- "OPCODE_PK4B",
- "OPCODE_PK4UB",
- "OPCODE_RFL",
- "OPCODE_SEQ",
- "OPCODE_SFL",
- "OPCODE_SGT",
- "OPCODE_SIN",
- "OPCODE_SLE",
- "OPCODE_SNE",
- "OPCODE_STR",
- "OPCODE_TEX",
- "OPCODE_TXD",
- "OPCODE_TXP",
- "OPCODE_UP2H",
- "OPCODE_UP2US",
- "OPCODE_UP4B",
- "OPCODE_UP4UB",
- "OPCODE_X2D",
- "OPCODE_ARA",
- "OPCODE_ARR",
- "OPCODE_BRA",
- "OPCODE_CAL",
- "OPCODE_RET",
- "OPCODE_SSG",
- "OPCODE_CMP",
- "OPCODE_SCS",
- "OPCODE_TXB",
- "OPCODE_NRM",
- "OPCODE_DIV",
- "OPCODE_DP2",
- "OPCODE_TXL",
- "OPCODE_BRK",
- "OPCODE_IF",
- "OPCODE_LOOP",
- "OPCODE_REP",
- "OPCODE_ELSE",
- "OPCODE_ENDIF",
- "OPCODE_ENDLOOP",
- "OPCODE_ENDREP",
- "OPCODE_PUSHA",
- "OPCODE_POPA",
- "OPCODE_CEIL",
- "OPCODE_I2F",
- "OPCODE_NOT",
- "OPCODE_TRUNC",
- "OPCODE_SHL",
- "OPCODE_SHR",
- "OPCODE_AND",
- "OPCODE_OR",
- "OPCODE_MOD",
- "OPCODE_XOR",
- "OPCODE_SAD",
- "OPCODE_TXF",
- "OPCODE_TXQ",
- "OPCODE_CONT",
- "OPCODE_EMIT",
- "OPCODE_ENDPRIM",
- "OPCODE_BGNLOOP2",
- "OPCODE_BGNSUB",
- "OPCODE_ENDLOOP2",
- "OPCODE_ENDSUB",
- "OPCODE_NOISE1",
- "OPCODE_NOISE2",
- "OPCODE_NOISE3",
- "OPCODE_NOISE4",
- "OPCODE_NOP",
- "OPCODE_M4X3",
- "OPCODE_M3X4",
- "OPCODE_M3X3",
- "OPCODE_M3X2",
- "OPCODE_NRM4",
- "OPCODE_CALLNZ",
- "OPCODE_IFC",
- "OPCODE_BREAKC",
- "OPCODE_KIL",
- "OPCODE_END"
-};
-
static const char *TGSI_SATS[] =
{
"SAT_NONE",
@@ -428,8 +306,8 @@ dump_instruction_verbose(
{
unsigned i;
- TXT( "\nOpcode : " );
- ENM( inst->Instruction.Opcode, TGSI_OPCODES );
+ TXT( "\nOpcode : OPCODE_" );
+ TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic );
if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
TXT( "\nSaturate : " );
ENM( inst->Instruction.Saturate, TGSI_SATS );
@@ -770,10 +648,6 @@ tgsi_dump_c(
uint deflt = flags & TGSI_DUMP_C_DEFAULT;
uint instno = 0;
- /* sanity checks */
- assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
- assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
-
tgsi_parse_init( &parse, tokens );
TXT( "tgsi-dump begin -----------------" );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c4ba667..df00293 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,7 +26,7 @@
**************************************************************************/
/**
- * TGSI interpretor/executor.
+ * TGSI interpreter/executor.
*
* Flow control information:
*
@@ -52,17 +52,25 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
-#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#define FAST_MATH 1
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
/*
* Shorthand locations of various utility registers (_I = Index, _C = Channel)
*/
@@ -88,15 +96,14 @@
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I
+#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C
#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
-#define FOR_EACH_CHANNEL(CHAN)\
- for (CHAN = 0; CHAN < 4; CHAN++)
-
#define IS_CHANNEL_ENABLED(INST, CHAN)\
((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
@@ -104,11 +111,11 @@
((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
- FOR_EACH_CHANNEL( CHAN )\
+ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
if (IS_CHANNEL_ENABLED( INST, CHAN ))
#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
- FOR_EACH_CHANNEL( CHAN )\
+ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
if (IS_CHANNEL_ENABLED2( INST, CHAN ))
@@ -116,14 +123,6 @@
#define UPDATE_EXEC_MASK(MACH) \
MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
-
-#define CHAN_X 0
-#define CHAN_Y 1
-#define CHAN_Z 2
-#define CHAN_W 3
-
-
-
/**
* Initialize machine state by expanding tokens to full instructions,
* allocating temporary storage, setting up constants, etc.
@@ -149,6 +148,8 @@ tgsi_exec_machine_bind_shader(
tgsi_dump(tokens, 0);
#endif
+ util_init_math();
+
mach->Tokens = tokens;
mach->Samplers = samplers;
@@ -452,10 +453,17 @@ micro_exp2(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
+#if FAST_MATH
+ dst->f[0] = util_fast_exp2( src->f[0] );
+ dst->f[1] = util_fast_exp2( src->f[1] );
+ dst->f[2] = util_fast_exp2( src->f[2] );
+ dst->f[3] = util_fast_exp2( src->f[3] );
+#else
dst->f[0] = powf( 2.0f, src->f[0] );
dst->f[1] = powf( 2.0f, src->f[1] );
dst->f[2] = powf( 2.0f, src->f[2] );
dst->f[3] = powf( 2.0f, src->f[3] );
+#endif
}
static void
@@ -532,10 +540,17 @@ micro_lg2(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
{
+#if FAST_MATH
+ dst->f[0] = util_fast_log2( src->f[0] );
+ dst->f[1] = util_fast_log2( src->f[1] );
+ dst->f[2] = util_fast_log2( src->f[2] );
+ dst->f[3] = util_fast_log2( src->f[3] );
+#else
dst->f[0] = logf( src->f[0] ) * 1.442695f;
dst->f[1] = logf( src->f[1] ) * 1.442695f;
dst->f[2] = logf( src->f[2] ) * 1.442695f;
dst->f[3] = logf( src->f[3] ) * 1.442695f;
+#endif
}
static void
@@ -800,10 +815,17 @@ micro_pow(
const union tgsi_exec_channel *src0,
const union tgsi_exec_channel *src1 )
{
+#if FAST_MATH
+ dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
+ dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
+ dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
+ dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
+#else
dst->f[0] = powf( src0->f[0], src1->f[0] );
dst->f[1] = powf( src0->f[1], src1->f[1] );
dst->f[2] = powf( src0->f[2], src1->f[2] );
dst->f[3] = powf( src0->f[3], src1->f[3] );
+#endif
}
static void
@@ -935,6 +957,7 @@ fetch_src_file_channel(
case TGSI_EXTSWIZZLE_W:
switch( file ) {
case TGSI_FILE_CONSTANT:
+ assert(mach->Consts);
chan->f[0] = mach->Consts[index->i[0]][swizzle];
chan->f[1] = mach->Consts[index->i[1]][swizzle];
chan->f[2] = mach->Consts[index->i[2]][swizzle];
@@ -1124,11 +1147,15 @@ store_dest(
const struct tgsi_full_instruction *inst,
uint chan_index )
{
+ uint i;
+ union tgsi_exec_channel null;
union tgsi_exec_channel *dst;
+ uint execmask = mach->ExecMask;
- switch( reg->DstRegister.File ) {
+ switch (reg->DstRegister.File) {
case TGSI_FILE_NULL:
- return;
+ dst = &null;
+ break;
case TGSI_FILE_OUTPUT:
dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
@@ -1136,7 +1163,7 @@ store_dest(
break;
case TGSI_FILE_TEMPORARY:
- assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS);
+ assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
break;
@@ -1149,32 +1176,193 @@ store_dest(
return;
}
- switch (inst->Instruction.Saturate)
- {
+ if (inst->InstructionExtNv.CondFlowEnable) {
+ union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
+ uint swizzle;
+ uint shift;
+ uint mask;
+ uint test;
+
+ /* Only CC0 supported.
+ */
+ assert( inst->InstructionExtNv.CondFlowIndex < 1 );
+
+ switch (chan_index) {
+ case CHAN_X:
+ swizzle = inst->InstructionExtNv.CondSwizzleX;
+ break;
+ case CHAN_Y:
+ swizzle = inst->InstructionExtNv.CondSwizzleY;
+ break;
+ case CHAN_Z:
+ swizzle = inst->InstructionExtNv.CondSwizzleZ;
+ break;
+ case CHAN_W:
+ swizzle = inst->InstructionExtNv.CondSwizzleW;
+ break;
+ default:
+ assert( 0 );
+ return;
+ }
+
+ switch (swizzle) {
+ case TGSI_SWIZZLE_X:
+ shift = TGSI_EXEC_CC_X_SHIFT;
+ mask = TGSI_EXEC_CC_X_MASK;
+ break;
+ case TGSI_SWIZZLE_Y:
+ shift = TGSI_EXEC_CC_Y_SHIFT;
+ mask = TGSI_EXEC_CC_Y_MASK;
+ break;
+ case TGSI_SWIZZLE_Z:
+ shift = TGSI_EXEC_CC_Z_SHIFT;
+ mask = TGSI_EXEC_CC_Z_MASK;
+ break;
+ case TGSI_SWIZZLE_W:
+ shift = TGSI_EXEC_CC_W_SHIFT;
+ mask = TGSI_EXEC_CC_W_MASK;
+ break;
+ default:
+ assert( 0 );
+ return;
+ }
+
+ switch (inst->InstructionExtNv.CondMask) {
+ case TGSI_CC_GT:
+ test = ~(TGSI_EXEC_CC_GT << shift) & mask;
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (cc->u[i] & test)
+ execmask &= ~(1 << i);
+ break;
+
+ case TGSI_CC_EQ:
+ test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (cc->u[i] & test)
+ execmask &= ~(1 << i);
+ break;
+
+ case TGSI_CC_LT:
+ test = ~(TGSI_EXEC_CC_LT << shift) & mask;
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (cc->u[i] & test)
+ execmask &= ~(1 << i);
+ break;
+
+ case TGSI_CC_GE:
+ test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (cc->u[i] & test)
+ execmask &= ~(1 << i);
+ break;
+
+ case TGSI_CC_LE:
+ test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (cc->u[i] & test)
+ execmask &= ~(1 << i);
+ break;
+
+ case TGSI_CC_NE:
+ test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (cc->u[i] & test)
+ execmask &= ~(1 << i);
+ break;
+
+ case TGSI_CC_TR:
+ break;
+
+ case TGSI_CC_FL:
+ for (i = 0; i < QUAD_SIZE; i++)
+ execmask &= ~(1 << i);
+ break;
+
+ default:
+ assert( 0 );
+ return;
+ }
+ }
+
+ switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
- if (mach->ExecMask & 0x1)
- dst->i[0] = chan->i[0];
- if (mach->ExecMask & 0x2)
- dst->i[1] = chan->i[1];
- if (mach->ExecMask & 0x4)
- dst->i[2] = chan->i[2];
- if (mach->ExecMask & 0x8)
- dst->i[3] = chan->i[3];
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (execmask & (1 << i))
+ dst->i[i] = chan->i[i];
break;
case TGSI_SAT_ZERO_ONE:
- /* XXX need to obey ExecMask here */
- micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (execmask & (1 << i)) {
+ if (chan->f[i] < 0.0f)
+ dst->f[i] = 0.0f;
+ else if (chan->f[i] > 1.0f)
+ dst->f[i] = 1.0f;
+ else
+ dst->i[i] = chan->i[i];
+ }
break;
case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (execmask & (1 << i)) {
+ if (chan->f[i] < -1.0f)
+ dst->f[i] = -1.0f;
+ else if (chan->f[i] > 1.0f)
+ dst->f[i] = 1.0f;
+ else
+ dst->i[i] = chan->i[i];
+ }
break;
default:
assert( 0 );
}
+
+ if (inst->InstructionExtNv.CondDstUpdate) {
+ union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
+ uint shift;
+ uint mask;
+
+ /* Only CC0 supported.
+ */
+ assert( inst->InstructionExtNv.CondDstIndex < 1 );
+
+ switch (chan_index) {
+ case CHAN_X:
+ shift = TGSI_EXEC_CC_X_SHIFT;
+ mask = ~TGSI_EXEC_CC_X_MASK;
+ break;
+ case CHAN_Y:
+ shift = TGSI_EXEC_CC_Y_SHIFT;
+ mask = ~TGSI_EXEC_CC_Y_MASK;
+ break;
+ case CHAN_Z:
+ shift = TGSI_EXEC_CC_Z_SHIFT;
+ mask = ~TGSI_EXEC_CC_Z_MASK;
+ break;
+ case CHAN_W:
+ shift = TGSI_EXEC_CC_W_SHIFT;
+ mask = ~TGSI_EXEC_CC_W_MASK;
+ break;
+ default:
+ assert( 0 );
+ return;
+ }
+
+ for (i = 0; i < QUAD_SIZE; i++)
+ if (execmask & (1 << i)) {
+ cc->u[i] &= mask;
+ if (dst->f[i] < 0.0f)
+ cc->u[i] |= TGSI_EXEC_CC_LT << shift;
+ else if (dst->f[i] > 0.0f)
+ cc->u[i] |= TGSI_EXEC_CC_GT << shift;
+ else if (dst->f[i] == 0.0f)
+ cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
+ else
+ cc->u[i] |= TGSI_EXEC_CC_UN << shift;
+ }
+ }
}
#define FETCH(VAL,INDEX,CHAN)\
@@ -1234,10 +1422,34 @@ static void
exec_kilp(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
- uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+ uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
- /* TODO: build kilmask from CC mask */
+ if (inst->InstructionExtNv.CondFlowEnable) {
+ uint swizzle[4];
+ uint chan_index;
+ kilmask = 0x0;
+
+ swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
+ swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
+ swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
+ swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
+
+ for (chan_index = 0; chan_index < 4; chan_index++)
+ {
+ uint i;
+
+ for (i = 0; i < 4; i++) {
+ /* TODO: evaluate the condition code */
+ if (0)
+ kilmask |= 1 << i;
+ }
+ }
+ }
+ else {
+ /* "unconditional" kil */
+ kilmask = mach->ExecMask;
+ }
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
}
@@ -1503,9 +1715,9 @@ exec_instruction(
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_f2it( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ FETCH( &r[0], 0, chan_index );
+ micro_f2it( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
}
break;
@@ -1571,18 +1783,18 @@ exec_instruction(
micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */
if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */
- STORE( &r[2], 0, CHAN_X ); /* store r2 */
+ STORE( &r[2], 0, CHAN_X ); /* store r2 */
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */
- STORE( &r[2], 0, CHAN_Y ); /* store r2 */
+ STORE( &r[2], 0, CHAN_Y ); /* store r2 */
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */
- STORE( &r[2], 0, CHAN_Z ); /* store r2 */
+ STORE( &r[2], 0, CHAN_Z ); /* store r2 */
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
break;
@@ -1592,18 +1804,18 @@ exec_instruction(
micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */
micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */
if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[0], 0, CHAN_X );
+ STORE( &r[0], 0, CHAN_X );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */
micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */
- STORE( &r[0], 0, CHAN_Y );
+ STORE( &r[0], 0, CHAN_Y );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[1], 0, CHAN_Z );
+ STORE( &r[1], 0, CHAN_Z );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
break;
@@ -1839,7 +2051,11 @@ exec_instruction(
/* TGSI_OPCODE_EX2 */
FETCH(&r[0], 0, CHAN_X);
+#if FAST_MATH
+ micro_exp2( &r[0], &r[0] );
+#else
micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
+#endif
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
@@ -2158,7 +2374,7 @@ exec_instruction(
mach->FuncMask &= ~mach->ExecMask;
UPDATE_EXEC_MASK(mach);
- if (mach->ExecMask == 0x0) {
+ if (mach->FuncMask == 0x0) {
/* really return now (otherwise, keep executing */
if (mach->CallStackTop == 0) {
@@ -2515,6 +2731,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
mach->Primitives[0] = 0;
}
+ for (i = 0; i < QUAD_SIZE; i++) {
+ mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] =
+ (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
+ (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
+ (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
+ (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
+ }
/* execute declarations (interpolants) */
for (i = 0; i < mach->NumDeclarations; i++) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 4f30650..c4e649e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#if !defined TGSI_EXEC_H
+#ifndef TGSI_EXEC_H
#define TGSI_EXEC_H
#include "pipe/p_compiler.h"
@@ -140,11 +140,30 @@ struct tgsi_exec_labels
#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2)
#define TGSI_EXEC_TEMP_PRIMITIVE_C 2
-#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_THREE_C 3
+/* NVIDIA condition code (CC) vector
+ */
+#define TGSI_EXEC_CC_GT 0x01
+#define TGSI_EXEC_CC_EQ 0x02
+#define TGSI_EXEC_CC_LT 0x04
+#define TGSI_EXEC_CC_UN 0x08
+
+#define TGSI_EXEC_CC_X_MASK 0x000000ff
+#define TGSI_EXEC_CC_X_SHIFT 0
+#define TGSI_EXEC_CC_Y_MASK 0x0000ff00
+#define TGSI_EXEC_CC_Y_SHIFT 8
+#define TGSI_EXEC_CC_Z_MASK 0x00ff0000
+#define TGSI_EXEC_CC_Z_SHIFT 16
+#define TGSI_EXEC_CC_W_MASK 0xff000000
+#define TGSI_EXEC_CC_W_SHIFT 24
+
+#define TGSI_EXEC_TEMP_CC_I (TGSI_EXEC_NUM_TEMPS + 2)
+#define TGSI_EXEC_TEMP_CC_C 3
+
+#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 3)
+#define TGSI_EXEC_TEMP_THREE_C 0
#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_HALF_C 0
+#define TGSI_EXEC_TEMP_HALF_C 1
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
new file mode 100644
index 0000000..a4899cd
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -0,0 +1,161 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "tgsi_info.h"
+
+static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
+{
+ { 1, 1, 0, 0, "ARL" },
+ { 1, 1, 0, 0, "MOV" },
+ { 1, 1, 0, 0, "LIT" },
+ { 1, 1, 0, 0, "RCP" },
+ { 1, 1, 0, 0, "RSQ" },
+ { 1, 1, 0, 0, "EXP" },
+ { 1, 1, 0, 0, "LOG" },
+ { 1, 2, 0, 0, "MUL" },
+ { 1, 2, 0, 0, "ADD" },
+ { 1, 2, 0, 0, "DP3" },
+ { 1, 2, 0, 0, "DP4" },
+ { 1, 2, 0, 0, "DST" },
+ { 1, 2, 0, 0, "MIN" },
+ { 1, 2, 0, 0, "MAX" },
+ { 1, 2, 0, 0, "SLT" },
+ { 1, 2, 0, 0, "SGE" },
+ { 1, 3, 0, 0, "MAD" },
+ { 1, 2, 0, 0, "SUB" },
+ { 1, 3, 0, 0, "LERP" },
+ { 1, 3, 0, 0, "CND" },
+ { 1, 3, 0, 0, "CND0" },
+ { 1, 3, 0, 0, "DOT2ADD" },
+ { 1, 2, 0, 0, "INDEX" },
+ { 1, 1, 0, 0, "NEGATE" },
+ { 1, 1, 0, 0, "FRAC" },
+ { 1, 3, 0, 0, "CLAMP" },
+ { 1, 1, 0, 0, "FLOOR" },
+ { 1, 1, 0, 0, "ROUND" },
+ { 1, 1, 0, 0, "EXPBASE2" },
+ { 1, 1, 0, 0, "LOGBASE2" },
+ { 1, 2, 0, 0, "POWER" },
+ { 1, 2, 0, 0, "CROSSPRODUCT" },
+ { 1, 2, 0, 0, "MULTIPLYMATRIX" },
+ { 1, 1, 0, 0, "ABS" },
+ { 1, 1, 0, 0, "RCC" },
+ { 1, 2, 0, 0, "DPH" },
+ { 1, 1, 0, 0, "COS" },
+ { 1, 1, 0, 0, "DDX" },
+ { 1, 1, 0, 0, "DDY" },
+ { 0, 1, 0, 0, "KILP" },
+ { 1, 1, 0, 0, "PK2H" },
+ { 1, 1, 0, 0, "PK2US" },
+ { 1, 1, 0, 0, "PK4B" },
+ { 1, 1, 0, 0, "PK4UB" },
+ { 1, 2, 0, 0, "RFL" },
+ { 1, 2, 0, 0, "SEQ" },
+ { 1, 2, 0, 0, "SFL" },
+ { 1, 2, 0, 0, "SGT" },
+ { 1, 1, 0, 0, "SIN" },
+ { 1, 2, 0, 0, "SLE" },
+ { 1, 2, 0, 0, "SNE" },
+ { 1, 2, 0, 0, "STR" },
+ { 1, 2, 1, 0, "TEX" },
+ { 1, 4, 1, 0, "TXD" },
+ { 1, 2, 1, 0, "TXP" },
+ { 1, 1, 0, 0, "UP2H" },
+ { 1, 1, 0, 0, "UP2US" },
+ { 1, 1, 0, 0, "UP4B" },
+ { 1, 1, 0, 0, "UP4UB" },
+ { 1, 3, 0, 0, "X2D" },
+ { 1, 1, 0, 0, "ARA" },
+ { 1, 1, 0, 0, "ARR" },
+ { 0, 1, 0, 0, "BRA" },
+ { 0, 0, 0, 1, "CAL" },
+ { 0, 0, 0, 0, "RET" },
+ { 1, 1, 0, 0, "SSG" },
+ { 1, 3, 0, 0, "CMP" },
+ { 1, 1, 0, 0, "SCS" },
+ { 1, 2, 1, 0, "TXB" },
+ { 1, 1, 0, 0, "NRM" },
+ { 1, 2, 0, 0, "DIV" },
+ { 1, 2, 0, 0, "DP2" },
+ { 1, 2, 1, 0, "TXL" },
+ { 0, 0, 0, 0, "BRK" },
+ { 0, 1, 0, 1, "IF" },
+ { 0, 0, 0, 0, "LOOP" },
+ { 0, 1, 0, 0, "REP" },
+ { 0, 0, 0, 1, "ELSE" },
+ { 0, 0, 0, 0, "ENDIF" },
+ { 0, 0, 0, 0, "ENDLOOP" },
+ { 0, 0, 0, 0, "ENDREP" },
+ { 0, 1, 0, 0, "PUSHA" },
+ { 1, 0, 0, 0, "POPA" },
+ { 1, 1, 0, 0, "CEIL" },
+ { 1, 1, 0, 0, "I2F" },
+ { 1, 1, 0, 0, "NOT" },
+ { 1, 1, 0, 0, "TRUNC" },
+ { 1, 2, 0, 0, "SHL" },
+ { 1, 2, 0, 0, "SHR" },
+ { 1, 2, 0, 0, "AND" },
+ { 1, 2, 0, 0, "OR" },
+ { 1, 2, 0, 0, "MOD" },
+ { 1, 2, 0, 0, "XOR" },
+ { 1, 3, 0, 0, "SAD" },
+ { 1, 2, 1, 0, "TXF" },
+ { 1, 2, 1, 0, "TXQ" },
+ { 0, 0, 0, 0, "CONT" },
+ { 0, 0, 0, 0, "EMIT" },
+ { 0, 0, 0, 0, "ENDPRIM" },
+ { 0, 0, 0, 1, "BGNLOOP2" },
+ { 0, 0, 0, 0, "BGNSUB" },
+ { 0, 0, 0, 1, "ENDLOOP2" },
+ { 0, 0, 0, 0, "ENDSUB" },
+ { 1, 1, 0, 0, "NOISE1" },
+ { 1, 1, 0, 0, "NOISE2" },
+ { 1, 1, 0, 0, "NOISE3" },
+ { 1, 1, 0, 0, "NOISE4" },
+ { 0, 0, 0, 0, "NOP" },
+ { 1, 2, 0, 0, "M4X3" },
+ { 1, 2, 0, 0, "M3X4" },
+ { 1, 2, 0, 0, "M3X3" },
+ { 1, 2, 0, 0, "M3X2" },
+ { 1, 1, 0, 0, "NRM4" },
+ { 0, 1, 0, 0, "CALLNZ" },
+ { 0, 1, 0, 0, "IFC" },
+ { 0, 1, 0, 0, "BREAKC" },
+ { 0, 0, 0, 0, "KIL" },
+ { 0, 0, 0, 0, "END" },
+ { 1, 1, 0, 0, "SWZ" }
+};
+
+const struct tgsi_opcode_info *
+tgsi_get_opcode_info( uint opcode )
+{
+ if (opcode < TGSI_OPCODE_LAST)
+ return &opcode_info[opcode];
+ assert( 0 );
+ return NULL;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
new file mode 100644
index 0000000..7230bda
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -0,0 +1,53 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TGSI_INFO_H
+#define TGSI_INFO_H
+
+#include "pipe/p_shader_tokens.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+struct tgsi_opcode_info
+{
+ uint num_dst;
+ uint num_src;
+ boolean is_tex;
+ boolean is_branch;
+ const char *mnemonic;
+};
+
+const struct tgsi_opcode_info *
+tgsi_get_opcode_info( uint opcode );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_INFO_H */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index d16f0cd..3757486 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -26,10 +26,10 @@
**************************************************************************/
#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"
+#include "util/u_memory.h"
void
tgsi_full_token_init(
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 2e3ec96..c659027 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -27,6 +27,7 @@
#include "pipe/p_debug.h"
#include "tgsi_sanity.h"
+#include "tgsi_info.h"
#include "tgsi_iterate.h"
#define MAX_REGISTERS 256
@@ -170,6 +171,7 @@ iter_instruction(
struct tgsi_full_instruction *inst )
{
struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
+ const struct tgsi_opcode_info *info;
uint i;
/* There must be no other instructions after END.
@@ -181,6 +183,19 @@ iter_instruction(
ctx->index_of_END = ctx->num_instructions;
}
+ info = tgsi_get_opcode_info( inst->Instruction.Opcode );
+ if (info == NULL) {
+ report_error( ctx, "Invalid instruction opcode" );
+ return TRUE;
+ }
+
+ if (info->num_dst != inst->Instruction.NumDstRegs) {
+ report_error( ctx, "Invalid number of destination operands" );
+ }
+ if (info->num_src != inst->Instruction.NumSrcRegs) {
+ report_error( ctx, "Invalid number of source operands" );
+ }
+
/* Check destination and source registers' validity.
* Mark the registers as used.
*/
@@ -307,7 +322,7 @@ epilog(
/* Print totals, if any.
*/
if (ctx->errors || ctx->warnings)
- debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings );
+ debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings );
return TRUE;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 59bcf10..be4870a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -33,11 +33,11 @@
*/
-#include "tgsi_scan.h"
-#include "tgsi/tgsi_parse.h"
+#include "util/u_math.h"
#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
-#include "pipe/p_util.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 47dc06f..4681b29 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1,6 +1,6 @@
/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,8 +25,9 @@
*
**************************************************************************/
-#include "pipe/p_util.h"
+#include "pipe/p_debug.h"
#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
@@ -42,15 +43,17 @@
*/
#define HIGH_PRECISION 1
+#define FAST_MATH 1
+
#define FOR_EACH_CHANNEL( CHAN )\
- for( CHAN = 0; CHAN < 4; CHAN++ )
+ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
- if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+ if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
FOR_EACH_CHANNEL( CHAN )\
@@ -61,7 +64,11 @@
#define CHAN_Z 2
#define CHAN_W 3
+#define TEMP_ONE_I TGSI_EXEC_TEMP_ONE_I
+#define TEMP_ONE_C TGSI_EXEC_TEMP_ONE_C
+
#define TEMP_R0 TGSI_EXEC_TEMP_R0
+#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
/**
* X86 utility functions.
@@ -215,19 +222,61 @@ emit_ret(
static void
emit_const(
struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- sse_movss(
- func,
- make_xmm( xmm ),
- get_const( vec, chan ) );
- sse_shufps(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ),
- SHUF( 0, 0, 0, 0 ) );
+ uint xmm,
+ int vec,
+ uint chan,
+ uint indirect,
+ uint indirectFile,
+ int indirectIndex )
+{
+ if (indirect) {
+ struct x86_reg r0 = get_input_base();
+ struct x86_reg r1 = get_output_base();
+ uint i;
+
+ assert( indirectFile == TGSI_FILE_ADDRESS );
+ assert( indirectIndex == 0 );
+
+ x86_push( func, r0 );
+ x86_push( func, r1 );
+
+ for (i = 0; i < QUAD_SIZE; i++) {
+ x86_lea( func, r0, get_const( vec, chan ) );
+ x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
+
+ /* Quick hack to multiply by 16 -- need to add SHL to rtasm.
+ */
+ x86_add( func, r1, r1 );
+ x86_add( func, r1, r1 );
+ x86_add( func, r1, r1 );
+ x86_add( func, r1, r1 );
+
+ x86_add( func, r0, r1 );
+ x86_mov( func, r1, x86_deref( r0 ) );
+ x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
+ }
+
+ x86_pop( func, r1 );
+ x86_pop( func, r0 );
+
+ sse_movaps(
+ func,
+ make_xmm( xmm ),
+ get_temp( TEMP_R0, CHAN_X ) );
+ }
+ else {
+ assert( vec >= 0 );
+
+ sse_movss(
+ func,
+ make_xmm( xmm ),
+ get_const( vec, chan ) );
+ sse_shufps(
+ func,
+ make_xmm( xmm ),
+ make_xmm( xmm ),
+ SHUF( 0, 0, 0, 0 ) );
+ }
}
static void
@@ -369,10 +418,12 @@ emit_addrs(
unsigned vec,
unsigned chan )
{
+ assert( vec == 0 );
+
emit_temps(
func,
xmm,
- vec + TGSI_EXEC_NUM_TEMPS,
+ vec + TGSI_EXEC_TEMP_ADDR,
chan );
}
@@ -550,12 +601,10 @@ static void PIPE_CDECL
cos4f(
float *store )
{
- const unsigned X = 0;
-
- store[X + 0] = cosf( store[X + 0] );
- store[X + 1] = cosf( store[X + 1] );
- store[X + 2] = cosf( store[X + 2] );
- store[X + 3] = cosf( store[X + 3] );
+ store[0] = cosf( store[0] );
+ store[1] = cosf( store[1] );
+ store[2] = cosf( store[2] );
+ store[3] = cosf( store[3] );
}
static void
@@ -573,12 +622,17 @@ static void PIPE_CDECL
ex24f(
float *store )
{
- const unsigned X = 0;
-
- store[X + 0] = powf( 2.0f, store[X + 0] );
- store[X + 1] = powf( 2.0f, store[X + 1] );
- store[X + 2] = powf( 2.0f, store[X + 2] );
- store[X + 3] = powf( 2.0f, store[X + 3] );
+#if FAST_MATH
+ store[0] = util_fast_exp2( store[0] );
+ store[1] = util_fast_exp2( store[1] );
+ store[2] = util_fast_exp2( store[2] );
+ store[3] = util_fast_exp2( store[3] );
+#else
+ store[0] = powf( 2.0f, store[0] );
+ store[1] = powf( 2.0f, store[1] );
+ store[2] = powf( 2.0f, store[2] );
+ store[3] = powf( 2.0f, store[3] );
+#endif
}
static void
@@ -607,12 +661,10 @@ static void PIPE_CDECL
flr4f(
float *store )
{
- const unsigned X = 0;
-
- store[X + 0] = floorf( store[X + 0] );
- store[X + 1] = floorf( store[X + 1] );
- store[X + 2] = floorf( store[X + 2] );
- store[X + 3] = floorf( store[X + 3] );
+ store[0] = floorf( store[0] );
+ store[1] = floorf( store[1] );
+ store[2] = floorf( store[2] );
+ store[3] = floorf( store[3] );
}
static void
@@ -630,12 +682,10 @@ static void PIPE_CDECL
frc4f(
float *store )
{
- const unsigned X = 0;
-
- store[X + 0] -= floorf( store[X + 0] );
- store[X + 1] -= floorf( store[X + 1] );
- store[X + 2] -= floorf( store[X + 2] );
- store[X + 3] -= floorf( store[X + 3] );
+ store[0] -= floorf( store[0] );
+ store[1] -= floorf( store[1] );
+ store[2] -= floorf( store[2] );
+ store[3] -= floorf( store[3] );
}
static void
@@ -653,12 +703,10 @@ static void PIPE_CDECL
lg24f(
float *store )
{
- const unsigned X = 0;
-
- store[X + 0] = LOG2( store[X + 0] );
- store[X + 1] = LOG2( store[X + 1] );
- store[X + 2] = LOG2( store[X + 2] );
- store[X + 3] = LOG2( store[X + 3] );
+ store[0] = util_fast_log2( store[0] );
+ store[1] = util_fast_log2( store[1] );
+ store[2] = util_fast_log2( store[2] );
+ store[3] = util_fast_log2( store[3] );
}
static void
@@ -712,12 +760,17 @@ static void PIPE_CDECL
pow4f(
float *store )
{
- const unsigned X = 0;
-
- store[X + 0] = powf( store[X + 0], store[X + 4] );
- store[X + 1] = powf( store[X + 1], store[X + 5] );
- store[X + 2] = powf( store[X + 2], store[X + 6] );
- store[X + 3] = powf( store[X + 3], store[X + 7] );
+#if FAST_MATH
+ store[0] = util_fast_pow( store[0], store[4] );
+ store[1] = util_fast_pow( store[1], store[5] );
+ store[2] = util_fast_pow( store[2], store[6] );
+ store[3] = util_fast_pow( store[3], store[7] );
+#else
+ store[0] = powf( store[0], store[4] );
+ store[1] = powf( store[1], store[5] );
+ store[2] = powf( store[2], store[6] );
+ store[3] = powf( store[3], store[7] );
+#endif
}
static void
@@ -812,12 +865,10 @@ static void PIPE_CDECL
sin4f(
float *store )
{
- const unsigned X = 0;
-
- store[X + 0] = sinf( store[X + 0] );
- store[X + 1] = sinf( store[X + 1] );
- store[X + 2] = sinf( store[X + 2] );
- store[X + 3] = sinf( store[X + 3] );
+ store[0] = sinf( store[0] );
+ store[1] = sinf( store[1] );
+ store[2] = sinf( store[2] );
+ store[3] = sinf( store[3] );
}
static void
@@ -855,18 +906,21 @@ emit_fetch(
{
unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
- switch( swizzle ) {
+ switch (swizzle) {
case TGSI_EXTSWIZZLE_X:
case TGSI_EXTSWIZZLE_Y:
case TGSI_EXTSWIZZLE_Z:
case TGSI_EXTSWIZZLE_W:
- switch( reg->SrcRegister.File ) {
+ switch (reg->SrcRegister.File) {
case TGSI_FILE_CONSTANT:
emit_const(
func,
xmm,
reg->SrcRegister.Index,
- swizzle );
+ swizzle,
+ reg->SrcRegister.Indirect,
+ reg->SrcRegisterInd.File,
+ reg->SrcRegisterInd.Index );
break;
case TGSI_FILE_IMMEDIATE:
@@ -910,8 +964,8 @@ emit_fetch(
emit_tempf(
func,
xmm,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
+ TEMP_ONE_I,
+ TEMP_ONE_C );
break;
default:
@@ -1125,8 +1179,8 @@ emit_setcc(
func,
make_xmm( 0 ),
get_temp(
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C ) );
+ TEMP_ONE_I,
+ TEMP_ONE_C ) );
STORE( func, *inst, 0, 0, chan_index );
}
}
@@ -1172,18 +1226,13 @@ emit_instruction(
{
unsigned chan_index;
- switch( inst->Instruction.Opcode ) {
+ switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
-#if 0
- /* XXX this isn't working properly (see glean vertProg1 test) */
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_f2it( func, 0 );
STORE( func, *inst, 0, 0, chan_index );
}
-#else
- return 0;
-#endif
break;
case TGSI_OPCODE_MOV:
@@ -1200,8 +1249,8 @@ emit_instruction(
emit_tempf(
func,
0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C);
+ TEMP_ONE_I,
+ TEMP_ONE_C);
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
STORE( func, *inst, 0, 0, CHAN_X );
}
@@ -1286,11 +1335,73 @@ emit_instruction(
break;
case TGSI_OPCODE_EXP:
- return 0;
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ emit_MOV( func, 1, 0 );
+ emit_flr( func, 1 );
+ /* dst.x = ex2(floor(src.x)) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ emit_MOV( func, 2, 1 );
+ emit_ex2( func, 2 );
+ STORE( func, *inst, 2, 0, CHAN_X );
+ }
+ /* dst.y = src.x - floor(src.x) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ emit_MOV( func, 2, 0 );
+ emit_sub( func, 2, 1 );
+ STORE( func, *inst, 2, 0, CHAN_Y );
+ }
+ }
+ /* dst.z = ex2(src.x) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ emit_ex2( func, 0 );
+ STORE( func, *inst, 0, 0, CHAN_Z );
+ }
+ }
+ /* dst.w = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
break;
case TGSI_OPCODE_LOG:
- return 0;
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_abs( func, 0 );
+ emit_MOV( func, 1, 0 );
+ emit_lg2( func, 1 );
+ /* dst.z = lg2(abs(src.x)) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ STORE( func, *inst, 1, 0, CHAN_Z );
+ }
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ emit_flr( func, 1 );
+ /* dst.x = floor(lg2(abs(src.x))) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( func, *inst, 1, 0, CHAN_X );
+ }
+ /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ emit_ex2( func, 1 );
+ emit_rcp( func, 1, 1 );
+ emit_mul( func, 0, 1 );
+ STORE( func, *inst, 0, 0, CHAN_Y );
+ }
+ }
+ }
+ /* dst.w = 1.0 */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
break;
case TGSI_OPCODE_MUL:
@@ -1356,8 +1467,8 @@ emit_instruction(
emit_tempf(
func,
0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
+ TEMP_ONE_I,
+ TEMP_ONE_C );
STORE( func, *inst, 0, 0, CHAN_X );
}
IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
@@ -1560,8 +1671,8 @@ emit_instruction(
emit_tempf(
func,
0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
+ TEMP_ONE_I,
+ TEMP_ONE_C );
STORE( func, *inst, 0, 0, CHAN_W );
}
break;
@@ -1688,8 +1799,8 @@ emit_instruction(
emit_tempf(
func,
0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
+ TEMP_ONE_I,
+ TEMP_ONE_C );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
@@ -1777,8 +1888,8 @@ emit_instruction(
emit_tempf(
func,
0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
+ TEMP_ONE_I,
+ TEMP_ONE_C );
STORE( func, *inst, 0, 0, CHAN_W );
}
break;
@@ -2127,6 +2238,8 @@ tgsi_emit_sse2(
unsigned ok = 1;
uint num_immediates = 0;
+ util_init_math();
+
func->csr = func->store;
tgsi_parse_init( &parse, tokens );
@@ -2289,3 +2402,4 @@ tgsi_emit_sse2(
}
#endif /* PIPE_ARCH_X86 */
+
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 35cb305..9454563 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -28,6 +28,7 @@
#include "pipe/p_debug.h"
#include "tgsi_text.h"
#include "tgsi_build.h"
+#include "tgsi_info.h"
#include "tgsi_parse.h"
#include "tgsi_sanity.h"
#include "tgsi_util.h"
@@ -50,11 +51,18 @@ static boolean is_digit_alpha_underscore( const char *cur )
return is_digit( cur ) || is_alpha_underscore( cur );
}
+static boolean uprcase( char c )
+{
+ if (c >= 'a' && c <= 'z')
+ return c += 'A' - 'a';
+ return c;
+}
+
static boolean str_match_no_case( const char **pcur, const char *str )
{
const char *cur = *pcur;
- while (*str != '\0' && *str == toupper( *cur )) {
+ while (*str != '\0' && *str == uprcase( *cur )) {
str++;
cur++;
}
@@ -130,7 +138,7 @@ static boolean parse_float( const char **pcur, float *val )
}
if (!integral_part && !fractional_part)
return FALSE;
- if (toupper( *cur ) == 'E') {
+ if (uprcase( *cur ) == 'E') {
cur++;
if (*cur == '-' || *cur == '+')
cur++;
@@ -257,19 +265,19 @@ parse_opt_writemask(
cur++;
*writemask = TGSI_WRITEMASK_NONE;
eat_opt_white( &cur );
- if (toupper( *cur ) == 'X') {
+ if (uprcase( *cur ) == 'X') {
cur++;
*writemask |= TGSI_WRITEMASK_X;
}
- if (toupper( *cur ) == 'Y') {
+ if (uprcase( *cur ) == 'Y') {
cur++;
*writemask |= TGSI_WRITEMASK_Y;
}
- if (toupper( *cur ) == 'Z') {
+ if (uprcase( *cur ) == 'Z') {
cur++;
*writemask |= TGSI_WRITEMASK_Z;
}
- if (toupper( *cur ) == 'W') {
+ if (uprcase( *cur ) == 'W') {
cur++;
*writemask |= TGSI_WRITEMASK_W;
}
@@ -515,13 +523,13 @@ parse_optional_swizzle(
cur++;
eat_opt_white( &cur );
for (i = 0; i < 4; i++) {
- if (toupper( *cur ) == 'X')
+ if (uprcase( *cur ) == 'X')
swizzle[i] = TGSI_SWIZZLE_X;
- else if (toupper( *cur ) == 'Y')
+ else if (uprcase( *cur ) == 'Y')
swizzle[i] = TGSI_SWIZZLE_Y;
- else if (toupper( *cur ) == 'Z')
+ else if (uprcase( *cur ) == 'Z')
swizzle[i] = TGSI_SWIZZLE_Z;
- else if (toupper( *cur ) == 'W')
+ else if (uprcase( *cur ) == 'W')
swizzle[i] = TGSI_SWIZZLE_W;
else {
if (*cur == '0')
@@ -719,138 +727,6 @@ parse_src_operand(
return TRUE;
}
-struct opcode_info
-{
- uint num_dst;
- uint num_src;
- uint is_tex;
- uint is_branch;
- const char *mnemonic;
-};
-
-static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] =
-{
- { 1, 1, 0, 0, "ARL" },
- { 1, 1, 0, 0, "MOV" },
- { 1, 1, 0, 0, "LIT" },
- { 1, 1, 0, 0, "RCP" },
- { 1, 1, 0, 0, "RSQ" },
- { 1, 1, 0, 0, "EXP" },
- { 1, 1, 0, 0, "LOG" },
- { 1, 2, 0, 0, "MUL" },
- { 1, 2, 0, 0, "ADD" },
- { 1, 2, 0, 0, "DP3" },
- { 1, 2, 0, 0, "DP4" },
- { 1, 2, 0, 0, "DST" },
- { 1, 2, 0, 0, "MIN" },
- { 1, 2, 0, 0, "MAX" },
- { 1, 2, 0, 0, "SLT" },
- { 1, 2, 0, 0, "SGE" },
- { 1, 3, 0, 0, "MAD" },
- { 1, 2, 0, 0, "SUB" },
- { 1, 3, 0, 0, "LERP" },
- { 1, 3, 0, 0, "CND" },
- { 1, 3, 0, 0, "CND0" },
- { 1, 3, 0, 0, "DOT2ADD" },
- { 1, 2, 0, 0, "INDEX" },
- { 1, 1, 0, 0, "NEGATE" },
- { 1, 1, 0, 0, "FRAC" },
- { 1, 3, 0, 0, "CLAMP" },
- { 1, 1, 0, 0, "FLOOR" },
- { 1, 1, 0, 0, "ROUND" },
- { 1, 1, 0, 0, "EXPBASE2" },
- { 1, 1, 0, 0, "LOGBASE2" },
- { 1, 2, 0, 0, "POWER" },
- { 1, 2, 0, 0, "CROSSPRODUCT" },
- { 1, 2, 0, 0, "MULTIPLYMATRIX" },
- { 1, 1, 0, 0, "ABS" },
- { 1, 1, 0, 0, "RCC" },
- { 1, 2, 0, 0, "DPH" },
- { 1, 1, 0, 0, "COS" },
- { 1, 1, 0, 0, "DDX" },
- { 1, 1, 0, 0, "DDY" },
- { 0, 1, 0, 0, "KILP" },
- { 1, 1, 0, 0, "PK2H" },
- { 1, 1, 0, 0, "PK2US" },
- { 1, 1, 0, 0, "PK4B" },
- { 1, 1, 0, 0, "PK4UB" },
- { 1, 2, 0, 0, "RFL" },
- { 1, 2, 0, 0, "SEQ" },
- { 1, 2, 0, 0, "SFL" },
- { 1, 2, 0, 0, "SGT" },
- { 1, 1, 0, 0, "SIN" },
- { 1, 2, 0, 0, "SLE" },
- { 1, 2, 0, 0, "SNE" },
- { 1, 2, 0, 0, "STR" },
- { 1, 2, 1, 0, "TEX" },
- { 1, 4, 1, 0, "TXD" },
- { 1, 2, 1, 0, "TXP" },
- { 1, 1, 0, 0, "UP2H" },
- { 1, 1, 0, 0, "UP2US" },
- { 1, 1, 0, 0, "UP4B" },
- { 1, 1, 0, 0, "UP4UB" },
- { 1, 3, 0, 0, "X2D" },
- { 1, 1, 0, 0, "ARA" },
- { 1, 1, 0, 0, "ARR" },
- { 0, 1, 0, 0, "BRA" },
- { 0, 0, 0, 1, "CAL" },
- { 0, 0, 0, 0, "RET" },
- { 1, 1, 0, 0, "SSG" },
- { 1, 3, 0, 0, "CMP" },
- { 1, 1, 0, 0, "SCS" },
- { 1, 2, 1, 0, "TXB" },
- { 1, 1, 0, 0, "NRM" },
- { 1, 2, 0, 0, "DIV" },
- { 1, 2, 0, 0, "DP2" },
- { 1, 2, 1, 0, "TXL" },
- { 0, 0, 0, 0, "BRK" },
- { 0, 1, 0, 1, "IF" },
- { 0, 0, 0, 0, "LOOP" },
- { 0, 1, 0, 0, "REP" },
- { 0, 0, 0, 1, "ELSE" },
- { 0, 0, 0, 0, "ENDIF" },
- { 0, 0, 0, 0, "ENDLOOP" },
- { 0, 0, 0, 0, "ENDREP" },
- { 0, 1, 0, 0, "PUSHA" },
- { 1, 0, 0, 0, "POPA" },
- { 1, 1, 0, 0, "CEIL" },
- { 1, 1, 0, 0, "I2F" },
- { 1, 1, 0, 0, "NOT" },
- { 1, 1, 0, 0, "TRUNC" },
- { 1, 2, 0, 0, "SHL" },
- { 1, 2, 0, 0, "SHR" },
- { 1, 2, 0, 0, "AND" },
- { 1, 2, 0, 0, "OR" },
- { 1, 2, 0, 0, "MOD" },
- { 1, 2, 0, 0, "XOR" },
- { 1, 3, 0, 0, "SAD" },
- { 1, 2, 1, 0, "TXF" },
- { 1, 2, 1, 0, "TXQ" },
- { 0, 0, 0, 0, "CONT" },
- { 0, 0, 0, 0, "EMIT" },
- { 0, 0, 0, 0, "ENDPRIM" },
- { 0, 0, 0, 1, "BGNLOOP2" },
- { 0, 0, 0, 0, "BGNSUB" },
- { 0, 0, 0, 1, "ENDLOOP2" },
- { 0, 0, 0, 0, "ENDSUB" },
- { 1, 1, 0, 0, "NOISE1" },
- { 1, 1, 0, 0, "NOISE2" },
- { 1, 1, 0, 0, "NOISE3" },
- { 1, 1, 0, 0, "NOISE4" },
- { 0, 0, 0, 0, "NOP" },
- { 1, 2, 0, 0, "M4X3" },
- { 1, 2, 0, 0, "M3X4" },
- { 1, 2, 0, 0, "M3X3" },
- { 1, 2, 0, 0, "M3X2" },
- { 1, 1, 0, 0, "NRM4" },
- { 0, 1, 0, 0, "CALLNZ" },
- { 0, 1, 0, 0, "IFC" },
- { 0, 1, 0, 0, "BREAKC" },
- { 0, 0, 0, 0, "KIL" },
- { 0, 0, 0, 0, "END" },
- { 1, 1, 0, 0, "SWZ" }
-};
-
static const char *texture_names[TGSI_TEXTURE_COUNT] =
{
"UNKNOWN",
@@ -871,7 +747,7 @@ parse_instruction(
{
uint i;
uint saturate = TGSI_SAT_NONE;
- const struct opcode_info *info;
+ const struct tgsi_opcode_info *info;
struct tgsi_full_instruction inst;
uint advance;
@@ -881,7 +757,7 @@ parse_instruction(
for (i = 0; i < TGSI_OPCODE_LAST; i++) {
const char *cur = ctx->cur;
- info = &opcode_info[i];
+ info = tgsi_get_opcode_info( i );
if (str_match_no_case( &cur, info->mnemonic )) {
if (str_match_no_case( &cur, "_SATNV" ))
saturate = TGSI_SAT_MINUS_PLUS_ONE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index 357f77b..ea87da3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -31,6 +31,7 @@
* Authors: Brian Paul
*/
+#include "pipe/p_debug.h"
#include "tgsi_transform.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index 3da0b382..a121adb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -29,7 +29,6 @@
#define TGSI_TRANSFORM_H
-#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_build.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 09486e6..50101a9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -26,7 +26,6 @@
**************************************************************************/
#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"