diff options
Diffstat (limited to 'src/mesa/pipe/tgsi/core/tgsi_exec.c')
-rw-r--r-- | src/mesa/pipe/tgsi/core/tgsi_exec.c | 2193 |
1 files changed, 2193 insertions, 0 deletions
diff --git a/src/mesa/pipe/tgsi/core/tgsi_exec.c b/src/mesa/pipe/tgsi/core/tgsi_exec.c new file mode 100644 index 0000000..189a411 --- /dev/null +++ b/src/mesa/pipe/tgsi/core/tgsi_exec.c @@ -0,0 +1,2193 @@ +#include "tgsi_platform.h" +#include "tgsi_core.h" + +#define TILE_BOTTOM_LEFT 0 +#define TILE_BOTTOM_RIGHT 1 +#define TILE_TOP_LEFT 2 +#define TILE_TOP_RIGHT 3 + +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach, + struct tgsi_token *tokens ) +{ + GLuint i; + struct tgsi_parse_context parse; + + mach->Tokens = tokens; + + tgsi_parse_init (&parse, mach->Tokens); + mach->Processor = parse.FullHeader.Processor.Processor; + tgsi_parse_free (&parse); + + mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); + mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + +#if XXX_SSE + tgsi_emit_sse (tokens, + &mach->Function); +#endif + + /* Setup constants. */ + for( i = 0; i < 4; i++ ) { + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; + } +} + +void +tgsi_exec_prepare( + struct tgsi_exec_machine *mach, + struct tgsi_exec_labels *labels ) +{ + struct tgsi_parse_context parse; + + mach->ImmLimit = 0; + labels->count = 0; + + tgsi_parse_init( &parse, mach->Tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + GLuint pointer = parse.Position; + GLuint i; + tgsi_parse_token( &parse ); + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + assert( (parse.FullToken.FullImmediate.Immediate.Size - 1) % 4 == 0 ); + assert( mach->ImmLimit + (parse.FullToken.FullImmediate.Immediate.Size - 1) / 4 <= 256 ); + for( i = 0; i < parse.FullToken.FullImmediate.Immediate.Size - 1; i++ ) { + mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + mach->ImmLimit += (parse.FullToken.FullImmediate.Immediate.Size - 1) / 4; + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + if( parse.FullToken.FullInstruction.InstructionExtLabel.Label && + parse.FullToken.FullInstruction.InstructionExtLabel.Target ) { + assert( labels->count < 128 ); + labels->labels[labels->count][0] = parse.FullToken.FullInstruction.InstructionExtLabel.Label; + labels->labels[labels->count][1] = pointer; + labels->count++; + } + break; + default: + assert( 0 ); + } + } + tgsi_parse_free (&parse); +} + +void +tgsi_exec_machine_run( + struct tgsi_exec_machine *mach ) +{ + struct tgsi_exec_labels labels; + + tgsi_exec_prepare( mach, &labels ); + tgsi_exec_machine_run2( mach, &labels ); +} + +static void +micro_abs( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) fabs( (GLdouble) src->f[0] ); + dst->f[1] = (GLfloat) fabs( (GLdouble) src->f[1] ); + dst->f[2] = (GLfloat) fabs( (GLdouble) src->f[2] ); + dst->f[3] = (GLfloat) fabs( (GLdouble) src->f[3] ); +} + +static void +micro_add( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] + src1->f[0]; + dst->f[1] = src0->f[1] + src1->f[1]; + dst->f[2] = src0->f[2] + src1->f[2]; + dst->f[3] = src0->f[3] + src1->f[3]; +} + +static void +micro_iadd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] + src1->i[0]; + dst->i[1] = src0->i[1] + src1->i[1]; + dst->i[2] = src0->i[2] + src1->i[2]; + dst->i[3] = src0->i[3] + src1->i[3]; +} + +static void +micro_and( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] & src1->u[0]; + dst->u[1] = src0->u[1] & src1->u[1]; + dst->u[2] = src0->u[2] & src1->u[2]; + dst->u[3] = src0->u[3] & src1->u[3]; +} + +static void +micro_ceil( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) ceil( (GLdouble) src->f[0] ); + dst->f[1] = (GLfloat) ceil( (GLdouble) src->f[1] ); + dst->f[2] = (GLfloat) ceil( (GLdouble) src->f[2] ); + dst->f[3] = (GLfloat) ceil( (GLdouble) src->f[3] ); +} + +static void +micro_cos( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) cos( (GLdouble) src->f[0] ); + dst->f[1] = (GLfloat) cos( (GLdouble) src->f[1] ); + dst->f[2] = (GLfloat) cos( (GLdouble) src->f[2] ); + dst->f[3] = (GLfloat) cos( (GLdouble) src->f[3] ); +} + +static void +micro_div( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] / src1->f[0]; + dst->f[1] = src0->f[1] / src1->f[1]; + dst->f[2] = src0->f[2] / src1->f[2]; + dst->f[3] = src0->f[3] / src1->f[3]; +} + +static void +micro_udiv( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] / src1->u[0]; + dst->u[1] = src0->u[1] / src1->u[1]; + dst->u[2] = src0->u[2] / src1->u[2]; + dst->u[3] = src0->u[3] / src1->u[3]; +} + +static void +micro_eq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ieq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_exp2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) pow( 2.0, (GLdouble) src->f[0] ); + dst->f[1] = (GLfloat) pow( 2.0, (GLdouble) src->f[1] ); + dst->f[2] = (GLfloat) pow( 2.0, (GLdouble) src->f[2] ); + dst->f[3] = (GLfloat) pow( 2.0, (GLdouble) src->f[3] ); +} + +static void +micro_f2it( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = (GLint) src->f[0]; + dst->i[1] = (GLint) src->f[1]; + dst->i[2] = (GLint) src->f[2]; + dst->i[3] = (GLint) src->f[3]; +} + +static void +micro_f2ut( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = (GLuint) src->f[0]; + dst->u[1] = (GLuint) src->f[1]; + dst->u[2] = (GLuint) src->f[2]; + dst->u[3] = (GLuint) src->f[3]; +} + +static void +micro_flr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) floor( (GLdouble) src->f[0] ); + dst->f[1] = (GLfloat) floor( (GLdouble) src->f[1] ); + dst->f[2] = (GLfloat) floor( (GLdouble) src->f[2] ); + dst->f[3] = (GLfloat) floor( (GLdouble) src->f[3] ); +} + +static void +micro_frc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = src->f[0] - (GLfloat) floor( (GLdouble) src->f[0] ); + dst->f[1] = src->f[1] - (GLfloat) floor( (GLdouble) src->f[1] ); + dst->f[2] = src->f[2] - (GLfloat) floor( (GLdouble) src->f[2] ); + dst->f[3] = src->f[3] - (GLfloat) floor( (GLdouble) src->f[3] ); +} + +static void +micro_i2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) src->i[0]; + dst->f[1] = (GLfloat) src->i[1]; + dst->f[2] = (GLfloat) src->i[2]; + dst->f[3] = (GLfloat) src->i[3]; +} + +static void +micro_lg2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) log( (GLdouble) src->f[0] ) * 1.442695f; + dst->f[1] = (GLfloat) log( (GLdouble) src->f[1] ) * 1.442695f; + dst->f[2] = (GLfloat) log( (GLdouble) src->f[2] ) * 1.442695f; + dst->f[3] = (GLfloat) log( (GLdouble) src->f[3] ) * 1.442695f; +} + +static void +micro_lt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ilt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_ult( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; +} + +static void +micro_max( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_min( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_umod( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] % src1->u[0]; + dst->u[1] = src0->u[1] % src1->u[1]; + dst->u[2] = src0->u[2] % src1->u[2]; + dst->u[3] = src0->u[3] % src1->u[3]; +} + +static void +micro_mul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] * src1->f[0]; + dst->f[1] = src0->f[1] * src1->f[1]; + dst->f[2] = src0->f[2] * src1->f[2]; + dst->f[3] = src0->f[3] * src1->f[3]; +} + +static void +micro_imul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] * src1->i[0]; + dst->i[1] = src0->i[1] * src1->i[1]; + dst->i[2] = src0->i[2] * src1->i[2]; + dst->i[3] = src0->i[3] * src1->i[3]; +} + +static void +micro_imul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->i[0] = src0->i[0] * src1->i[0]; + dst1->i[1] = src0->i[1] * src1->i[1]; + dst1->i[2] = src0->i[2] * src1->i[2]; + dst1->i[3] = src0->i[3] * src1->i[3]; + dst0->i[0] = 0; + dst0->i[1] = 0; + dst0->i[2] = 0; + dst0->i[3] = 0; +} + +static void +micro_umul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->u[0] = src0->u[0] * src1->u[0]; + dst1->u[1] = src0->u[1] * src1->u[1]; + dst1->u[2] = src0->u[2] * src1->u[2]; + dst1->u[3] = src0->u[3] * src1->u[3]; + dst0->u[0] = 0; + dst0->u[1] = 0; + dst0->u[2] = 0; + dst0->u[3] = 0; +} + +static void +micro_movc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2 ) +{ + dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; + dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; + dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; + dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; +} + +static void +micro_neg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = -src->f[0]; + dst->f[1] = -src->f[1]; + dst->f[2] = -src->f[2]; + dst->f[3] = -src->f[3]; +} + +static void +micro_ineg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_not( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_or( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] | src1->u[0]; + dst->u[1] = src0->u[1] | src1->u[1]; + dst->u[2] = src0->u[2] | src1->u[2]; + dst->u[3] = src0->u[3] | src1->u[3]; +} + +static void +micro_pow( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = (GLfloat) pow( (GLdouble) src0->f[0], (GLdouble) src1->f[0] ); + dst->f[1] = (GLfloat) pow( (GLdouble) src0->f[1], (GLdouble) src1->f[1] ); + dst->f[2] = (GLfloat) pow( (GLdouble) src0->f[2], (GLdouble) src1->f[2] ); + dst->f[3] = (GLfloat) pow( (GLdouble) src0->f[3], (GLdouble) src1->f[3] ); +} + +static void +micro_rnd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) floor( (GLdouble) (src->f[0] + 0.5f) ); + dst->f[1] = (GLfloat) floor( (GLdouble) (src->f[1] + 0.5f) ); + dst->f[2] = (GLfloat) floor( (GLdouble) (src->f[2] + 0.5f) ); + dst->f[3] = (GLfloat) floor( (GLdouble) (src->f[3] + 0.5f) ); +} + +static void +micro_shl( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] << src1->i[0]; + dst->i[1] = src0->i[1] << src1->i[1]; + dst->i[2] = src0->i[2] << src1->i[2]; + dst->i[3] = src0->i[3] << src1->i[3]; +} + +static void +micro_ishr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] >> src1->i[0]; + dst->i[1] = src0->i[1] >> src1->i[1]; + dst->i[2] = src0->i[2] >> src1->i[2]; + dst->i[3] = src0->i[3] >> src1->i[3]; +} + +static void +micro_ushr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] >> src1->u[0]; + dst->u[1] = src0->u[1] >> src1->u[1]; + dst->u[2] = src0->u[2] >> src1->u[2]; + dst->u[3] = src0->u[3] >> src1->u[3]; +} + +static void +micro_sin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) sin( (GLdouble) src->f[0] ); + dst->f[1] = (GLfloat) sin( (GLdouble) src->f[1] ); + dst->f[2] = (GLfloat) sin( (GLdouble) src->f[2] ); + dst->f[3] = (GLfloat) sin( (GLdouble) src->f[3] ); +} + +static void +micro_sqrt( union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) sqrt( (GLdouble) src->f[0] ); + dst->f[1] = (GLfloat) sqrt( (GLdouble) src->f[1] ); + dst->f[2] = (GLfloat) sqrt( (GLdouble) src->f[2] ); + dst->f[3] = (GLfloat) sqrt( (GLdouble) src->f[3] ); +} + +static void +micro_sub( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] - src1->f[0]; + dst->f[1] = src0->f[1] - src1->f[1]; + dst->f[2] = src0->f[2] - src1->f[2]; + dst->f[3] = src0->f[3] - src1->f[3]; +} + +static void +micro_u2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (GLfloat) src->u[0]; + dst->f[1] = (GLfloat) src->u[1]; + dst->f[2] = (GLfloat) src->u[2]; + dst->f[3] = (GLfloat) src->u[3]; +} + +static void +micro_xor( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] ^ src1->u[0]; + dst->u[1] = src0->u[1] ^ src1->u[1]; + dst->u[2] = src0->u[2] ^ src1->u[2]; + dst->u[3] = src0->u[3] ^ src1->u[3]; +} + +static void +fetch_src_file_channel( + const struct tgsi_exec_machine *mach, + const GLuint file, + const GLuint swizzle, + const union tgsi_exec_channel *index, + union tgsi_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: + chan->f[0] = mach->Consts[index->i[0]][swizzle]; + chan->f[1] = mach->Consts[index->i[1]][swizzle]; + chan->f[2] = mach->Consts[index->i[2]][swizzle]; + chan->f[3] = mach->Consts[index->i[3]][swizzle]; + break; + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (GLint) mach->ImmLimit ); + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + assert( index->i[1] < (GLint) mach->ImmLimit ); + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + assert( index->i[2] < (GLint) mach->ImmLimit ); + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + assert( index->i[3] < (GLint) mach->ImmLimit ); + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const GLuint chan_index ) +{ + union tgsi_exec_channel index; + GLuint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.i[0] *= 17; + index.i[1] *= 17; + index.i[2] *= 17; + index.i[3] *= 17; + break; + case TGSI_FILE_CONSTANT: + index.i[0] *= 4096; + index.i[1] *= 4096; + index.i[2] *= 4096; + index.i[3] *= 4096; + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + micro_abs( chan, chan ); + break; + + case TGSI_UTIL_SIGN_SET: + micro_abs( chan, chan ); + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } +} + +static void +store_dest( + struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + GLuint chan_index ) +{ + union tgsi_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + *dst = *chan; + break; + + case TGSI_SAT_ZERO_ONE: + micro_lt( dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], chan ); + micro_lt( dst, chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + + default: + assert( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN) + +static void +exec_kil (struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + GLuint uniquemask; + GLuint chan_index; + GLuint kilmask = 0; + union tgsi_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + GLuint swizzle; + GLuint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << (i * 4); + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + +#if MESA +/* + * Fetch a texel using S texture coordinate. + */ +static void +fetch_texel_1d( GLcontext *ctx, + struct tgsi_sampler_state *sampler, + const union tgsi_exec_channel *s, + GLuint unit, + union tgsi_exec_channel *r, + union tgsi_exec_channel *g, + union tgsi_exec_channel *b, + union tgsi_exec_channel *a ) +{ + SWcontext *swrast = SWRAST_CONTEXT(ctx); + GLuint fragment_index; + GLfloat stpq[4][4]; + GLfloat lambdas[4]; + GLchan rgba[4][4]; + + for (fragment_index = 0; fragment_index < 4; fragment_index++) + { + stpq[fragment_index][0] = s->f[fragment_index]; + } + + if (sampler->NeedLambda) + { + GLfloat dsdx = s->f[TILE_BOTTOM_RIGHT] - s->f[TILE_BOTTOM_LEFT]; + GLfloat dsdy = s->f[TILE_TOP_LEFT] - s->f[TILE_BOTTOM_LEFT]; + + GLfloat rho, lambda; + + dsdx = FABSF(dsdx); + dsdy = FABSF(dsdy); + + rho = MAX2(dsdx, dsdy) * sampler->ImageWidth; + + lambda = LOG2(rho); + + if (sampler->NeedLodBias) + lambda += sampler->LodBias; + + if (sampler->NeedLambdaClamp) + lambda = CLAMP(lambda, sampler->MinLod, sampler->MaxLod); + + /* XXX: Use the same lambda value throughout the tile. Could + * end up with four unique values by recalculating partial + * derivs in the other row and column, and calculating lambda + * using the dx and dy values appropriate for each fragment in + * the tile. + */ + lambdas[0] = + lambdas[1] = + lambdas[2] = + lambdas[3] = lambda; + } + + /* XXX use a float-valued TextureSample routine here!!! */ + swrast->TextureSample[unit] (ctx, + ctx->Texture.Unit[unit]._Current, + 4, + (const GLfloat (*)[4])stpq, + lambdas, + rgba); + + for (fragment_index = 0; fragment_index < 4; fragment_index++) + { + r->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][0]); + g->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][1]); + b->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][2]); + a->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][3]); + } +} + +/* + * Fetch a texel using ST texture coordinates. + */ +static void +fetch_texel_2d( GLcontext *ctx, + struct tgsi_sampler_state *sampler, + const union tgsi_exec_channel *s, + const union tgsi_exec_channel *t, + GLuint unit, + union tgsi_exec_channel *r, + union tgsi_exec_channel *g, + union tgsi_exec_channel *b, + union tgsi_exec_channel *a ) +{ + SWcontext *swrast = SWRAST_CONTEXT( ctx ); + GLuint fragment_index; + GLfloat stpq[4][4]; + GLfloat lambdas[4]; + GLchan rgba[4][4]; + + for (fragment_index = 0; fragment_index < 4; fragment_index++) { + stpq[fragment_index][0] = s->f[fragment_index]; + stpq[fragment_index][1] = t->f[fragment_index]; + } + + if (sampler->NeedLambda) { + GLfloat dsdx = s->f[TILE_BOTTOM_RIGHT] - s->f[TILE_BOTTOM_LEFT]; + GLfloat dsdy = s->f[TILE_TOP_LEFT] - s->f[TILE_BOTTOM_LEFT]; + + GLfloat dtdx = t->f[TILE_BOTTOM_RIGHT] - t->f[TILE_BOTTOM_LEFT]; + GLfloat dtdy = t->f[TILE_TOP_LEFT] - t->f[TILE_BOTTOM_LEFT]; + + GLfloat maxU, maxV, rho, lambda; + + dsdx = FABSF( dsdx ); + dsdy = FABSF( dsdy ); + dtdx = FABSF( dtdx ); + dtdy = FABSF( dtdy ); + + maxU = MAX2( dsdx, dsdy ) * sampler->ImageWidth; + maxV = MAX2( dtdx, dtdy ) * sampler->ImageHeight; + + rho = MAX2( maxU, maxV ); + + lambda = LOG2( rho ); + + if (sampler->NeedLodBias) + lambda += sampler->LodBias; + + if (sampler->NeedLambdaClamp) + lambda = CLAMP( + lambda, + sampler->MinLod, + sampler->MaxLod ); + + /* XXX: Use the same lambda value throughout the tile. Could + * end up with four unique values by recalculating partial + * derivs in the other row and column, and calculating lambda + * using the dx and dy values appropriate for each fragment in + * the tile. + */ + lambdas[0] = + lambdas[1] = + lambdas[2] = + lambdas[3] = lambda; + } + + /* XXX use a float-valued TextureSample routine here!!! */ + swrast->TextureSample[unit]( + ctx, + ctx->Texture.Unit[unit]._Current, + 4, + (const GLfloat (*)[4]) stpq, + lambdas, + rgba ); + + for (fragment_index = 0; fragment_index < 4; fragment_index++) { + r->f[fragment_index] = CHAN_TO_FLOAT( rgba[fragment_index][0] ); + g->f[fragment_index] = CHAN_TO_FLOAT( rgba[fragment_index][1] ); + b->f[fragment_index] = CHAN_TO_FLOAT( rgba[fragment_index][2] ); + a->f[fragment_index] = CHAN_TO_FLOAT( rgba[fragment_index][3] ); + } +} + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel_3d( GLcontext *ctx, + struct tgsi_sampler_state *sampler, + const union tgsi_exec_channel *s, + const union tgsi_exec_channel *t, + const union tgsi_exec_channel *p, + GLuint unit, + union tgsi_exec_channel *r, + union tgsi_exec_channel *g, + union tgsi_exec_channel *b, + union tgsi_exec_channel *a ) +{ + SWcontext *swrast = SWRAST_CONTEXT(ctx); + GLuint fragment_index; + GLfloat stpq[4][4]; + GLfloat lambdas[4]; + GLchan rgba[4][4]; + + for (fragment_index = 0; fragment_index < 4; fragment_index++) + { + stpq[fragment_index][0] = s->f[fragment_index]; + stpq[fragment_index][1] = t->f[fragment_index]; + stpq[fragment_index][2] = p->f[fragment_index]; + } + + if (sampler->NeedLambda) + { + GLfloat dsdx = s->f[TILE_BOTTOM_RIGHT] - s->f[TILE_BOTTOM_LEFT]; + GLfloat dsdy = s->f[TILE_TOP_LEFT] - s->f[TILE_BOTTOM_LEFT]; + + GLfloat dtdx = t->f[TILE_BOTTOM_RIGHT] - t->f[TILE_BOTTOM_LEFT]; + GLfloat dtdy = t->f[TILE_TOP_LEFT] - t->f[TILE_BOTTOM_LEFT]; + + GLfloat dpdx = p->f[TILE_BOTTOM_RIGHT] - p->f[TILE_BOTTOM_LEFT]; + GLfloat dpdy = p->f[TILE_TOP_LEFT] - p->f[TILE_BOTTOM_LEFT]; + + GLfloat maxU, maxV, maxW, rho, lambda; + + dsdx = FABSF(dsdx); + dsdy = FABSF(dsdy); + dtdx = FABSF(dtdx); + dtdy = FABSF(dtdy); + dpdx = FABSF(dpdx); + dpdy = FABSF(dpdy); + + maxU = MAX2(dsdx, dsdy) * sampler->ImageWidth; + maxV = MAX2(dtdx, dtdy) * sampler->ImageHeight; + maxW = MAX2(dpdx, dpdy) * sampler->ImageDepth; + + rho = MAX2(maxU, MAX2(maxV, maxW)); + + lambda = LOG2(rho); + + if (sampler->NeedLodBias) + lambda += sampler->LodBias; + + if (sampler->NeedLambdaClamp) + lambda = CLAMP(lambda, sampler->MinLod, sampler->MaxLod); + + /* XXX: Use the same lambda value throughout the tile. Could + * end up with four unique values by recalculating partial + * derivs in the other row and column, and calculating lambda + * using the dx and dy values appropriate for each fragment in + * the tile. + */ + lambdas[0] = + lambdas[1] = + lambdas[2] = + lambdas[3] = lambda; + } + + /* XXX use a float-valued TextureSample routine here!!! */ + swrast->TextureSample[unit] (ctx, + ctx->Texture.Unit[unit]._Current, + 4, + (const GLfloat (*)[4])stpq, + lambdas, + rgba); + + for (fragment_index = 0; fragment_index < 4; fragment_index++) + { + r->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][0]); + g->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][1]); + b->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][2]); + a->f[fragment_index] = CHAN_TO_FLOAT(rgba[fragment_index][3]); + } +} +#endif + +static GLuint +map_label( + GLuint label, + struct tgsi_exec_labels *labels ) +{ + GLuint i; + + for( i = 0; i < labels->count; i++ ) { + if( labels->labels[i][0] == label ) { + return labels->labels[i][1]; + } + } + assert( 0 ); + return 0; +} + +static void +exec_instruction( + struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + struct tgsi_exec_labels *labels, + GLuint *programCounter ) +{ +#if MESA + GET_CURRENT_CONTEXT(ctx); +#endif + GLuint chan_index; + union tgsi_exec_channel r[8]; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_f2it( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + /* TGSI_OPCODE_SWZ */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + + FETCH( &r[2], 0, CHAN_W ); + micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); + micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); + micro_pow( &r[1], &r[1], &r[2] ); + micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + micro_sqrt( &r[0], &r[0] ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + assert (0); + break; + + case TGSI_OPCODE_LOG: + assert (0); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_mul( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + micro_mul( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_mul( &r[0], &r[0], &r[1] ); + FETCH( &r[1], 2, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_sub( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_sub( &r[1], &r[1], &r[2] ); + micro_mul( &r[0], &r[0], &r[1] ); + micro_add( &r[0], &r[0], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_frc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_flr( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_rnd( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + micro_lg2( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_pow( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + + micro_mul( &r[2], &r[0], &r[1] ); + + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + micro_mul( &r[5], &r[3], &r[4] ); + micro_sub( &r[2], &r[2], &r[5] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + + micro_mul( &r[3], &r[3], &r[2] ); + + FETCH(&r[5], 0, CHAN_X); + + micro_mul( &r[1], &r[1], &r[5] ); + micro_sub( &r[3], &r[3], &r[1] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + micro_mul( &r[5], &r[5], &r[4] ); + micro_mul( &r[0], &r[0], &r[2] ); + micro_sub( &r[5], &r[5], &r[0] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + + micro_abs( &r[0], &r[0] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 1, CHAN_W); + + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + micro_cos( &r[0], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + assert (0); + break; + + case TGSI_OPCODE_DDY: + assert (0); + break; + + case TGSI_OPCODE_KIL: + exec_kil (mach, inst); + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; + + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + assert (0); + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + assert (0); + break; + + case TGSI_OPCODE_SIN: + FETCH(&r[0], 0, CHAN_X); + + micro_sin( &r[0], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + assert (0); + break; + + case TGSI_OPCODE_SNE: + assert (0); + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + + switch (inst->InstructionExtTexture.Texture) + { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) + { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[1], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[1] ); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } +#if MESA + fetch_texel_1d (ctx, + &mach->Samplers[inst->FullSrcRegisters[1].SrcRegister.Index], + &r[0], + inst->FullSrcRegisters[1].SrcRegister.Index, + &r[0], &r[1], &r[2], &r[3]); +#endif + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) + { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[2], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[2] ); + micro_div( &r[1], &r[1], &r[2] ); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + +#if MESA + fetch_texel_2d (ctx, + &mach->Samplers[inst->FullSrcRegisters[1].SrcRegister.Index], + &r[0], &r[1], + inst->FullSrcRegisters[1].SrcRegister.Index, + &r[0], &r[1], &r[2], &r[3]); +#endif + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) + { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + +#if MESA + fetch_texel_3d (ctx, + &mach->Samplers[inst->FullSrcRegisters[1].SrcRegister.Index], + &r[0], &r[1], &r[2], + inst->FullSrcRegisters[1].SrcRegister.Index, + &r[0], &r[1], &r[2], &r[3]); +#endif + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TXD: + assert (0); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + assert (0); + break; + + case TGSI_OPCODE_RET: + /* XXX: end of shader! */ + /*assert (0);*/ + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + micro_cos( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + micro_sin( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_TXB: + assert (0); + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TXL: + assert (0); + break; + + case TGSI_OPCODE_BRK: + assert (0); + break; + + case TGSI_OPCODE_IF: + assert (0); + break; + + case TGSI_OPCODE_LOOP: + assert (0); + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ELSE: + assert (0); + break; + + case TGSI_OPCODE_ENDIF: + assert (0); + break; + + case TGSI_OPCODE_ENDLOOP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ceil( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_i2f( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_not( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + assert (0); + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_shl( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ishr( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_and( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_or( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_xor( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_CONT: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + default: + assert( 0 ); + } +} + + +#if !defined(XSTDCALL) +#if defined(WIN32) +#define XSTDCALL __stdcall +#else +#define XSTDCALL +#endif +#endif + +typedef void (XSTDCALL *fp_function) (const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + GLfloat (*constant)[4], + struct tgsi_exec_vector *temporary); + +void +tgsi_exec_machine_run2( + struct tgsi_exec_machine *mach, + struct tgsi_exec_labels *labels ) +{ +#if MESA + GET_CURRENT_CONTEXT(ctx); + GLuint i; +#endif + +#if XXX_SSE + fp_function function; + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + + function = (fp_function) x86_get_func (&mach->Function); + + function (mach->Inputs, + mach->Outputs, + mach->Consts, + mach->Temps); +#else + struct tgsi_parse_context parse; + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + tgsi_parse_init( &parse, mach->Tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + exec_instruction( mach, &parse.FullToken.FullInstruction, labels, &parse.Position ); + break; + default: + assert( 0 ); + } + } + tgsi_parse_free (&parse); +#endif + +#if MESA + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. + */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif +} + |